Mirror of https://github.com/yt-dlp/yt-dlp.git
Synced 2026-02-05 05:26:55 +00:00
Merge branch 'master' of https://github.com/yt-dlp/yt-dlp into fix/ie/EuroParlWebstream
@@ -338,7 +338,6 @@ from .canalc2 import Canalc2IE
from .canalplus import CanalplusIE
from .canalsurmas import CanalsurmasIE
from .caracoltv import CaracolTvPlayIE
from .cartoonnetwork import CartoonNetworkIE
from .cbc import (
    CBCIE,
    CBCGemIE,

@@ -929,7 +928,10 @@ from .jiocinema import (
)
from .jiosaavn import (
    JioSaavnAlbumIE,
    JioSaavnArtistIE,
    JioSaavnPlaylistIE,
    JioSaavnShowIE,
    JioSaavnShowPlaylistIE,
    JioSaavnSongIE,
)
from .joj import JojIE

@@ -1042,6 +1044,7 @@ from .limelight import (
    LimelightMediaIE,
)
from .linkedin import (
    LinkedInEventsIE,
    LinkedInIE,
    LinkedInLearningCourseIE,
    LinkedInLearningIE,

@@ -1783,7 +1786,6 @@ from .rtvcplay import (
from .rtve import (
    RTVEALaCartaIE,
    RTVEAudioIE,
    RTVEInfantilIE,
    RTVELiveIE,
    RTVETelevisionIE,
)

@@ -1964,7 +1966,6 @@ from .spreaker import (
    SpreakerShowIE,
)
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .sproutvideo import (
    SproutVideoIE,
    VidsIoIE,

@@ -2146,6 +2147,7 @@ from .toggle import (
from .toggo import ToggoIE
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
from .toutiao import ToutiaoIE
from .toutv import TouTvIE
from .toypics import (
    ToypicsIE,

@@ -2237,7 +2239,10 @@ from .tvplay import (
    TVPlayIE,
)
from .tvplayer import TVPlayerIE
from .tvw import TvwIE
from .tvw import (
    TvwIE,
    TvwTvChannelsIE,
)
from .tweakers import TweakersIE
from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE

@@ -2365,6 +2370,7 @@ from .vimeo import (
    VHXEmbedIE,
    VimeoAlbumIE,
    VimeoChannelIE,
    VimeoEventIE,
    VimeoGroupsIE,
    VimeoIE,
    VimeoLikesIE,

@@ -21,6 +21,7 @@ from ..utils import (
    int_or_none,
    time_seconds,
    traverse_obj,
    update_url,
    update_url_query,
)

@@ -417,6 +418,10 @@ class AbemaTVIE(AbemaTVBaseIE):
            'is_live': is_live,
            'availability': availability,
        })

        if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
            info['thumbnails'] = [{'url': thumbnail}]

        return info

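The AbemaTV hunk above pairs the walrus operator with yt-dlp's update_url so that a thumbnail entry is added only when one actually exists. A minimal runnable sketch of that pattern, assuming yt-dlp is installed (the URL is made up):

from yt_dlp.utils import update_url

# update_url(..., query=None) drops the query string; the walrus also guards
# against the empty-string default that _og_search_thumbnail would return
thumb = 'https://example.com/thumb.jpg?width=200&height=112'
if clean := update_url(thumb, query=None):
    print(clean)  # https://example.com/thumb.jpg
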
@@ -1,32 +1,24 @@
import re

from .theplatform import ThePlatformIE
from ..utils import (
    int_or_none,
    parse_age_limit,
    try_get,
    update_url_query,
)
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils.traversal import traverse_obj


class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
    _VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
class AMCNetworksIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/?#]+)+)/[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
        'url': 'https://www.amc.com/shows/dark-winds/videos/dark-winds-a-look-at-season-3--1072027',
        'info_dict': {
            'id': '4Lq1dzOnZGt0',
            'id': '6369261343112',
            'ext': 'mp4',
            'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
            'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
            'upload_date': '20201120',
            'timestamp': 1605904350,
            'uploader': 'AMCN',
            'title': 'Dark Winds: A Look at Season 3',
            'uploader_id': '6240731308001',
            'duration': 176.427,
            'thumbnail': r're:https://[^/]+\.boltdns\.net/.+/image\.jpg',
            'tags': [],
            'timestamp': 1740414792,
            'upload_date': '20250224',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': '404 Not Found',
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
        'only_matching': True,

@@ -52,96 +44,18 @@ class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
        'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
        'only_matching': True,
    }]
    _REQUESTOR_ID_MAP = {
        'amc': 'AMC',
        'bbcamerica': 'BBCA',
        'ifc': 'IFC',
        'sundancetv': 'SUNDANCE',
        'wetv': 'WETV',
    }

    def _real_extract(self, url):
        site, display_id = self._match_valid_url(url).groups()
        requestor_id = self._REQUESTOR_ID_MAP[site]
        page_data = self._download_json(
            f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
            display_id)['data']
        properties = page_data.get('properties') or {}
        query = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        initial_data = self._search_json(
            r'window\.initialData\s*=\s*JSON\.parse\(String\.raw`', webpage, 'initial data', display_id)
        video_id = traverse_obj(initial_data, ('initialData', 'properties', 'videoId', {str}))
        if not video_id:  # All locked videos are now DRM-protected
            self.report_drm(display_id)
        account_id = initial_data['config']['brightcove']['accountId']
        player_id = initial_data['config']['brightcove']['playerId']

        video_player_count = 0
        try:
            for v in page_data['children']:
                if v.get('type') == 'video-player':
                    release_pid = v['properties']['currentVideo']['meta']['releasePid']
                    tp_path = 'M_UwQC/' + release_pid
                    media_url = 'https://link.theplatform.com/s/' + tp_path
                    video_player_count += 1
        except KeyError:
            pass
        if video_player_count > 1:
            self.report_warning(
                f'The JSON data has {video_player_count} video players. Only one will be extracted')

        # Fall back to videoPid if releasePid not found.
        # TODO: Fall back to videoPid if releasePid manifest uses DRM.
        if not video_player_count:
            tp_path = 'M_UwQC/media/' + properties['videoPid']
            media_url = 'https://link.theplatform.com/s/' + tp_path

        theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)
        video_id = theplatform_metadata['pid']
        title = theplatform_metadata['title']
        rating = try_get(
            theplatform_metadata, lambda x: x['ratings'][0]['rating'])
        video_category = properties.get('videoCategory')
        if video_category and video_category.endswith('-Auth'):
            resource = self._get_mvpd_resource(
                requestor_id, title, video_id, rating)
            query['auth'] = self._extract_mvpd_auth(
                url, video_id, requestor_id, resource)
        media_url = update_url_query(media_url, query)
        formats, subtitles = self._extract_theplatform_smil(
            media_url, video_id)

        thumbnails = []
        thumbnail_urls = [properties.get('imageDesktop')]
        if 'thumbnail' in info:
            thumbnail_urls.append(info.pop('thumbnail'))
        for thumbnail_url in thumbnail_urls:
            if not thumbnail_url:
                continue
            mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
            thumbnails.append({
                'url': thumbnail_url,
                'width': int(mobj.group(1)) if mobj else None,
                'height': int(mobj.group(2)) if mobj else None,
            })

        info.update({
            'age_limit': parse_age_limit(rating),
            'formats': formats,
            'id': video_id,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        })
        ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
        if ns_keys:
            ns = next(iter(ns_keys))
            episode = theplatform_metadata.get(ns + '$episodeTitle') or None
            episode_number = int_or_none(
                theplatform_metadata.get(ns + '$episode'))
            season_number = int_or_none(
                theplatform_metadata.get(ns + '$season'))
            series = theplatform_metadata.get(ns + '$show') or None
            info.update({
                'episode': episode,
                'episode_number': episode_number,
                'season_number': season_number,
                'series': series,
            })
        return info
        return self.url_result(
            f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
            BrightcoveNewIE, video_id)

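The rewritten AMCNetworksIE no longer builds ThePlatform SMIL URLs; it pulls the Brightcove video ID out of window.initialData and delegates via url_result. A hedged, self-contained sketch of what the _search_json call with the String.raw prefix effectively matches (the sample HTML line is made up):

import json
import re

# Locate the JSON blob embedded via JSON.parse(String.raw`...`)
webpage = 'window.initialData = JSON.parse(String.raw`{"properties": {"videoId": "6369261343112"}}`);'
raw = re.search(r'window\.initialData\s*=\s*JSON\.parse\(String\.raw`(\{.+\})`', webpage).group(1)
print(json.loads(raw)['properties']['videoId'])  # 6369261343112
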
@@ -1,64 +1,105 @@
import urllib.parse

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_age_limit,
    url_or_none,
    urlencode_postdata,
)
from ..utils.traversal import traverse_obj


class AtresPlayerIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
    _NETRC_MACHINE = 'atresplayer'
    _TESTS = [
        {
            'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
            'info_dict': {
                'id': '5d4aa2c57ed1a88fc715a615',
                'ext': 'mp4',
                'title': 'Capítulo 7: Asuntos pendientes',
                'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
                'duration': 3413,
            },
            'skip': 'This video is only available for registered users',
    _TESTS = [{
        'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/',
        'info_dict': {
            'ext': 'mp4',
            'id': '67f2dfb2fb6ab0e4c7203849',
            'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c',
            'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."',
            'channel': 'laSexta',
            'duration': 31,
            'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg',
            'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'],
            'series': 'El Objetivo',
            'season': 'Temporada 12',
            'timestamp': 1743970079,
            'upload_date': '20250406',
        },
        {
            'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
            'only_matching': True,
    }, {
        'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/',
        'info_dict': {
            'ext': 'mp4',
            'id': '67f836baa4a5b0e4147ca59a',
            'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero',
            'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero',
            'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72',
            'channel': 'Antena 3',
            'duration': 2556,
            'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg',
            'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'],
            'series': 'El Hormiguero ',
            'season': 'Temporada 14',
            'timestamp': 1744320111,
            'upload_date': '20250410',
        },
        {
            'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
            'only_matching': True,
    }, {
        'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/',
        'info_dict': {
            'ext': 'mp4',
            'id': '67a6038b64ceca00070f4f69',
            'display_id': 'capitulo-3-supervivientes',
            'title': 'Capítulo 3: Supervivientes',
            'description': 'md5:65b231f20302f776c2b0dd24594599a1',
            'channel': 'Flooxer',
            'duration': 1196,
            'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg',
            'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'],
            'series': 'BIARA: Proyecto Lázarus',
            'season': 'Temporada 1',
            'season_number': 1,
            'episode': 'Episode 3',
            'episode_number': 3,
            'timestamp': 1743095191,
            'upload_date': '20250327',
        },
    ]
    }, {
        'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
        'only_matching': True,
    }, {
        'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
        'only_matching': True,
    }]
    _API_BASE = 'https://api.atresplayer.com/'

    def _perform_login(self, username, password):
        self._request_webpage(
            self._API_BASE + 'login', None, 'Downloading login page')

        try:
            target_url = self._download_json(
                'https://account.atresmedia.com/api/login', None,
                'Logging in', headers={
                    'Content-Type': 'application/x-www-form-urlencoded',
                }, data=urlencode_postdata({
            self._download_webpage(
                'https://account.atresplayer.com/auth/v1/login', None,
                'Logging in', 'Failed to log in', data=urlencode_postdata({
                    'username': username,
                    'password': password,
                }))['targetUrl']
                }))
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                raise ExtractorError('Invalid username and/or password', expected=True)
            raise

        self._request_webpage(target_url, None, 'Following Target URL')

    def _real_extract(self, url):
        display_id, video_id = self._match_valid_url(url).groups()

        metadata_url = self._download_json(
            self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data',
            query={'href': urllib.parse.urlparse(url).path})['href']
        metadata = self._download_json(metadata_url, video_id)

        try:
            episode = self._download_json(
                self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
            video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data')
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                error = self._parse_json(e.cause.response.read(), None)

@@ -67,37 +108,45 @@ class AtresPlayerIE(InfoExtractor):
                raise ExtractorError(error['error_description'], expected=True)
            raise

        title = episode['titulo']

        formats = []
        subtitles = {}
        for source in episode.get('sources', []):
            src = source.get('src')
            if not src:
                continue
        for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))):
            src_url = source['src']
            src_type = source.get('type')
            if src_type == 'application/vnd.apple.mpegurl':
                formats, subtitles = self._extract_m3u8_formats(
                    src, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False)
            elif src_type == 'application/dash+xml':
                formats, subtitles = self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False)

        heartbeat = episode.get('heartbeat') or {}
        omniture = episode.get('omniture') or {}
        get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
            if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'):
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
            elif src_type in ('application/dash+xml', 'application/dash+hevc'):
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    src_url, video_id, mpd_id='dash', fatal=False)
            else:
                continue
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'display_id': display_id,
            'id': video_id,
            'title': title,
            'description': episode.get('descripcion'),
            'thumbnail': episode.get('imgPoster'),
            'duration': int_or_none(episode.get('duration')),
            'formats': formats,
            'channel': get_meta('channel'),
            'season': get_meta('season'),
            'episode_number': int_or_none(get_meta('episodeNumber')),
            'subtitles': subtitles,
            **traverse_obj(video_data, {
                'title': ('titulo', {str}),
                'description': ('descripcion', {str}),
                'duration': ('duration', {int_or_none}),
                'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}),
                'age_limit': ('ageRating', {parse_age_limit}),
            }),
            **traverse_obj(metadata, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'duration': ('duration', {int_or_none}),
                'tags': ('tags', ..., 'title', {str}),
                'age_limit': ('ageRating', {parse_age_limit}),
                'series': ('format', 'title', {str}),
                'season': ('currentSeason', 'title', {str}),
                'season_number': ('currentSeason', 'seasonNumber', {int_or_none}),
                'episode_number': ('numberOfEpisode', {int_or_none}),
                'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
                'channel': ('channel', 'title', {str}),
            }),
        }

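The new AtresPlayer return value is assembled almost entirely from traverse_obj dict templates, so fields missing from either API response simply drop out. A small runnable sketch of that pattern, assuming yt-dlp is installed (the sample metadata is made up):

from yt_dlp.utils import int_or_none
from yt_dlp.utils.traversal import traverse_obj

metadata = {'format': {'title': 'El Objetivo'}, 'publicationDate': 1743970079000}
print(traverse_obj(metadata, {
    'series': ('format', 'title', {str}),
    # int_or_none(scale=1000) converts the millisecond timestamp to seconds
    'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
}))
# {'series': 'El Objetivo', 'timestamp': 1743970079}
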
@@ -1,30 +1,32 @@
import functools
import json
import re

from .common import InfoExtractor
from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    clean_html,
    extract_attributes,
    determine_ext,
    format_field,
    get_element_by_class,
    get_element_by_id,
    get_element_html_by_class,
    get_elements_html_by_class,
    int_or_none,
    orderedSet,
    parse_count,
    parse_duration,
    traverse_obj,
    unified_strdate,
    parse_iso8601,
    url_or_none,
    urlencode_postdata,
    urljoin,
)
from ..utils.traversal import traverse_obj


class BitChuteIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/?#]+)/(?P<id>[^/?#&]+)'
    _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',

@@ -34,12 +36,17 @@ class BitChuteIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'This is the first video on #BitChute !',
            'description': 'md5:a0337e7b1fe39e32336974af8173a034',
            'thumbnail': r're:^https?://.*\.jpg$',
            'thumbnail': r're:https?://.+/.+\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20170103',
            'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
            'channel': 'BitChute',
            'channel_url': 'https://www.bitchute.com/channel/bitchute/',
            'uploader_id': 'I5NgtHZn9vPj',
            'channel_id': '1VBwRfyNcKdX',
            'view_count': int,
            'duration': 16.0,
            'timestamp': 1483425443,
        },
    }, {
        # test case: video with different channel and uploader

@@ -49,13 +56,18 @@ class BitChuteIE(InfoExtractor):
            'id': 'Yti_j9A-UZ4',
            'ext': 'mp4',
            'title': 'Israel at War | Full Measure',
            'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:e60198b89971966d6030d22b3268f08f',
            'thumbnail': r're:https?://.+/.+\.jpg$',
            'uploader': 'sharylattkisson',
            'upload_date': '20231106',
            'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
            'channel': 'Full Measure with Sharyl Attkisson',
            'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
            'uploader_id': '9K0kUWA9zmd9',
            'channel_id': 'NpdxoCRv3ZLb',
            'view_count': int,
            'duration': 554.0,
            'timestamp': 1699296106,
        },
    }, {
        # video not downloadable in browser, but we can recover it

@@ -66,25 +78,21 @@ class BitChuteIE(InfoExtractor):
            'ext': 'mp4',
            'filesize': 71537926,
            'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
            'description': 'md5:228ee93bd840a24938f536aeac9cf749',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:2029c7c212ccd4b040f52bb2d036ef4e',
            'thumbnail': r're:https?://.+/.+\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20181113',
            'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
            'channel': 'BitChute',
            'channel_url': 'https://www.bitchute.com/channel/bitchute/',
            'uploader_id': 'I5NgtHZn9vPj',
            'channel_id': '1VBwRfyNcKdX',
            'view_count': int,
            'duration': 1701.0,
            'tags': ['bitchute'],
            'timestamp': 1542130287,
        },
        'params': {'check_formats': None},
    }, {
        # restricted video
        'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
        'info_dict': {
            'id': 'WEnQU7XGcTdl',
            'ext': 'mp4',
            'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
        },
        'params': {'skip_download': True},
        'skip': 'Georestricted in DE',
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,

@@ -96,11 +104,8 @@ class BitChuteIE(InfoExtractor):
        'only_matching': True,
    }]
    _GEO_BYPASS = False

    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
        'Referer': 'https://www.bitchute.com/',
    }
    _UPLOADER_URL_TMPL = 'https://www.bitchute.com/profile/%s/'
    _CHANNEL_URL_TMPL = 'https://www.bitchute.com/channel/%s/'

    def _check_format(self, video_url, video_id):
        urls = orderedSet(

@@ -112,7 +117,7 @@ class BitChuteIE(InfoExtractor):
        for url in urls:
            try:
                response = self._request_webpage(
                    HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS)
                    HEADRequest(url), video_id=video_id, note=f'Checking {url}')
            except ExtractorError as e:
                self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
                continue

@@ -121,54 +126,79 @@ class BitChuteIE(InfoExtractor):
                'filesize': int_or_none(response.headers.get('Content-Length')),
            }

    def _raise_if_restricted(self, webpage):
        page_title = clean_html(get_element_by_class('page-title', webpage)) or ''
        if re.fullmatch(r'(?:Channel|Video) Restricted', page_title):
            reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
            self.raise_geo_restricted(reason)

    @staticmethod
    def _make_url(html):
        path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
        return urljoin('https://www.bitchute.com', path)
    def _call_api(self, endpoint, data, display_id, fatal=True):
        note = endpoint.rpartition('/')[2]
        try:
            return self._download_json(
                f'https://api.bitchute.com/api/beta/{endpoint}', display_id,
                f'Downloading {note} API JSON', f'Unable to download {note} API JSON',
                data=json.dumps(data).encode(),
                headers={
                    'Accept': 'application/json',
                    'Content-Type': 'application/json',
                })
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                errors = '. '.join(traverse_obj(e.cause.response.read().decode(), (
                    {json.loads}, 'errors', lambda _, v: v['context'] == 'reason', 'message', {str})))
                if errors and 'location' in errors:
                    # Can always be fatal since the video/media call will reach this code first
                    self.raise_geo_restricted(errors)
            if fatal:
                raise
            self.report_warning(e.msg)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)

        self._raise_if_restricted(webpage)
        publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
        entries = self._parse_html5_media_entries(url, webpage, video_id)
        data = {'video_id': video_id}
        media_url = self._call_api('video/media', data, video_id)['media_url']

        formats = []
        for format_ in traverse_obj(entries, (0, 'formats', ...)):
        if determine_ext(media_url) == 'm3u8':
            formats.extend(
                self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls', live=True))
        else:
            if self.get_param('check_formats') is not False:
                format_.update(self._check_format(format_.pop('url'), video_id) or {})
                if 'url' not in format_:
                    continue
            formats.append(format_)
                if fmt := self._check_format(media_url, video_id):
                    formats.append(fmt)
            else:
                formats.append({'url': media_url})

        if not formats:
            self.raise_no_formats(
                'Video is unavailable. Please make sure this video is playable in the browser '
                'before reporting this issue.', expected=True, video_id=video_id)

        details = get_element_by_class('details', webpage) or ''
        uploader_html = get_element_html_by_class('creator', details) or ''
        channel_html = get_element_html_by_class('name', details) or ''
        video = self._call_api('video', data, video_id, fatal=False)
        channel = None
        if channel_id := traverse_obj(video, ('channel', 'channel_id', {str})):
            channel = self._call_api('channel', {'channel_id': channel_id}, video_id, fatal=False)

        return {
            **traverse_obj(video, {
                'title': ('video_name', {str}),
                'description': ('description', {str}),
                'thumbnail': ('thumbnail_url', {url_or_none}),
                'channel': ('channel', 'channel_name', {str}),
                'channel_id': ('channel', 'channel_id', {str}),
                'channel_url': ('channel', 'channel_url', {urljoin('https://www.bitchute.com/')}),
                'uploader_id': ('profile_id', {str}),
                'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
                'timestamp': ('date_published', {parse_iso8601}),
                'duration': ('duration', {parse_duration}),
                'tags': ('hashtags', ..., {str}, filter, all, filter),
                'view_count': ('view_count', {int_or_none}),
                'is_live': ('state_id', {lambda x: x == 'live'}),
            }),
            **traverse_obj(channel, {
                'channel': ('channel_name', {str}),
                'channel_id': ('channel_id', {str}),
                'channel_url': ('url_slug', {format_field(template=self._CHANNEL_URL_TMPL)}, filter),
                'uploader': ('profile_name', {str}),
                'uploader_id': ('profile_id', {str}),
                'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
            }),
            'id': video_id,
            'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage),
            'uploader': clean_html(uploader_html),
            'uploader_url': self._make_url(uploader_html),
            'channel': clean_html(channel_html),
            'channel_url': self._make_url(channel_html),
            'upload_date': unified_strdate(self._search_regex(
                r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
            'formats': formats,
        }

@@ -190,7 +220,7 @@ class BitChuteChannelIE(InfoExtractor):
                    'ext': 'mp4',
                    'title': 'This is the first video on #BitChute !',
                    'description': 'md5:a0337e7b1fe39e32336974af8173a034',
                    'thumbnail': r're:^https?://.*\.jpg$',
                    'thumbnail': r're:https?://.+/.+\.jpg$',
                    'uploader': 'BitChute',
                    'upload_date': '20170103',
                    'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',

@@ -198,6 +228,9 @@ class BitChuteChannelIE(InfoExtractor):
                    'channel_url': 'https://www.bitchute.com/channel/bitchute/',
                    'duration': 16,
                    'view_count': int,
                    'uploader_id': 'I5NgtHZn9vPj',
                    'channel_id': '1VBwRfyNcKdX',
                    'timestamp': 1483425443,
                },
            },
        ],

@@ -213,6 +246,7 @@ class BitChuteChannelIE(InfoExtractor):
            'title': 'Bruce MacDonald and "The Light of Darkness"',
            'description': 'md5:747724ef404eebdfc04277714f81863e',
        },
        'skip': '404 Not Found',
    }, {
        'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
        'only_matching': True,

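BitChuteIE._call_api above parses the 403 error body with a traverse_obj path whose first step is {json.loads}, i.e. decode first, then branch over the error list. A minimal sketch of that decode-then-branch traversal, assuming yt-dlp is installed (the error body is made up):

import json

from yt_dlp.utils.traversal import traverse_obj

body = '{"errors": [{"context": "reason", "message": "Restricted in your location"}]}'
print('. '.join(traverse_obj(body, (
    {json.loads}, 'errors', lambda _, v: v['context'] == 'reason', 'message', {str}))))
# Restricted in your location
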
@@ -7,6 +7,7 @@ from ..utils import (
    join_nonempty,
    js_to_json,
    mimetype2ext,
    parse_resolution,
    unified_strdate,
    url_or_none,
    urljoin,

@@ -110,24 +111,23 @@ class BpbIE(InfoExtractor):

        return attributes

    @staticmethod
    def _process_source(source):
    def _process_source(self, source):
        url = url_or_none(source['src'])
        if not url:
            return None

        source_type = source.get('type', '')
        extension = mimetype2ext(source_type)
        is_video = source_type.startswith('video')
        note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
        note = self._search_regex(r'[_-]([a-z]+)\.[\da-z]+(?:$|\?)', url, 'note', default=None)

        return {
            'url': url,
            'ext': extension,
            'vcodec': None if is_video else 'none',
            'vcodec': None if source_type.startswith('video') else 'none',
            'quality': 10 if note == 'high' else 0,
            'format_note': note,
            'format_id': join_nonempty(extension, note),
            **parse_resolution(source.get('label')),
        }

    def _real_extract(self, url):

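The replacement note regex in BpbIE._process_source reads the quality suffix out of both video and audio URLs, including ones that carry a query string. A quick stdlib demonstration (the sample URLs are made up):

import re

for url in ('https://example.com/media/interview_high.mp4',
            'https://example.com/media/interview-low.mp3?v=2'):
    # '[_-]' accepts either separator; '(?:$|\?)' stops at end or query string
    print(re.search(r'[_-]([a-z]+)\.[\da-z]+(?:$|\?)', url).group(1))
# high
# low
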
@@ -1,59 +0,0 @@
from .turner import TurnerBaseIE
from ..utils import int_or_none


class CartoonNetworkIE(TurnerBaseIE):
    _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
    _TEST = {
        'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
        'info_dict': {
            'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
            'ext': 'mp4',
            'title': 'How to Draw Upgrade',
            'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
            metadata_re = ''
            if content_re:
                metadata_re = r'|video_metadata\.content_' + content_re
            return self._search_regex(
                rf'(?:_cnglobal\.currentVideo\.{global_re}{metadata_re})\s*=\s*"({value_re})";',
                webpage, name, fatal=fatal)

        media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
        title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)

        info = self._extract_ngtv_info(
            media_id, {'networkId': 'cartoonnetwork'}, {
                'url': url,
                'site_name': 'CartoonNetwork',
                'auth_required': find_field('authType', 'auth type') != 'unauth',
            })

        series = find_field(
            'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
        info.update({
            'id': media_id,
            'display_id': display_id,
            'title': title,
            'description': self._html_search_meta('description', webpage),
            'series': series,
            'episode': title,
        })

        for field in ('season', 'episode'):
            field_name = field + 'Number'
            info[field + '_number'] = int_or_none(find_field(
                field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))

        return info

@@ -13,16 +13,17 @@ from ..compat import compat_ord
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    determine_ext,
    float_or_none,
    int_or_none,
    merge_dicts,
    multipart_encode,
    parse_duration,
    traverse_obj,
    try_call,
    try_get,
    url_or_none,
    urljoin,
)
from ..utils.traversal import traverse_obj


class CDAIE(InfoExtractor):

@@ -290,34 +291,47 @@ class CDAIE(InfoExtractor):
        if not video or 'file' not in video:
            self.report_warning(f'Unable to extract {version} version information')
            return
        if video['file'].startswith('uggc'):
            video['file'] = codecs.decode(video['file'], 'rot_13')
            if video['file'].endswith('adc.mp4'):
                video['file'] = video['file'].replace('adc.mp4', '.mp4')
        elif not video['file'].startswith('http'):
            video['file'] = decrypt_file(video['file'])
        video_quality = video.get('quality')
        qualities = video.get('qualities', {})
        video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
        info_dict['formats'].append({
            'url': video['file'],
            'format_id': video_quality,
            'height': int_or_none(video_quality[:-1]),
        })
        if video.get('file'):
            if video['file'].startswith('uggc'):
                video['file'] = codecs.decode(video['file'], 'rot_13')
                if video['file'].endswith('adc.mp4'):
                    video['file'] = video['file'].replace('adc.mp4', '.mp4')
            elif not video['file'].startswith('http'):
                video['file'] = decrypt_file(video['file'])
            info_dict['formats'].append({
                'url': video['file'],
                'format_id': video_quality,
                'height': int_or_none(video_quality[:-1]),
            })
        for quality, cda_quality in qualities.items():
            if quality == video_quality:
                continue
            data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
                    'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
            data = json.dumps(data).encode()
            video_url = self._download_json(
            response = self._download_json(
                f'https://www.cda.pl/video/{video_id}', video_id, headers={
                    'Content-Type': 'application/json',
                    'X-Requested-With': 'XMLHttpRequest',
                }, data=data, note=f'Fetching {quality} url',
                errnote=f'Failed to fetch {quality} url', fatal=False)
            if try_get(video_url, lambda x: x['result']['status']) == 'ok':
                video_url = try_get(video_url, lambda x: x['result']['resp'])
            if (
                traverse_obj(response, ('result', 'status')) != 'ok'
                or not traverse_obj(response, ('result', 'resp', {url_or_none}))
            ):
                continue
            video_url = response['result']['resp']
            ext = determine_ext(video_url)
            if ext == 'mpd':
                info_dict['formats'].extend(self._extract_mpd_formats(
                    video_url, video_id, mpd_id='dash', fatal=False))
            elif ext == 'm3u8':
                info_dict['formats'].extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                info_dict['formats'].append({
                    'url': video_url,
                    'format_id': quality,

@@ -353,7 +367,7 @@ class CDAIE(InfoExtractor):

class CDAFolderIE(InfoExtractor):
    _MAX_PAGE_SIZE = 36
    _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
    _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://www.cda.pl/domino264/folder/31188385',

@@ -378,6 +392,9 @@ class CDAFolderIE(InfoExtractor):
            'title': 'TESTY KOSMETYKÓW',
        },
        'playlist_mincount': 139,
    }, {
        'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
        'only_matching': True,
    }]

    def _real_extract(self, url):

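The CDA branch keying on the 'uggc' prefix relies on ROT13: 'http' enciphers to 'uggc', so an obfuscated file URL is detected by its prefix and decoded in place. A stdlib sketch (the scrambled URL is made up):

import codecs

print(codecs.decode('http', 'rot_13'))  # uggc  (ROT13 is its own inverse)
scrambled = 'uggcf://jjj.pqn.cy/ivqrb/rknzcyr.zc4'
print(codecs.decode(scrambled, 'rot_13'))  # https://www.cda.pl/video/example.mp4
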
@@ -9,6 +9,7 @@ from ..utils import (
    ExtractorError,
    classproperty,
    float_or_none,
    parse_qs,
    traverse_obj,
    url_or_none,
)

@@ -91,11 +92,15 @@ class DacastVODIE(DacastBaseIE):
        # Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
        return self._search_regex(
            r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js,
            'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP'
            'usp signing secret', group='secret', fatal=False) or 'hGDtqMKYVeFdofrAfFmBcrsakaZELajI'

    def _real_extract(self, url):
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
        query = {
            'contentId': f'{user_id}-vod-{video_id}',
            'provider': 'universe',
            **traverse_obj(url, ({parse_qs}, 'uss_token', {'signedKey': -1})),
        }
        info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False)
        access = self._download_json(
            'https://playback.dacast.com/content/access', video_id,

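The Dacast change forwards an uss_token from the page URL into the access query. yt-dlp's parse_qs returns each parameter as a list of values, so index -1 takes the last occurrence; a hedged sketch with a made-up URL, assuming yt-dlp is installed:

from yt_dlp.utils import parse_qs

url = 'https://iframe.dacast.com/vod/some-user/some-video?uss_token=abc123'
print(parse_qs(url)['uss_token'][-1])  # abc123
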
@@ -1,9 +1,15 @@
from .zdf import ZDFBaseIE
from ..utils import (
    int_or_none,
    merge_dicts,
    parse_iso8601,
)
from ..utils.traversal import require, traverse_obj


class DreiSatIE(ZDFBaseIE):
    IE_NAME = '3sat'
    _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
    _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/?#]+/)*(?P<id>[^/?#&]+)\.html'
    _TESTS = [{
        'url': 'https://www.3sat.de/dokumentation/reise/traumziele-suedostasiens-die-philippinen-und-vietnam-102.html',
        'info_dict': {

@@ -12,40 +18,59 @@ class DreiSatIE(ZDFBaseIE):
            'title': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
            'description': 'md5:26329ce5197775b596773b939354079d',
            'duration': 2625.0,
            'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~2400x1350?cb=1699870351148',
            'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~original?cb=1699870351148',
            'episode': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
            'episode_id': 'POS_cc7ff51c-98cf-4d12-b99d-f7a551de1c95',
            'timestamp': 1738593000,
            'upload_date': '20250203',
            'timestamp': 1747920900,
            'upload_date': '20250522',
        },
    }, {
        # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
        'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
        'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
        'url': 'https://www.3sat.de/film/ab-18/ab-18---mein-fremdes-ich-100.html',
        'md5': 'f92638413a11d759bdae95c9d8ec165c',
        'info_dict': {
            'id': '141007_ab18_10wochensommer_film',
            'id': '221128_mein_fremdes_ich2_ab18',
            'ext': 'mp4',
            'title': 'Ab 18! - 10 Wochen Sommer',
            'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
            'duration': 2660,
            'timestamp': 1608604200,
            'upload_date': '20201222',
            'title': 'Ab 18! - Mein fremdes Ich',
            'description': 'md5:cae0c0b27b7426d62ca0dda181738bf0',
            'duration': 2625.0,
            'thumbnail': 'https://www.3sat.de/assets/ab-18---mein-fremdes-ich-106~original?cb=1666081865812',
            'episode': 'Ab 18! - Mein fremdes Ich',
            'episode_id': 'POS_6225d1ca-a0d5-45e3-870b-e783ee6c8a3f',
            'timestamp': 1695081600,
            'upload_date': '20230919',
        },
        'skip': '410 Gone',
    }, {
        'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
        'url': 'https://www.3sat.de/gesellschaft/37-grad-leben/aus-dem-leben-gerissen-102.html',
        'md5': 'a903eaf8d1fd635bd3317cd2ad87ec84',
        'info_dict': {
            'id': '140913_sendung_schweizweit',
            'id': '250323_0903_sendung_sgl',
            'ext': 'mp4',
            'title': 'Waidmannsheil',
            'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
            'timestamp': 1410623100,
            'upload_date': '20140913',
            'title': 'Plötzlich ohne dich',
            'description': 'md5:380cc10659289dd91510ad8fa717c66b',
            'duration': 1620.0,
            'thumbnail': 'https://www.3sat.de/assets/37-grad-leben-106~original?cb=1645537156810',
            'episode': 'Plötzlich ohne dich',
            'episode_id': 'POS_faa7a93c-c0f2-4d51-823f-ce2ac3ee191b',
            'timestamp': 1743162540,
            'upload_date': '20250328',
        },
        'params': {
            'skip_download': True,
    }, {
        # Video with chapters
        'url': 'https://www.3sat.de/kultur/buchmesse/dein-buch-das-beste-von-der-leipziger-buchmesse-2025-teil-1-100.html',
        'md5': '6b95790ce52e75f0d050adcdd2711ee6',
        'info_dict': {
            'id': '250330_dein_buch1_bum',
            'ext': 'mp4',
            'title': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
            'description': 'md5:bae51bfc22f15563ce3acbf97d2e8844',
            'duration': 5399.0,
            'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1743329640903',
            'chapters': 'count:24',
            'episode': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
            'episode_id': 'POS_1ef236cc-b390-401e-acd0-4fb4b04315fb',
            'timestamp': 1743327000,
            'upload_date': '20250330',
        },
        'skip': '404 Not Found',
    }, {
        # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
        'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',

@@ -58,11 +83,42 @@ class DreiSatIE(ZDFBaseIE):

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        player = self._search_json(
            r'data-zdfplayer-jsb=(["\'])', webpage, 'player JSON', video_id)
        player_url = player['content']
        api_token = f'Bearer {player["apiToken"]}'

        webpage = self._download_webpage(url, video_id, fatal=False)
        if webpage:
            player = self._extract_player(webpage, url, fatal=False)
            if player:
                return self._extract_regular(url, player, video_id)
        content = self._call_api(player_url, video_id, 'video metadata', api_token)

        return self._extract_mobile(video_id)
        video_target = content['mainVideoContent']['http://zdf.de/rels/target']
        ptmd_path = traverse_obj(video_target, (
            (('streams', 'default'), None),
            ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'),
            {str}, any, {require('ptmd path')}))
        ptmd_url = self._expand_ptmd_template(player_url, ptmd_path)
        aspect_ratio = self._parse_aspect_ratio(video_target.get('aspectRatio'))
        info = self._extract_ptmd(ptmd_url, video_id, api_token, aspect_ratio)

        return merge_dicts(info, {
            **traverse_obj(content, {
                'title': (('title', 'teaserHeadline'), {str}, any),
                'episode': (('title', 'teaserHeadline'), {str}, any),
                'description': (('leadParagraph', 'teasertext'), {str}, any),
                'timestamp': ('editorialDate', {parse_iso8601}),
            }),
            **traverse_obj(video_target, {
                'duration': ('duration', {int_or_none}),
                'chapters': ('streamAnchorTag', {self._extract_chapters}),
            }),
            'thumbnails': self._extract_thumbnails(traverse_obj(content, ('teaserImageRef', 'layouts', {dict}))),
            **traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', {
                'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}),
                'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}),
                'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}),
                'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}),
                'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}),
                'episode_number': ('episodeNumber', {int_or_none}),
                'episode_id': ('contentId', {str}),
            })),
        })

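DreiSatIE now resolves the PTMD path with {require('ptmd path')}, which raises ExtractorError instead of silently yielding None when the path is missing. A simplified runnable sketch, assuming yt-dlp is installed (the sample data is made up and collapses the branching shown in the hunk above into a single path):

from yt_dlp.utils.traversal import require, traverse_obj

video_target = {'streams': {'default': {
    'http://zdf.de/rels/streams/ptmd-template': '/tmd/2/{playerId}/vod/ptmd/mediathek/250323_0903_sendung_sgl'}}}
print(traverse_obj(video_target, (
    'streams', 'default', 'http://zdf.de/rels/streams/ptmd-template',
    {str}, {require('ptmd path')})))
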
@@ -5,7 +5,6 @@ import urllib.parse

from .adobepass import AdobePassIE
from .common import InfoExtractor
from .once import OnceIE
from ..utils import (
    determine_ext,
    dict_get,

@@ -16,7 +15,7 @@ from ..utils import (
)


class ESPNIE(OnceIE):
class ESPNIE(InfoExtractor):
    _VALID_URL = r'''(?x)
    https?://
        (?:

@@ -131,9 +130,7 @@ class ESPNIE(InfoExtractor):
                return
            format_urls.add(source_url)
            ext = determine_ext(source_url)
            if OnceIE.suitable(source_url):
                formats.extend(self._extract_once_formats(source_url))
            elif ext == 'smil':
            if ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    source_url, video_id, fatal=False))
            elif ext == 'f4m':

@@ -2,11 +2,15 @@ import urllib.parse

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    qualities,
    join_nonempty,
    mimetype2ext,
    parse_qs,
    unified_strdate,
    url_or_none,
)
from ..utils.traversal import traverse_obj


class FirstTVIE(InfoExtractor):

@@ -15,40 +19,51 @@ class FirstTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?(?:sport)?1tv\.ru/(?:[^/?#]+/)+(?P<id>[^/?#]+)'

    _TESTS = [{
        # single format
        'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
        'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
        # single format; has item.id
        'url': 'https://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
        'md5': '8011ae8e88ff4150107ab9c5a8f5b659',
        'info_dict': {
            'id': '40049',
            'ext': 'mp4',
            'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
            'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'upload_date': '20150212',
            'duration': 2694,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # multiple formats
        'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
        # multiple formats; has item.id
        'url': 'https://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
        'info_dict': {
            'id': '364746',
            'ext': 'mp4',
            'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
            'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'upload_date': '20160407',
            'duration': 179,
            'formats': 'mincount:3',
        },
        'params': {
            'skip_download': True,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00',
        'url': 'https://www.1tv.ru/news/issue/2016-12-01/14:00',
        'info_dict': {
            'id': '14:00',
            'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. Первый канал',
            'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd',
            'title': 'Выпуск программы «Время» в 20:00 1 декабря 2016 года. Новости. Первый канал',
            'thumbnail': 'https://static.1tv.ru/uploads/photo/image/8/big/338448_big_8fc7eb236f.jpg',
        },
        'playlist_count': 13,
    }, {
        # has timestamp; has item.uid but not item.id
        'url': 'https://www.1tv.ru/shows/segodnya-vecherom/vypuski/avtory-odnogo-hita-segodnya-vecherom-vypusk-ot-03-05-2025',
        'info_dict': {
            'id': '270411',
            'ext': 'mp4',
            'title': 'Авторы одного хита. Сегодня вечером. Выпуск от 03.05.2025',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'timestamp': 1746286020,
            'upload_date': '20250503',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
        'only_matching': True,

@@ -57,96 +72,60 @@ class FirstTVIE(InfoExtractor):
        'only_matching': True,
    }]

    def _entries(self, items):
        for item in items:
            video_id = str(item.get('id') or item['uid'])

            formats, subtitles = [], {}
            for f in traverse_obj(item, ('sources', lambda _, v: url_or_none(v['src']))):
                src = f['src']
                ext = mimetype2ext(f.get('type'), default=determine_ext(src))
                if ext == 'm3u8':
                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                        src, video_id, 'mp4', m3u8_id='hls', fatal=False)
                elif ext == 'mpd':
                    fmts, subs = self._extract_mpd_formats_and_subtitles(
                        src, video_id, mpd_id='dash', fatal=False)
                else:
                    tbr = self._search_regex(fr'_(\d{{3,}})\.{ext}', src, 'tbr', default=None)
                    formats.append({
                        'url': src,
                        'ext': ext,
                        'format_id': join_nonempty('http', ext, tbr),
                        'tbr': int_or_none(tbr),
                        # quality metadata of http formats may be incorrect
                        'quality': -10,
                    })
                    continue
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

            yield {
                **traverse_obj(item, {
                    'title': ('title', {str}),
                    'thumbnail': ('poster', {url_or_none}),
                    'timestamp': ('dvr_begin_at', {int_or_none}),
                    'upload_date': ('date_air', {unified_strdate}),
                    'duration': ('duration', {int_or_none}),
                }),
                'id': video_id,
                'formats': formats,
                'subtitles': subtitles,
            }

    def _real_extract(self, url):
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)
        playlist_url = urllib.parse.urljoin(url, self._search_regex(
        playlist_url = urllib.parse.urljoin(url, self._html_search_regex(
            r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'playlist url', group='url'))

        parsed_url = urllib.parse.urlparse(playlist_url)
        qs = urllib.parse.parse_qs(parsed_url.query)
        item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]')
        item_ids = traverse_obj(parse_qs(playlist_url), 'video_id', 'videos_ids[]', 'news_ids[]')
        items = traverse_obj(
            self._download_json(playlist_url, display_id),
            lambda _, v: v['uid'] and (str(v['uid']) in item_ids if item_ids else True))

        items = self._download_json(playlist_url, display_id)

        if item_ids:
            items = [
                item for item in items
                if item.get('uid') and str(item['uid']) in item_ids]
        else:
            items = [items[0]]

        entries = []
        QUALITIES = ('ld', 'sd', 'hd')

        for item in items:
            title = item['title']
            quality = qualities(QUALITIES)
            formats = []
            path = None
            for f in item.get('mbr', []):
                src = url_or_none(f.get('src'))
                if not src:
                    continue
                tbr = int_or_none(self._search_regex(
                    r'_(\d{3,})\.mp4', src, 'tbr', default=None))
                if not path:
                    path = self._search_regex(
                        r'//[^/]+/(.+?)_\d+\.mp4', src,
                        'm3u8 path', default=None)
                formats.append({
                    'url': src,
                    'format_id': f.get('name'),
                    'tbr': tbr,
                    'source_preference': quality(f.get('name')),
                    # quality metadata of http formats may be incorrect
                    'preference': -10,
                })
            # m3u8 URL format is reverse engineered from [1] (search for
            # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
            # is taken from [2].
            # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted
            # 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834
            if not path and len(formats) == 1:
                path = self._search_regex(
                    r'//[^/]+/(.+?$)', formats[0]['url'],
                    'm3u8 path', default=None)
            if path:
                if len(formats) == 1:
                    m3u8_path = ','
                else:
                    tbrs = [str(t) for t in sorted(f['tbr'] for f in formats)]
                    m3u8_path = '_,{},{}'.format(','.join(tbrs), '.mp4')
                formats.extend(self._extract_m3u8_formats(
                    f'http://balancer-vod.1tv.ru/{path}{m3u8_path}.urlset/master.m3u8',
                    display_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))

            thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
            duration = int_or_none(item.get('duration') or self._html_search_meta(
                'video:duration', webpage, 'video duration', fatal=False))
            upload_date = unified_strdate(self._html_search_meta(
                'ya:ovs:upload_date', webpage, 'upload date', default=None))

            entries.append({
                'id': str(item.get('id') or item['uid']),
                'thumbnail': thumbnail,
                'title': title,
                'upload_date': upload_date,
                'duration': int_or_none(duration),
                'formats': formats,
            })

        title = self._html_search_regex(
            (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
             r"'title'\s*:\s*'([^']+)'"),
            webpage, 'title', default=None) or self._og_search_title(
            webpage, default=None)
        description = self._html_search_regex(
            r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
            webpage, 'description', default=None) or self._html_search_meta(
            'description', webpage, 'description', default=None)

        return self.playlist_result(entries, display_id, title, description)
        return self.playlist_result(
            self._entries(items), display_id, self._og_search_title(webpage, default=None),
            thumbnail=self._og_search_thumbnail(webpage, default=None))

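FirstTVIE._entries now derives the container from the declared MIME type first and falls back to the URL extension. A runnable sketch of that mimetype2ext/determine_ext chain, assuming yt-dlp is installed (the sample sources are made up):

from yt_dlp.utils import determine_ext, mimetype2ext

for source in ({'src': 'https://example.com/stream/master.m3u8'},
               {'src': 'https://example.com/video_1080.mp4', 'type': 'video/mp4'}):
    # no 'type' key -> mimetype2ext returns the default from the URL extension
    print(mimetype2ext(source.get('type'), default=determine_ext(source['src'])))
# m3u8
# mp4
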
@@ -1,9 +1,9 @@
import urllib.parse

from .once import OnceIE
from .common import InfoExtractor


class GameSpotIE(OnceIE):
class GameSpotIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',

@@ -16,7 +16,6 @@ from ..utils import (
MEDIA_EXTENSIONS,
ExtractorError,
UnsupportedError,
base_url,
determine_ext,
determine_protocol,
dict_get,
@@ -38,6 +37,7 @@ from ..utils import (
unescapeHTML,
unified_timestamp,
unsmuggle_url,
update_url,
update_url_query,
url_or_none,
urlhandle_detect_ext,
@@ -2538,12 +2538,13 @@ class GenericIE(InfoExtractor):
return self.playlist_result(
self._parse_xspf(
doc, video_id, xspf_url=url,
xspf_base_url=full_response.url),
xspf_base_url=new_url),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
doc,
mpd_base_url=base_url(full_response.url),
# Do not use yt_dlp.utils.base_url here since it will raise on file:// URLs
mpd_base_url=update_url(new_url, query=None, fragment=None).rpartition('/')[0],
mpd_url=url)
info_dict['live_status'] = 'is_live' if doc.get('type') == 'dynamic' else None
self._extra_manifest_info(info_dict, url)

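The replacement for mpd_base_url avoids yt_dlp.utils.base_url because it raises on file:// URLs; roughly, the new expression behaves like this (the manifest URL is a made-up example):

from yt_dlp.utils import update_url

# Strip query and fragment, then drop the final path component.
new_url = 'file:///home/user/manifests/stream.mpd?token=abc#t=10'
mpd_base_url = update_url(new_url, query=None, fragment=None).rpartition('/')[0]
print(mpd_base_url)  # file:///home/user/manifests
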
@@ -8,7 +8,7 @@ from ..utils.traversal import traverse_obj


class GetCourseRuPlayerIE(InfoExtractor):
_VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
_VALID_URL = r'https?://(?:player02\.getcourse\.ru|cf-api-2\.vhcdn\.com)/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
_TESTS = [{
'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
@@ -20,6 +20,16 @@ class GetCourseRuPlayerIE(InfoExtractor):
'duration': 1693,
},
'skip': 'JWT expired',
}, {
'url': 'https://cf-api-2.vhcdn.com/sign-player/?json=example',
'info_dict': {
'id': '435735291',
'title': '8afd7c489952108e00f019590f3711f3',
'ext': 'mp4',
'thumbnail': 'https://preview-htz.vhcdn.com/preview/8afd7c489952108e00f019590f3711f3/preview.jpg?version=1682170973&host=vh-72',
'duration': 777,
},
'skip': 'JWT expired',
}]

def _real_extract(self, url):
@@ -168,7 +178,7 @@ class GetCourseRuIE(InfoExtractor):

playlist_id = self._search_regex(
r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
title = self._og_search_title(webpage) or self._html_extract_title(webpage)
title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)

return self.playlist_from_matches(
re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),

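For context, the json= parameter matched by the player regex is base64-encoded JSON (the first test URL decodes to a dict with video_hash, user_id, gc_host and similar keys). A hedged helper for inspecting such payloads, not part of the extractor itself:

import base64
import json
from urllib.parse import parse_qs, urlparse

def decode_player_payload(player_url):
    # Extract the base64 blob from the query string and decode it as JSON
    payload = parse_qs(urlparse(player_url).query)['json'][0]
    payload += '=' * (-len(payload) % 4)  # restore stripped padding
    return json.loads(base64.urlsafe_b64decode(payload))
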
@@ -1,3 +1,4 @@
import json
import re
import time

@@ -6,9 +7,7 @@ from ..utils import (
ExtractorError,
determine_ext,
js_to_json,
parse_qs,
traverse_obj,
urlencode_postdata,
)


@@ -16,7 +15,6 @@ class IPrimaIE(InfoExtractor):
_VALID_URL = r'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_GEO_BYPASS = False
_NETRC_MACHINE = 'iprima'
_AUTH_ROOT = 'https://auth.iprima.cz'
access_token = None

_TESTS = [{
@@ -86,48 +84,18 @@ class IPrimaIE(InfoExtractor):
if self.access_token:
return

login_page = self._download_webpage(
f'{self._AUTH_ROOT}/oauth2/login', None, note='Downloading login page',
errnote='Downloading login page failed')

login_form = self._hidden_inputs(login_page)

login_form.update({
'_email': username,
'_password': password})

profile_select_html, login_handle = self._download_webpage_handle(
f'{self._AUTH_ROOT}/oauth2/login', None, data=urlencode_postdata(login_form),
note='Logging in')

# a profile may need to be selected first, even when there is only a single one
if '/profile-select' in login_handle.url:
profile_id = self._search_regex(
r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')

login_handle = self._request_webpage(
f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')

code = traverse_obj(login_handle.url, ({parse_qs}, 'code', 0))
if not code:
raise ExtractorError('Login failed', expected=True)

token_request_data = {
'scope': 'openid+email+profile+phone+address+offline_access',
'client_id': 'prima_sso',
'grant_type': 'authorization_code',
'code': code,
'redirect_uri': f'{self._AUTH_ROOT}/sso/auth-check'}

token_data = self._download_json(
f'{self._AUTH_ROOT}/oauth2/token', None,
note='Downloading token', errnote='Downloading token failed',
data=urlencode_postdata(token_request_data))
'https://ucet.iprima.cz/api/session/create', None,
note='Logging in', errnote='Failed to log in',
data=json.dumps({
'email': username,
'password': password,
'deviceName': 'Windows Chrome',
}).encode(), headers={'content-type': 'application/json'})

self.access_token = token_data.get('access_token')
if self.access_token is None:
raise ExtractorError('Getting token failed', expected=True)
self.access_token = token_data['accessToken']['value']
if not self.access_token:
raise ExtractorError('Failed to fetch access token')

def _real_initialize(self):
if not self.access_token:

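The rewritten login replaces the whole OAuth redirect dance with a single JSON endpoint. As a standalone sketch (the credentials are placeholders; only the endpoint, the request shape and the accessToken.value path come from the code above):

import json
import urllib.request

req = urllib.request.Request(
    'https://ucet.iprima.cz/api/session/create',
    data=json.dumps({
        'email': 'user@example.com',
        'password': 'hunter2',
        'deviceName': 'Windows Chrome',
    }).encode(),
    headers={'content-type': 'application/json'})
with urllib.request.urlopen(req) as resp:
    access_token = json.load(resp)['accessToken']['value']
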
@@ -1,23 +1,33 @@
import functools
import itertools
import math
import re

from .common import InfoExtractor
from ..utils import (
InAdvancePagedList,
ISO639Utils,
OnDemandPagedList,
clean_html,
int_or_none,
js_to_json,
make_archive_id,
orderedSet,
smuggle_url,
unified_strdate,
unified_timestamp,
unsmuggle_url,
url_basename,
url_or_none,
urlencode_postdata,
urljoin,
variadic,
)
from ..utils.traversal import traverse_obj


class JioSaavnBaseIE(InfoExtractor):
_URL_BASE_RE = r'https?://(?:www\.)?(?:jio)?saavn\.com'
_API_URL = 'https://www.jiosaavn.com/api.php'
_VALID_BITRATES = {'16', '32', '64', '128', '320'}

@@ -30,16 +40,20 @@ class JioSaavnBaseIE(InfoExtractor):
f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}')
return requested_bitrates

def _extract_formats(self, song_data):
def _extract_formats(self, item_data):
# Show/episode JSON data has a slightly different structure than song JSON data
if media_url := traverse_obj(item_data, ('more_info', 'encrypted_media_url', {str})):
item_data.setdefault('encrypted_media_url', media_url)

for bitrate in self.requested_bitrates:
media_data = self._download_json(
self._API_URL, song_data['id'],
self._API_URL, item_data['id'],
f'Downloading format info for {bitrate}',
fatal=False, data=urlencode_postdata({
'__call': 'song.generateAuthToken',
'_format': 'json',
'bitrate': bitrate,
'url': song_data['encrypted_media_url'],
'url': item_data['encrypted_media_url'],
}))
if not traverse_obj(media_data, ('auth_url', {url_or_none})):
self.report_warning(f'Unable to extract format info for {bitrate}')
@@ -53,24 +67,6 @@ class JioSaavnBaseIE(InfoExtractor):
'vcodec': 'none',
}

def _extract_song(self, song_data, url=None):
info = traverse_obj(song_data, {
'id': ('id', {str}),
'title': ('song', {clean_html}),
'album': ('album', {clean_html}),
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
'duration': ('duration', {int_or_none}),
'view_count': ('play_count', {int_or_none}),
'release_year': ('year', {int_or_none}),
'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
'webpage_url': ('perma_url', {url_or_none}),
})
if webpage_url := info.get('webpage_url') or url:
info['display_id'] = url_basename(webpage_url)
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]

return info

def _call_api(self, type_, token, note='API', params={}):
return self._download_json(
self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
@@ -84,19 +80,89 @@ class JioSaavnBaseIE(InfoExtractor):
**params,
})

def _yield_songs(self, playlist_data):
for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
song_info = self._extract_song(song_data)
url = smuggle_url(song_info['webpage_url'], {
'id': song_data['id'],
'encrypted_media_url': song_data['encrypted_media_url'],
})
yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
@staticmethod
def _extract_song(song_data, url=None):
info = traverse_obj(song_data, {
'id': ('id', {str}),
'title': (('song', 'title'), {clean_html}, any),
'album': ((None, 'more_info'), 'album', {clean_html}, any),
'duration': ((None, 'more_info'), 'duration', {int_or_none}, any),
'channel': ((None, 'more_info'), 'label', {str}, any),
'channel_id': ((None, 'more_info'), 'label_id', {str}, any),
'channel_url': ((None, 'more_info'), 'label_url', {urljoin('https://www.jiosaavn.com/')}, any),
'release_date': ((None, 'more_info'), 'release_date', {unified_strdate}, any),
'release_year': ('year', {int_or_none}),
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
'view_count': ('play_count', {int_or_none}),
'language': ('language', {lambda x: ISO639Utils.short2long(x.casefold()) or 'und'}),
'webpage_url': ('perma_url', {url_or_none}),
'artists': ('more_info', 'artistMap', 'primary_artists', ..., 'name', {str}, filter, all),
})
if webpage_url := info.get('webpage_url') or url:
info['display_id'] = url_basename(webpage_url)
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]

if primary_artists := traverse_obj(song_data, ('primary_artists', {lambda x: x.split(', ') if x else None})):
info['artists'].extend(primary_artists)
if featured_artists := traverse_obj(song_data, ('featured_artists', {str}, filter)):
info['artists'].extend(featured_artists.split(', '))
info['artists'] = orderedSet(info['artists']) or None

return info

@staticmethod
def _extract_episode(episode_data, url=None):
info = JioSaavnBaseIE._extract_song(episode_data, url)
info.pop('_old_archive_ids', None)
info.update(traverse_obj(episode_data, {
'description': ('more_info', 'description', {str}),
'timestamp': ('more_info', 'release_time', {unified_timestamp}),
'series': ('more_info', 'show_title', {str}),
'series_id': ('more_info', 'show_id', {str}),
'season': ('more_info', 'season_title', {str}),
'season_number': ('more_info', 'season_no', {int_or_none}),
'season_id': ('more_info', 'season_id', {str}),
'episode_number': ('more_info', 'episode_number', {int_or_none}),
'cast': ('starring', {lambda x: x.split(', ') if x else None}),
}))
return info

def _extract_jiosaavn_result(self, url, endpoint, response_key, parse_func):
url, smuggled_data = unsmuggle_url(url)
data = traverse_obj(smuggled_data, ({
'id': ('id', {str}),
'encrypted_media_url': ('encrypted_media_url', {str}),
}))

if 'id' in data and 'encrypted_media_url' in data:
result = {'id': data['id']}
else:
# only extract metadata if this is not a url_transparent result
data = self._call_api(endpoint, self._match_id(url))[response_key][0]
result = parse_func(data, url)

result['formats'] = list(self._extract_formats(data))
return result

def _yield_items(self, playlist_data, keys=None, parse_func=None):
"""Subclasses using this method must set _ENTRY_IE"""
if parse_func is None:
parse_func = self._extract_song

for item_data in traverse_obj(playlist_data, (
*variadic(keys, (str, bytes, dict, set)), lambda _, v: v['id'] and v['perma_url'],
)):
info = parse_func(item_data)
url = smuggle_url(info['webpage_url'], traverse_obj(item_data, {
'id': ('id', {str}),
'encrypted_media_url': ((None, 'more_info'), 'encrypted_media_url', {str}, any),
}))
yield self.url_result(url, self._ENTRY_IE, url_transparent=True, **info)


class JioSaavnSongIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:song'
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'(?:/song/[^/?#]+/|/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
'md5': '3b84396d15ed9e083c3106f1fa589c04',
@@ -106,12 +172,38 @@ class JioSaavnSongIE(JioSaavnBaseIE):
'ext': 'm4a',
'title': 'Leja Re',
'album': 'Leja Re',
'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
'thumbnail': r're:https?://.+/.+\.jpg',
'duration': 205,
'view_count': int,
'release_year': 2018,
'artists': ['Sandesh Shandilya', 'Dhvani Bhanushali', 'Tanishk Bagchi'],
'_old_archive_ids': ['jiosaavnsong OQsEfQFVUXk'],
'channel': 'T-Series',
'language': 'hin',
'channel_id': '34297',
'channel_url': 'https://www.jiosaavn.com/label/t-series-albums/6DLuXO3VoTo_',
'release_date': '20181124',
},
}, {
'url': 'https://www.jiosaavn.com/song/chuttamalle/P1FfWjZkQ0Q',
'md5': '96296c58d6ce488a417ef0728fd2d680',
'info_dict': {
'id': 'O94kBTtw',
'display_id': 'P1FfWjZkQ0Q',
'ext': 'm4a',
'title': 'Chuttamalle',
'album': 'Devara Part 1 - Telugu',
'thumbnail': r're:https?://.+/.+\.jpg',
'duration': 222,
'view_count': int,
'release_year': 2024,
'artists': 'count:3',
'_old_archive_ids': ['jiosaavnsong P1FfWjZkQ0Q'],
'channel': 'T-Series',
'language': 'tel',
'channel_id': '34297',
'channel_url': 'https://www.jiosaavn.com/label/t-series-albums/6DLuXO3VoTo_',
'release_date': '20240926',
},
}, {
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
@@ -119,26 +211,51 @@ class JioSaavnSongIE(JioSaavnBaseIE):
}]

def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url)
song_data = traverse_obj(smuggled_data, ({
'id': ('id', {str}),
'encrypted_media_url': ('encrypted_media_url', {str}),
}))
return self._extract_jiosaavn_result(url, 'song', 'songs', self._extract_song)

if 'id' in song_data and 'encrypted_media_url' in song_data:
result = {'id': song_data['id']}
else:
# only extract metadata if this is not a url_transparent result
song_data = self._call_api('song', self._match_id(url))['songs'][0]
result = self._extract_song(song_data, url)

result['formats'] = list(self._extract_formats(song_data))
return result
class JioSaavnShowIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:show'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/shows/[^/?#]+/(?P<id>[^/?#]{11,})/?(?:$|[?#])'
_TESTS = [{
'url': 'https://www.jiosaavn.com/shows/non-food-ways-to-boost-your-energy/XFMcKICOCgc_',
'md5': '0733cd254cfe74ef88bea1eaedcf1f4f',
'info_dict': {
'id': 'qqzh3RKZ',
'display_id': 'XFMcKICOCgc_',
'ext': 'mp3',
'title': 'Non-Food Ways To Boost Your Energy',
'description': 'md5:26e7129644b5c6aada32b8851c3997c8',
'episode': 'Episode 1',
'timestamp': 1640563200,
'series': 'Holistic Lifestyle With Neha Ranglani',
'series_id': '52397',
'season': 'Holistic Lifestyle With Neha Ranglani',
'season_number': 1,
'season_id': '61273',
'thumbnail': r're:https?://.+/.+\.jpg',
'duration': 311,
'view_count': int,
'release_year': 2021,
'language': 'eng',
'channel': 'Saavn OG',
'channel_id': '1953876',
'episode_number': 1,
'upload_date': '20211227',
'release_date': '20211227',
},
}, {
'url': 'https://www.jiosaavn.com/shows/himesh-reshammiya/Kr8fmfSN4vo_',
'only_matching': True,
}]

def _real_extract(self, url):
return self._extract_jiosaavn_result(url, 'episode', 'episodes', self._extract_episode)


class JioSaavnAlbumIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:album'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P<id>[^/?#]+)'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/album/[^/?#]+/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
'info_dict': {
@@ -147,18 +264,19 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
},
'playlist_count': 10,
}]
_ENTRY_IE = JioSaavnSongIE

def _real_extract(self, url):
display_id = self._match_id(url)
album_data = self._call_api('album', display_id)

return self.playlist_result(
self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
self._yield_items(album_data, 'songs'), display_id, traverse_obj(album_data, ('title', {str})))


class JioSaavnPlaylistIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:playlist'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
'info_dict': {
@@ -172,15 +290,16 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
'id': 'DVR,pFUOwyXqIp77B1JF,A__',
'title': 'Mood Hindi',
},
'playlist_mincount': 801,
'playlist_mincount': 750,
}, {
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
'info_dict': {
'id': 'Me5RridRfDk_',
'title': 'Taaza Tunes',
},
'playlist_mincount': 301,
'playlist_mincount': 50,
}]
_ENTRY_IE = JioSaavnSongIE
_PAGE_SIZE = 50

def _fetch_page(self, token, page):
@@ -189,7 +308,7 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):

def _entries(self, token, first_page_data, page):
page_data = first_page_data if not page else self._fetch_page(token, page + 1)
yield from self._yield_songs(page_data)
yield from self._yield_items(page_data, 'songs')

def _real_extract(self, url):
display_id = self._match_id(url)
@@ -199,3 +318,95 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
return self.playlist_result(InAdvancePagedList(
functools.partial(self._entries, display_id, playlist_data),
total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))


class JioSaavnShowPlaylistIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:show:playlist'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/shows/(?P<show>[^#/?]+)/(?P<season>\d+)/[^/?#]+'
_TESTS = [{
'url': 'https://www.jiosaavn.com/shows/talking-music/1/PjReFP-Sguk_',
'info_dict': {
'id': 'talking-music-1',
'title': 'Talking Music',
},
'playlist_mincount': 11,
}]
_ENTRY_IE = JioSaavnShowIE
_PAGE_SIZE = 10

def _fetch_page(self, show_id, season_id, page):
return self._call_api('show', show_id, f'show page {page}', {
'p': page,
'__call': 'show.getAllEpisodes',
'show_id': show_id,
'season_number': season_id,
'api_version': '4',
'sort_order': 'desc',
})

def _entries(self, show_id, season_id, page):
page_data = self._fetch_page(show_id, season_id, page + 1)
yield from self._yield_items(page_data, keys=None, parse_func=self._extract_episode)

def _real_extract(self, url):
show_slug, season_id = self._match_valid_url(url).group('show', 'season')
playlist_id = f'{show_slug}-{season_id}'
webpage = self._download_webpage(url, playlist_id)

show_info = self._search_json(
r'window\.__INITIAL_DATA__\s*=', webpage, 'initial data',
playlist_id, transform_source=js_to_json)['showView']
show_id = show_info['current_id']

entries = OnDemandPagedList(functools.partial(self._entries, show_id, season_id), self._PAGE_SIZE)
return self.playlist_result(
entries, playlist_id, traverse_obj(show_info, ('show', 'title', 'text', {str})))


class JioSaavnArtistIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:artist'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/artist/[^/?#]+/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/artist/krsna-songs/rYLBEve2z3U_',
'info_dict': {
'id': 'rYLBEve2z3U_',
'title': 'KR$NA',
},
'playlist_mincount': 38,
}, {
'url': 'https://www.jiosaavn.com/artist/sanam-puri-songs/SkNEv3qRhDE_',
'info_dict': {
'id': 'SkNEv3qRhDE_',
'title': 'Sanam Puri',
},
'playlist_mincount': 51,
}]
_ENTRY_IE = JioSaavnSongIE
_PAGE_SIZE = 50

def _fetch_page(self, artist_id, page):
return self._call_api('artist', artist_id, f'artist page {page + 1}', {
'p': page,
'n_song': self._PAGE_SIZE,
'n_album': self._PAGE_SIZE,
'sub_type': '',
'includeMetaTags': '',
'api_version': '4',
'category': 'alphabetical',
'sort_order': 'asc',
})

def _entries(self, artist_id, first_page):
for page in itertools.count():
playlist_data = first_page if not page else self._fetch_page(artist_id, page)
if not traverse_obj(playlist_data, ('topSongs', ..., {dict})):
break
yield from self._yield_items(playlist_data, 'topSongs')

def _real_extract(self, url):
artist_id = self._match_id(url)
first_page = self._fetch_page(artist_id, 0)

return self.playlist_result(
self._entries(artist_id, first_page), artist_id,
traverse_obj(first_page, ('name', {str})))

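Most of the playlist classes above rely on url_transparent entries: the id and encrypted_media_url are smuggled into each song URL so that _extract_jiosaavn_result can skip the metadata request later. A minimal sketch of that round trip, with invented values:

from yt_dlp.utils import smuggle_url, unsmuggle_url

entry_url = smuggle_url('https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk', {
    'id': 'abc123',                         # hypothetical song id
    'encrypted_media_url': 'OPAQUE-TOKEN',  # hypothetical encrypted URL token
})
url, smuggled = unsmuggle_url(entry_url)
assert smuggled == {'id': 'abc123', 'encrypted_media_url': 'OPAQUE-TOKEN'}
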
@@ -1,4 +1,5 @@
import itertools
import json
import re

from .common import InfoExtractor
@@ -9,12 +10,12 @@ from ..utils import (
int_or_none,
mimetype2ext,
srt_subtitles_timecode,
traverse_obj,
try_get,
url_or_none,
urlencode_postdata,
urljoin,
)
from ..utils.traversal import find_elements, require, traverse_obj


class LinkedInBaseIE(InfoExtractor):
@@ -82,7 +83,10 @@ class LinkedInLearningBaseIE(LinkedInBaseIE):


class LinkedInIE(LinkedInBaseIE):
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
_VALID_URL = [
r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)',
r'https?://(?:www\.)?linkedin\.com/feed/update/urn:li:activity:(?P<id>\d+)',
]
_TESTS = [{
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
'info_dict': {
@@ -106,6 +110,9 @@ class LinkedInIE(LinkedInBaseIE):
'like_count': int,
'subtitles': 'mincount:1',
},
}, {
'url': 'https://www.linkedin.com/feed/update/urn:li:activity:7016901149999955968/?utm_source=share&utm_medium=member_desktop',
'only_matching': True,
}]

def _real_extract(self, url):
@@ -271,3 +278,110 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
entries, course_slug,
course_data.get('title'),
course_data.get('description'))


class LinkedInEventsIE(LinkedInBaseIE):
IE_NAME = 'linkedin:events'
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/events/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.linkedin.com/events/7084656651378536448/comments/',
'info_dict': {
'id': '7084656651378536448',
'ext': 'mp4',
'title': '#37 Aprende a hacer una entrevista en inglés para tu próximo trabajo remoto',
'description': '¡Agarra para anotar que se viene tremendo evento!',
'duration': 1765,
'timestamp': 1689113772,
'upload_date': '20230711',
'release_timestamp': 1689174012,
'release_date': '20230712',
'live_status': 'was_live',
},
}, {
'url': 'https://www.linkedin.com/events/27-02energyfreedombyenergyclub7295762520814874625/comments/',
'info_dict': {
'id': '27-02energyfreedombyenergyclub7295762520814874625',
'ext': 'mp4',
'title': '27.02 Energy Freedom by Energy Club',
'description': 'md5:1292e6f31df998914c293787a02c3b91',
'duration': 6420,
'timestamp': 1739445333,
'upload_date': '20250213',
'release_timestamp': 1740657620,
'release_date': '20250227',
'live_status': 'was_live',
},
}]

def _real_initialize(self):
if not self._get_cookies('https://www.linkedin.com/').get('li_at'):
self.raise_login_required()

def _real_extract(self, url):
event_id = self._match_id(url)
webpage = self._download_webpage(url, event_id)

base_data = traverse_obj(webpage, (
{find_elements(tag='code', attr='style', value='display: none')}, ..., {json.loads}, 'included', ...))
meta_data = traverse_obj(base_data, (
lambda _, v: v['$type'] == 'com.linkedin.voyager.dash.events.ProfessionalEvent', any)) or {}

live_status = {
'PAST': 'was_live',
'ONGOING': 'is_live',
'FUTURE': 'is_upcoming',
}.get(meta_data.get('lifecycleState'))

if live_status == 'is_upcoming':
player_data = {}
if event_time := traverse_obj(meta_data, ('displayEventTime', {str})):
message = f'This live event is scheduled for {event_time}'
else:
message = 'This live event has not yet started'
self.raise_no_formats(message, expected=True, video_id=event_id)
else:
# TODO: Add support for audio-only live events
player_data = traverse_obj(base_data, (
lambda _, v: v['$type'] == 'com.linkedin.videocontent.VideoPlayMetadata',
any, {require('video player data')}))

formats, subtitles = [], {}
for prog_fmts in traverse_obj(player_data, ('progressiveStreams', ..., {dict})):
for fmt_url in traverse_obj(prog_fmts, ('streamingLocations', ..., 'url', {url_or_none})):
formats.append({
'url': fmt_url,
**traverse_obj(prog_fmts, {
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
'tbr': ('bitRate', {int_or_none(scale=1000)}),
'filesize': ('size', {int_or_none}),
'ext': ('mediaType', {mimetype2ext}),
}),
})

for m3u8_url in traverse_obj(player_data, (
'adaptiveStreams', lambda _, v: v['protocol'] == 'HLS', 'masterPlaylists', ..., 'url', {url_or_none},
)):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
m3u8_url, event_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)

return {
'id': event_id,
'formats': formats,
'subtitles': subtitles,
'live_status': live_status,
**traverse_obj(meta_data, {
'title': ('name', {str}),
'description': ('description', 'text', {str}),
'timestamp': ('createdAt', {int_or_none(scale=1000)}),
# timeRange.start is available when the stream is_upcoming
'release_timestamp': ('timeRange', 'start', {int_or_none(scale=1000)}),
}),
**traverse_obj(player_data, {
'duration': ('duration', {int_or_none(scale=1000)}),
# liveStreamCreatedAt is only available when the stream is_live or was_live
'release_timestamp': ('liveStreamCreatedAt', {int_or_none(scale=1000)}),
}),
}

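The event metadata lives in JSON blobs inside hidden <code> elements, which the find_elements traversal above unpacks. The same two-step extraction against a toy page (the HTML below is a stand-in for a real LinkedIn response):

import json
from yt_dlp.utils.traversal import find_elements, traverse_obj

webpage = ('<code style="display: none">{"included": [{"$type": '
           '"com.linkedin.voyager.dash.events.ProfessionalEvent", "name": "Demo"}]}</code>')
base_data = traverse_obj(webpage, (
    {find_elements(tag='code', attr='style', value='display: none')},
    ..., {json.loads}, 'included', ...))
meta_data = traverse_obj(base_data, (
    lambda _, v: v['$type'] == 'com.linkedin.voyager.dash.events.ProfessionalEvent', any)) or {}
print(meta_data.get('name'))  # Demo
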
@@ -1,5 +1,9 @@
import json
import random
import time

from .common import InfoExtractor
from ..utils import int_or_none, url_or_none
from ..utils import int_or_none, jwt_decode_hs256, try_call, url_or_none
from ..utils.traversal import require, traverse_obj


@@ -55,13 +59,81 @@ class LocoIE(InfoExtractor):
'upload_date': '20250226',
'modified_date': '20250226',
},
}, {
# Requires video authorization
'url': 'https://loco.com/stream/ac854641-ae0f-497c-a8ea-4195f6d8cc53',
'md5': '0513edf85c1e65c9521f555f665387d5',
'info_dict': {
'id': 'ac854641-ae0f-497c-a8ea-4195f6d8cc53',
'ext': 'mp4',
'title': 'DUAS CONTAS DESAFIANTE, RUSH TOP 1 NO BRASIL!',
'description': 'md5:aa77818edd6fe00dd4b6be75cba5f826',
'uploader_id': '7Y9JNAZC3Q',
'channel': 'ayellol',
'channel_follower_count': int,
'comment_count': int,
'view_count': int,
'concurrent_view_count': int,
'like_count': int,
'duration': 1229,
'thumbnail': 'https://static.ivory.getloconow.com/default_thumb/f5aa678b-6d04-45d9-a89a-859af0a8028f.jpg',
'tags': ['Gameplay', 'Carry'],
'series': 'League of Legends',
'timestamp': 1741182253,
'upload_date': '20250305',
'modified_timestamp': 1741182419,
'modified_date': '20250305',
},
}]

# From _app.js
_CLIENT_ID = 'TlwKp1zmF6eKFpcisn3FyR18WkhcPkZtzwPVEEC3'
_CLIENT_SECRET = 'Kp7tYlUN7LXvtcSpwYvIitgYcLparbtsQSe5AdyyCdiEJBP53Vt9J8eB4AsLdChIpcO2BM19RA3HsGtqDJFjWmwoonvMSG3ZQmnS8x1YIM8yl82xMXZGbE3NKiqmgBVU'

def _is_jwt_expired(self, token):
return jwt_decode_hs256(token)['exp'] - time.time() < 300

def _get_access_token(self, video_id):
access_token = try_call(lambda: self._get_cookies('https://loco.com')['access_token'].value)
if access_token and not self._is_jwt_expired(access_token):
return access_token
access_token = traverse_obj(self._download_json(
'https://api.getloconow.com/v3/user/device_profile/', video_id,
'Downloading access token', fatal=False, data=json.dumps({
'platform': 7,
'client_id': self._CLIENT_ID,
'client_secret': self._CLIENT_SECRET,
'model': 'Mozilla',
'os_name': 'Win32',
'os_ver': '5.0 (Windows)',
'app_ver': '5.0 (Windows)',
}).encode(), headers={
'Content-Type': 'application/json;charset=utf-8',
'DEVICE-ID': ''.join(random.choices('0123456789abcdef', k=32)) + 'live',
'X-APP-LANG': 'en',
'X-APP-LOCALE': 'en-US',
'X-CLIENT-ID': self._CLIENT_ID,
'X-CLIENT-SECRET': self._CLIENT_SECRET,
'X-PLATFORM': '7',
}), 'access_token')
if access_token and not self._is_jwt_expired(access_token):
self._set_cookie('.loco.com', 'access_token', access_token)
return access_token

def _real_extract(self, url):
video_type, video_id = self._match_valid_url(url).group('type', 'id')
webpage = self._download_webpage(url, video_id)
stream = traverse_obj(self._search_nextjs_data(webpage, video_id), (
'props', 'pageProps', ('liveStreamData', 'stream'), {dict}, any, {require('stream info')}))
'props', 'pageProps', ('liveStreamData', 'stream', 'liveStream'), {dict}, any, {require('stream info')}))

if access_token := self._get_access_token(video_id):
self._request_webpage(
'https://drm.loco.com/v1/streams/playback/', video_id,
'Downloading video authorization', fatal=False, headers={
'authorization': access_token,
}, query={
'stream_uid': stream['uid'],
})

return {
'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id),

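Token reuse above hinges on the exp claim: jwt_decode_hs256 only base64-decodes the JWT payload (no signature verification), so the expiry check is cheap. In isolation, with the five-minute leeway made explicit (the token itself is a placeholder):

import time
from yt_dlp.utils import jwt_decode_hs256

def is_jwt_expired(token, leeway=300):
    # Treat tokens within `leeway` seconds of their exp claim as already expired
    return jwt_decode_hs256(token)['exp'] - time.time() < leeway
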
@@ -2,7 +2,6 @@ from .common import InfoExtractor
from ..utils import (
clean_html,
merge_dicts,
str_or_none,
traverse_obj,
unified_timestamp,
url_or_none,
@@ -138,13 +137,15 @@ class LRTRadioIE(LRTBaseIE):
'https://www.lrt.lt/radioteka/api/media', video_id,
query={'url': f'/mediateka/irasas/{video_id}/{path}'})

return traverse_obj(media, {
'id': ('id', {int}, {str_or_none}),
'title': ('title', {str}),
'tags': ('tags', ..., 'name', {str}),
'categories': ('playlist_item', 'category', {str}, filter, all, filter),
'description': ('content', {clean_html}, {str}),
'timestamp': ('date', {lambda x: x.replace('.', '/')}, {unified_timestamp}),
'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}),
'formats': ('playlist_item', 'file', {lambda x: self._extract_m3u8_formats(x, video_id)}),
})
return {
'id': video_id,
'formats': self._extract_m3u8_formats(media['playlist_item']['file'], video_id),
**traverse_obj(media, {
'title': ('title', {str}),
'tags': ('tags', ..., 'name', {str}),
'categories': ('playlist_item', 'category', {str}, filter, all, filter),
'description': ('content', {clean_html}, {str}),
'timestamp': ('date', {lambda x: x.replace('.', '/')}, {unified_timestamp}),
'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}),
}),
}

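The timestamp line above normalises LRT's dotted dates before handing them to unified_timestamp; in isolation (the sample date is invented, assuming the API really uses this dotted format):

from yt_dlp.utils import unified_timestamp

date_str = '2024.05.01 10:00'
print(unified_timestamp(date_str.replace('.', '/')))  # epoch seconds for 2024/05/01 10:00 UTC
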
@@ -1,31 +1,38 @@
import re

from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
extract_attributes,
int_or_none,
str_to_int,
join_nonempty,
parse_count,
parse_duration,
parse_iso8601,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj


class ManyVidsIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
_TESTS = [{
# preview video
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
'url': 'https://www.manyvids.com/Video/530341/mv-tips-tricks',
'md5': '738dc723f7735ee9602f7ea352a6d058',
'info_dict': {
'id': '133957',
'id': '530341-preview',
'ext': 'mp4',
'title': 'everthing about me (Preview)',
'uploader': 'ellyxxix',
'title': 'MV Tips & Tricks (Preview)',
'description': r're:I will take you on a tour around .{1313}$',
'thumbnail': r're:https://cdn5\.manyvids\.com/php_uploads/video_images/DestinyDiaz/.+\.jpg',
'uploader': 'DestinyDiaz',
'view_count': int,
'like_count': int,
'release_timestamp': 1508419904,
'tags': ['AdultSchool', 'BBW', 'SFW', 'TeacherFetish'],
'release_date': '20171019',
'duration': 3167.0,
},
'expected_warnings': ['Only extracting preview'],
}, {
# full video
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
@@ -34,129 +41,68 @@ class ManyVidsIE(InfoExtractor):
'id': '935718',
'ext': 'mp4',
'title': 'MY FACE REVEAL',
'description': 'md5:ec5901d41808b3746fed90face161612',
'description': r're:Today is the day!! I am finally taking off my mask .{445}$',
'thumbnail': r're:https://ods\.manyvids\.com/1001061960/3aa5397f2a723ec4597e344df66ab845/screenshots/.+\.jpg',
'uploader': 'Sarah Calanthe',
'view_count': int,
'like_count': int,
'release_date': '20181110',
'tags': ['EyeContact', 'Interviews', 'MaskFetish', 'MouthFetish', 'Redhead'],
'release_timestamp': 1541851200,
'duration': 224.0,
},
}]
_API_BASE = 'https://www.manyvids.com/bff/store/video'

def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(f'{self._API_BASE}/{video_id}/private', video_id)['data']
formats, preview_only = [], True

real_url = f'https://www.manyvids.com/video/{video_id}/gtm.js'
try:
webpage = self._download_webpage(real_url, video_id)
except Exception:
# probably useless fallback
webpage = self._download_webpage(url, video_id)

info = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
webpage, 'meta details', default='')
info = extract_attributes(info)

player = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
webpage, 'player details', default='')
player = extract_attributes(player)

video_urls_and_ids = (
(info.get('data-meta-video'), 'video'),
(player.get('data-video-transcoded'), 'transcoded'),
(player.get('data-video-filepath'), 'filepath'),
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
)

def txt_or_none(s, default=None):
return (s.strip() or default) if isinstance(s, str) else default

uploader = txt_or_none(info.get('data-meta-author'))

def mung_title(s):
if uploader:
s = re.sub(rf'^\s*{re.escape(uploader)}\s+[|-]', '', s)
return txt_or_none(s)

title = (
mung_title(info.get('data-meta-title'))
or self._html_search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
webpage, 'title', default=None)
or self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True))

title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title

if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
title += ' (Preview)'

mv_token = self._search_regex(
r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'mv token', default=None, group='value')

if mv_token:
# Sets some cookies
self._download_webpage(
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
video_id, note='Setting format cookies', fatal=False,
data=urlencode_postdata({
'mvtoken': mv_token,
'vid': video_id,
}), headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
})

formats = []
for v_url, fmt in video_urls_and_ids:
v_url = url_or_none(v_url)
if not v_url:
for format_id, path in [
('preview', ['teaser', 'filepath']),
('transcoded', ['transcodedFilepath']),
('filepath', ['filepath']),
]:
format_url = traverse_obj(video_data, (*path, {url_or_none}))
if not format_url:
continue
if determine_ext(v_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls'))
if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id=format_id))
else:
formats.append({
'url': v_url,
'format_id': fmt,
'url': format_url,
'format_id': format_id,
'preference': -10 if format_id == 'preview' else None,
'quality': 10 if format_id == 'filepath' else None,
'height': int_or_none(
self._search_regex(r'_(\d{2,3}[02468])_', format_url, 'height', default=None)),
})
if format_id != 'preview':
preview_only = False

self._remove_duplicate_formats(formats)
metadata = traverse_obj(
self._download_json(f'{self._API_BASE}/{video_id}', video_id, fatal=False), 'data')
title = traverse_obj(metadata, ('title', {clean_html}))

for f in formats:
if f.get('height') is None:
f['height'] = int_or_none(
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
if '/preview/' in f['url']:
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
f['preference'] = -10
if 'transcoded' in f['format_id']:
f['preference'] = f.get('preference', -1) - 1

def get_likes():
likes = self._search_regex(
rf'''(<a\b[^>]*\bdata-id\s*=\s*(['"]){video_id}\2[^>]*>)''',
webpage, 'likes', default='')
likes = extract_attributes(likes)
return int_or_none(likes.get('data-likes'))

def get_views():
return str_to_int(self._html_search_regex(
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
webpage, 'view count', default=None))
if preview_only:
title = join_nonempty(title, '(Preview)', delim=' ')
video_id += '-preview'
self.report_warning(
f'Only extracting preview. Video may be paid or subscription only. {self._login_hint()}')

return {
'id': video_id,
'title': title,
'formats': formats,
'description': txt_or_none(info.get('data-meta-description')),
'uploader': txt_or_none(info.get('data-meta-author')),
'thumbnail': (
url_or_none(info.get('data-meta-image'))
or url_or_none(player.get('data-video-screenshot'))),
'view_count': get_views(),
'like_count': get_likes(),
**traverse_obj(metadata, {
'description': ('description', {clean_html}),
'uploader': ('model', 'displayName', {clean_html}),
'thumbnail': (('screenshot', 'thumbnail'), {url_or_none}, any),
'view_count': ('views', {parse_count}),
'like_count': ('likes', {parse_count}),
'release_timestamp': ('launchDate', {parse_iso8601}),
'duration': ('videoDuration', {parse_duration}),
'tags': ('tagList', ..., 'label', {str}, filter, all, filter),
}),
}

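The height fallback above scrapes an even two-to-three digit number out of the media filename. A toy run against an invented URL that matches the pattern:

import re

format_url = 'https://cdn.example.com/clip_720_preview.mp4'  # hypothetical
m = re.search(r'_(\d{2,3}[02468])_', format_url)
print(int(m.group(1)) if m else None)  # 720
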
@@ -365,13 +365,15 @@ mutation initPlaybackSession(
'All videos are only available to registered users', method='password')

def _set_device_id(self, username):
if not self._device_id:
self._device_id = self.cache.load(
self._NETRC_MACHINE, 'device_ids', default={}).get(username)
if self._device_id:
return
device_id_cache = self.cache.load(self._NETRC_MACHINE, 'device_ids', default={})
self._device_id = device_id_cache.get(username)
if self._device_id:
return
self._device_id = str(uuid.uuid4())
self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
device_id_cache[username] = self._device_id
self.cache.store(self._NETRC_MACHINE, 'device_ids', device_id_cache)

def _perform_login(self, username, password):
try:

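The device-id caching above is a read-modify-write cycle, so storing one user's id no longer drops every other cached entry. The same pattern in isolation, with a plain dict standing in for self.cache:

import uuid

cache = {'device_ids': {}}  # stands in for the persistent cache

def get_device_id(username):
    device_ids = dict(cache['device_ids'])        # load
    if username not in device_ids:
        device_ids[username] = str(uuid.uuid4())  # add only this entry
        cache['device_ids'] = device_ids          # store the full mapping back
    return device_ids[username]
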
@@ -3,6 +3,7 @@ import json

from .art19 import Art19IE
from .common import InfoExtractor
from ..networking import PATCHRequest
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
@@ -74,7 +75,7 @@ class NebulaBaseIE(InfoExtractor):
'app_version': '23.10.0',
'platform': 'ios',
})
return {'formats': fmts, 'subtitles': subs}
break
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
self.raise_login_required()
@@ -84,6 +85,9 @@ class NebulaBaseIE(InfoExtractor):
continue
raise

self.mark_watched(content_id, slug)
return {'formats': fmts, 'subtitles': subs}

def _extract_video_metadata(self, episode):
channel_url = traverse_obj(
episode, (('channel_slug', 'class_slug'), {urljoin('https://nebula.tv/')}), get_all=False)
@@ -111,6 +115,13 @@ class NebulaBaseIE(InfoExtractor):
'uploader_url': channel_url,
}

def _mark_watched(self, content_id, slug):
self._call_api(
PATCHRequest(f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/progress/'),
slug, 'Marking watched', 'Unable to mark watched', fatal=False,
data=json.dumps({'completed': True}).encode(),
headers={'content-type': 'application/json'})


class NebulaIE(NebulaBaseIE):
IE_NAME = 'nebula:video'
@@ -322,6 +333,7 @@ class NebulaClassIE(NebulaBaseIE):
if not episode_url and metadata.get('premium'):
self.raise_login_required()

self.mark_watched(metadata['id'], slug)
if Art19IE.suitable(episode_url):
return self.url_result(episode_url, Art19IE)
return traverse_obj(metadata, {

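The progress URL in _mark_watched is derived from the content id prefix; assuming ids of the form 'video:...' or 'lesson:...' (inferred from the split logic, not confirmed elsewhere in this diff), it maps like so:

content_id = 'video:abc123'  # hypothetical Nebula content id
print(f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/progress/')
# -> https://content.api.nebula.app/videos/video:abc123/progress/
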
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_bitrate,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
@@ -24,8 +24,6 @@ from ..utils import (
|
||||
qualities,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
@@ -34,13 +32,70 @@ from ..utils import (
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, require, traverse_obj
|
||||
|
||||
|
||||
class NiconicoIE(InfoExtractor):
|
||||
class NiconicoBaseIE(InfoExtractor):
|
||||
_GEO_BYPASS = False
|
||||
_GEO_COUNTRIES = ['JP']
|
||||
_LOGIN_BASE = 'https://account.nicovideo.jp'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return bool(self._get_cookies('https://www.nicovideo.jp').get('user_session'))
|
||||
|
||||
def _raise_login_error(self, message, expected=True):
|
||||
raise ExtractorError(f'Unable to login: {message}', expected=expected)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self.is_logged_in:
|
||||
return
|
||||
|
||||
self._request_webpage(
|
||||
f'{self._LOGIN_BASE}/login', None, 'Requesting session cookies')
|
||||
webpage = self._download_webpage(
|
||||
f'{self._LOGIN_BASE}/login/redirector', None,
|
||||
'Logging in', 'Unable to log in', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': f'{self._LOGIN_BASE}/login',
|
||||
}, data=urlencode_postdata({
|
||||
'mail_tel': username,
|
||||
'password': password,
|
||||
}))
|
||||
|
||||
if self.is_logged_in:
|
||||
return
|
||||
elif err_msg := traverse_obj(webpage, (
|
||||
{find_element(cls='notice error')}, {find_element(cls='notice__text')}, {clean_html},
|
||||
)):
|
||||
self._raise_login_error(err_msg or 'Invalid username or password')
|
||||
elif 'oneTimePw' in webpage:
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage, 'post url', group='url')
|
||||
mfa, urlh = self._download_webpage_handle(
|
||||
urljoin(self._LOGIN_BASE, post_url), None,
|
||||
'Performing MFA', 'Unable to complete MFA', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=urlencode_postdata({
|
||||
'otp': self._get_tfa_info('6 digit number shown on app'),
|
||||
}))
|
||||
if self.is_logged_in:
|
||||
return
|
||||
elif 'error-code' in parse_qs(urlh.url):
|
||||
err_msg = traverse_obj(mfa, ({find_element(cls='pageMainMsg')}, {clean_html}))
|
||||
self._raise_login_error(err_msg or 'MFA session expired')
|
||||
elif 'formError' in mfa:
|
||||
err_msg = traverse_obj(mfa, (
|
||||
{find_element(cls='formError')}, {find_element(tag='div')}, {clean_html}))
|
||||
self._raise_login_error(err_msg or 'MFA challenge failed')
|
||||
|
||||
self._raise_login_error('Unexpected login error', expected=False)
|
||||
|
||||
|
||||
class NiconicoIE(NiconicoBaseIE):
|
||||
IE_NAME = 'niconico'
|
||||
IE_DESC = 'ニコニコ動画'
|
||||
_GEO_COUNTRIES = ['JP']
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
@@ -180,229 +235,6 @@ class NiconicoIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
_API_HEADERS = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0',
|
||||
'X-Niconico-Language': 'en-us',
|
||||
'Referer': 'https://www.nicovideo.jp/',
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_ok = True
|
||||
login_form_strs = {
|
||||
'mail_tel': username,
|
||||
'password': password,
|
||||
}
|
||||
self._request_webpage(
|
||||
'https://account.nicovideo.jp/login', None,
|
||||
note='Acquiring Login session')
|
||||
page = self._download_webpage(
|
||||
'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
|
||||
note='Logging in', errnote='Unable to log in',
|
||||
data=urlencode_postdata(login_form_strs),
|
||||
headers={
|
||||
'Referer': 'https://account.nicovideo.jp/login',
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
if 'oneTimePw' in page:
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
|
||||
page = self._download_webpage(
|
||||
urljoin('https://account.nicovideo.jp', post_url), None,
|
||||
note='Performing MFA', errnote='Unable to complete MFA',
|
||||
data=urlencode_postdata({
|
||||
'otp': self._get_tfa_info('6 digits code'),
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
if 'oneTimePw' in page or 'formError' in page:
|
||||
err_msg = self._html_search_regex(
|
||||
r'formError["\']+>(.*?)</div>', page, 'form_error',
|
||||
default='There\'s an error but the message can\'t be parsed.',
|
||||
flags=re.DOTALL)
|
||||
self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
|
||||
return False
|
||||
login_ok = 'class="notice error"' not in page
|
||||
if not login_ok:
|
||||
self.report_warning('Unable to log in: bad username or password')
|
||||
return login_ok
|
||||
|
||||
def _get_heartbeat_info(self, info_dict):
|
||||
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
|
||||
dmc_protocol = info_dict['expected_protocol']
|
||||
|
||||
api_data = (
|
||||
info_dict.get('_api_data')
|
||||
or self._parse_json(
|
||||
self._html_search_regex(
|
||||
'data-api-data="([^"]+)"',
|
||||
self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
|
||||
'API data', default='{}'),
|
||||
video_id))
|
||||
|
||||
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
|
||||
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
|
||||
|
||||
def ping():
|
||||
tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
|
||||
if tracking_id:
|
||||
tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
|
||||
watch_request_response = self._download_json(
|
||||
tracking_url, video_id,
|
||||
note='Acquiring permission for downloading video', fatal=False,
|
||||
headers=self._API_HEADERS)
|
||||
if traverse_obj(watch_request_response, ('meta', 'status')) != 200:
|
||||
self.report_warning('Failed to acquire permission for playing video. Video download may fail.')
|
||||
|
||||
yesno = lambda x: 'yes' if x else 'no'
|
||||
|
||||
if dmc_protocol == 'http':
|
||||
protocol = 'http'
|
||||
protocol_parameters = {
|
||||
'http_output_download_parameters': {
|
||||
'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
|
||||
'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
|
||||
},
|
||||
}
|
||||
elif dmc_protocol == 'hls':
|
||||
protocol = 'm3u8'
|
||||
segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
|
||||
parsed_token = self._parse_json(session_api_data['token'], video_id)
|
||||
encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
|
||||
protocol_parameters = {
|
||||
'hls_parameters': {
|
||||
'segment_duration': segment_duration,
|
||||
'transfer_preset': '',
|
||||
'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
|
||||
'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
|
||||
},
|
||||
}
|
||||
if 'hls_encryption' in parsed_token and encryption:
|
||||
protocol_parameters['hls_parameters']['encryption'] = {
|
||||
parsed_token['hls_encryption']: {
|
||||
'encrypted_key': encryption['encryptedKey'],
|
||||
'key_uri': encryption['keyUri'],
|
||||
},
|
||||
}
|
||||
else:
|
||||
protocol = 'm3u8_native'
|
||||
else:
|
||||
raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')
|
||||
|
||||
session_response = self._download_json(
session_api_endpoint['url'], video_id,
query={'_format': 'json'},
headers={'Content-Type': 'application/json'},
note='Downloading JSON metadata for {}'.format(info_dict['format_id']),
data=json.dumps({
'session': {
'client_info': {
'player_id': session_api_data.get('playerId'),
},
'content_auth': {
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
'service_id': 'nicovideo',
'service_user_id': session_api_data.get('serviceUserId'),
},
'content_id': session_api_data.get('contentId'),
'content_src_id_sets': [{
'content_src_ids': [{
'src_id_to_mux': {
'audio_src_ids': [audio_src_id],
'video_src_ids': [video_src_id],
},
}],
}],
'content_type': 'movie',
'content_uri': '',
'keep_method': {
'heartbeat': {
'lifetime': session_api_data.get('heartbeatLifetime'),
},
},
'priority': session_api_data['priority'],
'protocol': {
'name': 'http',
'parameters': {
'http_parameters': {
'parameters': protocol_parameters,
},
},
},
'recipe_id': session_api_data.get('recipeId'),
'session_operation_auth': {
'session_operation_auth_by_signature': {
'signature': session_api_data.get('signature'),
'token': session_api_data.get('token'),
},
},
'timing_constraint': 'unlimited',
},
}).encode())

info_dict['url'] = session_response['data']['session']['content_uri']
info_dict['protocol'] = protocol

# get heartbeat info
heartbeat_info_dict = {
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
'data': json.dumps(session_response['data']),
# interval: convert milliseconds to seconds, then halve to leave a buffer
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
'ping': ping,
}

return info_dict, heartbeat_info_dict

def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):

if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
return None

format_id = '-'.join(
[remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])

vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))

return {
'url': 'niconico_dmc:{}/{}/{}'.format(video_id, video_quality['id'], audio_quality['id']),
'format_id': format_id,
'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '),
'ext': 'mp4',  # Session API is used in HTML5, which always serves mp4
'acodec': 'aac',
'vcodec': 'h264',
**traverse_obj(audio_quality, ('metadata', {
'abr': ('bitrate', {float_or_none(scale=1000)}),
'asr': ('samplingRate', {int_or_none}),
})),
**traverse_obj(video_quality, ('metadata', {
'vbr': ('bitrate', {float_or_none(scale=1000)}),
'height': ('resolution', 'height', {int_or_none}),
'width': ('resolution', 'width', {int_or_none}),
})),
'quality': -2 if 'low' in video_quality['id'] else None,
'protocol': 'niconico_dmc',
'expected_protocol': dmc_protocol,  # XXX: This is not a documented field
'http_headers': {
'Origin': 'https://www.nicovideo.jp',
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
},
}

def _yield_dmc_formats(self, api_data, video_id):
dmc_data = traverse_obj(api_data, ('media', 'delivery', 'movie'))
audios = traverse_obj(dmc_data, ('audios', ..., {dict}))
videos = traverse_obj(dmc_data, ('videos', ..., {dict}))
protocols = traverse_obj(dmc_data, ('session', 'protocols', ..., {str}))
if not all((audios, videos, protocols)):
return

for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols):
if fmt := self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol):
yield fmt

def _yield_dms_formats(self, api_data, video_id):
fmt_filter = lambda _, v: v['isAvailable'] and v['id']
@@ -451,42 +283,61 @@ class NiconicoIE(InfoExtractor):
lambda _, v: v['id'] == video_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1
yield video_fmt

def _extract_server_response(self, webpage, video_id, fatal=True):
try:
return traverse_obj(
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
('data', 'response', {dict}, {require('server response')}))
except ExtractorError:
if not fatal:
return {}
raise

def _real_extract(self, url):
video_id = self._match_id(url)

try:
webpage, handle = self._download_webpage_handle(
'https://www.nicovideo.jp/watch/' + video_id, video_id)
f'https://www.nicovideo.jp/watch/{video_id}', video_id,
headers=self.geo_verification_headers())
if video_id.startswith('so'):
video_id = self._match_id(handle.url)

api_data = traverse_obj(
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
('data', 'response', {dict}))
if not api_data:
raise ExtractorError('Server response data not found')
api_data = self._extract_server_response(webpage, video_id)
except ExtractorError as e:
try:
api_data = self._download_json(
f'https://www.nicovideo.jp/api/watch/v3/{video_id}?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_{round(time.time() * 1000)}', video_id,
note='Downloading API JSON', errnote='Unable to fetch data')['data']
f'https://www.nicovideo.jp/api/watch/v3/{video_id}', video_id,
'Downloading API JSON', 'Unable to fetch data', query={
'_frontendId': '6',
'_frontendVersion': '0',
'actionTrackId': f'AAAAAAAAAA_{round(time.time() * 1000)}',
}, headers=self.geo_verification_headers())['data']
except ExtractorError:
if not isinstance(e.cause, HTTPError):
# Raise if original exception was from _parse_json or utils.traversal.require
raise
# The webpage server response has more detailed error info than the API response
webpage = e.cause.response.read().decode('utf-8', 'replace')
error_msg = self._html_search_regex(
r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
webpage, 'error reason', default=None)
if not error_msg:
reason_code = self._extract_server_response(
webpage, video_id, fatal=False).get('reasonCode')
if not reason_code:
raise
raise ExtractorError(clean_html(error_msg), expected=True)
if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif reason_code == 'HIDDEN_VIDEO':
raise ExtractorError(
'The viewing period of this video has expired', expected=True)
elif reason_code == 'DELETED_VIDEO':
raise ExtractorError('This video has been deleted', expected=True)
raise ExtractorError(f'Niconico says: {reason_code}')

availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', {
'needs_premium': ('isPremium', {bool}),
'needs_subscription': ('isAdmission', {bool}),
})) or {'needs_auth': True}))
formats = [*self._yield_dmc_formats(api_data, video_id),
*self._yield_dms_formats(api_data, video_id)]

formats = list(self._yield_dms_formats(api_data, video_id))
if not formats:
fail_msg = clean_html(self._html_search_regex(
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
@@ -921,7 +772,7 @@ class NiconicoUserIE(InfoExtractor):
return self.playlist_result(self._entries(list_id), list_id)


class NiconicoLiveIE(InfoExtractor):
class NiconicoLiveIE(NiconicoBaseIE):
IE_NAME = 'niconico:live'
IE_DESC = 'ニコニコ生放送'
_VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
@@ -953,8 +804,6 @@ class NiconicoLiveIE(InfoExtractor):
'only_matching': True,
}]

_KNOWN_LATENCY = ('high', 'low')

def _real_extract(self, url):
video_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
@@ -970,22 +819,19 @@ class NiconicoLiveIE(InfoExtractor):
})

hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
if latency not in self._KNOWN_LATENCY:
latency = 'high'

ws = self._request_webpage(
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
video_id=video_id, note='Connecting to WebSocket server')

self.write_debug('[debug] Sending HLS server request')
self.write_debug('Sending HLS server request')
ws.send(json.dumps({
'type': 'startWatching',
'data': {
'stream': {
'quality': 'abr',
'protocol': 'hls+fmp4',
'latency': latency,
'protocol': 'hls',
'latency': 'high',
'accessRightMethod': 'single_cookie',
'chasePlay': False,
},
@@ -1049,18 +895,29 @@ class NiconicoLiveIE(InfoExtractor):
for cookie in cookies:
self._set_cookie(
cookie['domain'], cookie['name'], cookie['value'],
expire_time=unified_timestamp(cookie['expires']), path=cookie['path'], secure=cookie['secure'])
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])

fmt_common = {
'live_latency': 'high',
'origin': hostname,
'protocol': 'niconico_live',
'video_id': video_id,
'ws': ws,
}
q_iter = (q for q in qualities[1:] if not q.startswith('audio_'))  # ignore initial 'abr'
a_map = {96: 'audio_low', 192: 'audio_high'}

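# Map the HLS variants back to Niconico quality names: video variants take
# names from q_iter (the quality list minus the audio_* entries), while
# audio-only variants are matched by their parsed bitrate via a_map.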
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
for fmt, q in zip(formats, reversed(qualities[1:])):
fmt.update({
'format_id': q,
'protocol': 'niconico_live',
'ws': ws,
'video_id': video_id,
'live_latency': latency,
'origin': hostname,
})
for fmt in formats:
if fmt.get('acodec') == 'none':
fmt['format_id'] = next(q_iter, fmt['format_id'])
elif fmt.get('vcodec') == 'none':
abr = parse_bitrate(fmt['url'].lower())
fmt.update({
'abr': abr,
'format_id': a_map.get(abr, fmt['format_id']),
})
fmt.update(fmt_common)

return {
'id': video_id,

@@ -181,6 +181,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 119.0,
},
'skip': 'HTTP Error 500: Internal Server Error',
}, {
# article with audio and no video
'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
@@ -190,13 +191,14 @@ class NYTimesArticleIE(NYTimesBaseIE):
'ext': 'mp3',
'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?',
'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e',
'timestamp': 1695960700,
'timestamp': 1696008129,
'upload_date': '20230929',
'creator': 'Stephanie Nolen, Natalija Gormalova',
'creators': ['Stephanie Nolen', 'Natalija Gormalova'],
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 1322,
},
}, {
# lede_media_block already has sourceId
'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
'md5': '3eb5ddb1d6f86254fe4f233826778737',
'info_dict': {
@@ -207,7 +209,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
'timestamp': 1701290997,
'upload_date': '20231129',
'uploader': 'By The New York Times',
'creator': 'Katie Rogers',
'creators': ['Katie Rogers'],
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 97.631,
},
@@ -222,10 +224,22 @@ class NYTimesArticleIE(NYTimesBaseIE):
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
'upload_date': '20231202',
'creator': 'Emily Steel, Sydney Ember',
'creators': ['Emily Steel', 'Sydney Ember'],
'timestamp': 1701511264,
},
'playlist_count': 3,
}, {
# lede_media_block does not have sourceId
'url': 'https://www.nytimes.com/2025/04/30/well/move/hip-mobility-routine.html',
'info_dict': {
'id': 'hip-mobility-routine',
'title': 'Tight Hips? These Moves Can Help.',
'description': 'Sitting all day is hard on your hips. Try this simple routine for better mobility.',
'creators': ['Alyssa Ages', 'Theodore Tae'],
'timestamp': 1746003629,
'upload_date': '20250430',
},
'playlist_count': 7,
}, {
'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html',
'only_matching': True,
@@ -256,14 +270,18 @@ class NYTimesArticleIE(NYTimesBaseIE):

def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
webpage = self._download_webpage(url, page_id, impersonate=True)
art_json = self._search_json(
r'window\.__preloadedData\s*=', webpage, 'media details', page_id,
transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article']
content = art_json['sprinkledBody']['content']

blocks = traverse_obj(art_json, (
'sprinkledBody', 'content', ..., ('ledeMedia', None),
lambda _, v: v['__typename'] in ('Video', 'Audio')))
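# Collect the lede media block first, backfilling the article-level sourceId
# when the block lacks its own, then append any other Video/Audio media blocks.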
blocks = []
block_filter = lambda k, v: k == 'media' and v['__typename'] in ('Video', 'Audio')
if lede_media_block := traverse_obj(content, (..., 'ledeMedia', block_filter, any)):
lede_media_block.setdefault('sourceId', art_json.get('sourceId'))
blocks.append(lede_media_block)
blocks.extend(traverse_obj(content, (..., block_filter)))
if not blocks:
raise ExtractorError('Unable to extract any media blocks from webpage')

@@ -273,8 +291,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
'sprinkledBody', 'content', ..., 'summary', 'content', ..., 'text', {str}),
get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
'creator': ', '.join(
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))),  # TODO: change to 'creators' (list)
'creators': traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName', {str})),
'thumbnails': self._extract_thumbnails(traverse_obj(
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
}

@@ -1,40 +0,0 @@
import re

from .common import InfoExtractor


class OnceIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'

def _extract_once_formats(self, url, http_formats_preference=None):
domain_id, application_id, media_item_id = re.match(
OnceIE._VALID_URL, url).groups()
formats = self._extract_m3u8_formats(
self.ADAPTIVE_URL_TEMPLATE % (
domain_id, application_id, media_item_id),
media_item_id, 'mp4', m3u8_id='hls', fatal=False)
progressive_formats = []
for adaptive_format in formats:
# Prevent advertisement from embedding into m3u8 playlist (see
# https://github.com/ytdl-org/youtube-dl/issues/8893#issuecomment-199912684)
adaptive_format['url'] = re.sub(
r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url'])
rendition_id = self._search_regex(
r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',
adaptive_format['url'], 'rendition id', default=None)
if rendition_id:
progressive_format = adaptive_format.copy()
progressive_format.update({
'url': self.PROGRESSIVE_URL_TEMPLATE % (
domain_id, application_id, rendition_id, media_item_id),
'format_id': adaptive_format['format_id'].replace(
'hls', 'http'),
'protocol': 'http',
'preference': http_formats_preference,
})
progressive_formats.append(progressive_format)
self._check_formats(progressive_formats, media_item_id)
formats.extend(progressive_formats)
return formats
@@ -14,8 +14,9 @@ from ..utils import (
int_or_none,
parse_qs,
srt_subtitles_timecode,
traverse_obj,
url_or_none,
)
from ..utils.traversal import traverse_obj


class PanoptoBaseIE(InfoExtractor):
@@ -345,21 +346,16 @@ class PanoptoIE(PanoptoBaseIE):
subtitles = {}
for stream in streams or []:
stream_formats = []
http_stream_url = stream.get('StreamHttpUrl')
stream_url = stream.get('StreamUrl')

if http_stream_url:
stream_formats.append({'url': http_stream_url})

if stream_url:
for stream_url in set(traverse_obj(stream, (('StreamHttpUrl', 'StreamUrl'), {url_or_none}))):
media_type = stream.get('ViewerMediaFileTypeName')
if media_type in ('hls', ):
m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
stream_formats.extend(m3u8_formats)
subtitles = self._merge_subtitles(subtitles, stream_subtitles)
fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, m3u8_id='hls', fatal=False)
stream_formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
stream_formats.append({
'url': stream_url,
'ext': media_type,
})
for fmt in stream_formats:
fmt.update({

@@ -340,8 +340,9 @@ class PatreonIE(PatreonBaseIE):
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
}))

# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
headers = {'referer': 'https://patreon.com/'}
# Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
# patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
headers = {'referer': 'https://www.patreon.com/'}

# handle Vimeo embeds
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
@@ -352,7 +353,7 @@ class PatreonIE(PatreonBaseIE):
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
fatal=False, errnote=False, expected_status=429):  # 429 is TLS fingerprint rejection
entries.append(self.url_result(
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
VimeoIE._smuggle_referrer(v_url, headers['referer']),
VimeoIE, url_transparent=True))

embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
@@ -379,11 +380,13 @@ class PatreonIE(PatreonBaseIE):
'url': post_file['url'],
})
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
post_file['url'], video_id, headers=headers)
entries.append({
'id': video_id,
'formats': formats,
'subtitles': subtitles,
'http_headers': headers,
})

can_view_post = traverse_obj(attributes, 'current_user_can_view')

@@ -1,5 +1,3 @@
import re

from .youtube import YoutubeIE
from .zdf import ZDFBaseIE
from ..utils import (
@@ -7,44 +5,27 @@ from ..utils import (
merge_dicts,
try_get,
unified_timestamp,
urljoin,
)


class PhoenixIE(ZDFBaseIE):
IE_NAME = 'phoenix.de'
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/?#]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
_TESTS = [{
# Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
'md5': '34ec321e7eb34231fd88616c65c92db0',
'url': 'https://www.phoenix.de/sendungen/dokumentationen/spitzbergen-a-893349.html',
'md5': 'a79e86d9774d0b3f2102aff988a0bd32',
'info_dict': {
'id': '210222_phx_nachgehakt_corona_protest',
'id': '221215_phx_spitzbergen',
'ext': 'mp4',
'title': 'Wohin führt der Protest in der Pandemie?',
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
'duration': 1691,
'timestamp': 1613902500,
'upload_date': '20210221',
'title': 'Spitzbergen',
'description': 'Film von Tilmann Bünz',
'duration': 728.0,
'timestamp': 1555600500,
'upload_date': '20190418',
'uploader': 'Phoenix',
'series': 'corona nachgehakt',
'episode': 'Wohin führt der Protest in der Pandemie?',
},
}, {
# Youtube embed
'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
'info_dict': {
'id': 'hMQtqFYjomk',
'ext': 'mp4',
'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
'duration': 3509,
'upload_date': '20201219',
'uploader': 'phoenix',
'uploader_id': 'phoenix',
},
'params': {
'skip_download': True,
'thumbnail': 'https://www.phoenix.de/sixcms/media.php/21/Bergspitzen1.png',
'series': 'Dokumentationen',
'episode': 'Spitzbergen',
},
}, {
'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
@@ -90,8 +71,8 @@ class PhoenixIE(ZDFBaseIE):
content_id = details['tracking']['nielsen']['content']['assetid']

info = self._extract_ptmd(
f'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/{content_id}',
content_id, None, url)
f'https://tmd.phoenix.de/tmd/2/android_native_6/vod/ptmd/phoenix/{content_id}',
content_id)

duration = int_or_none(try_get(
details, lambda x: x['tracking']['nielsen']['content']['length']))
@@ -101,20 +82,8 @@ class PhoenixIE(ZDFBaseIE):
str)
episode = title if details.get('contentType') == 'episode' else None

thumbnails = []
teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
for thumbnail_key, thumbnail_url in teaser_images.items():
thumbnail_url = urljoin(url, thumbnail_url)
if not thumbnail_url:
continue
thumbnail = {
'url': thumbnail_url,
}
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
if m:
thumbnail['width'] = int(m.group(1))
thumbnail['height'] = int(m.group(2))
thumbnails.append(thumbnail)
thumbnails = self._extract_thumbnails(teaser_images)

return merge_dicts(info, {
'id': content_id,

@@ -10,7 +10,8 @@ from ..utils import (


class PicartoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
IE_NAME = 'picarto'
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[^/#?]+)/?(?:$|[?#])'
_TEST = {
'url': 'https://picarto.tv/Setz',
'info_dict': {
@@ -89,7 +90,8 @@ class PicartoIE(InfoExtractor):


class PicartoVodIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+/videos)/(?P<id>[^/?#&]+)'
IE_NAME = 'picarto:vod'
_VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+(?:/profile)?/videos)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
@@ -111,6 +113,18 @@ class PicartoVodIE(InfoExtractor):
'channel': 'ArtofZod',
'age_limit': 18,
},
}, {
'url': 'https://picarto.tv/DrechuArt/profile/videos/400347',
'md5': 'f9ea54868b1d9dec40eb554b484cc7bf',
'info_dict': {
'id': '400347',
'ext': 'mp4',
'title': 'Welcome to the Show',
'thumbnail': r're:^https?://.*\.jpg',
'channel': 'DrechuArt',
'age_limit': 0,
},

}, {
'url': 'https://picarto.tv/videopopout/Plague',
'only_matching': True,

@@ -7,11 +7,12 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
parse_qs,
traverse_obj,
update_url_query,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj, unpack


class PlaySuisseIE(InfoExtractor):
@@ -26,12 +27,12 @@ class PlaySuisseIE(InfoExtractor):
{
# episode in a series
'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
'md5': 'e20d1ede6872a03b41905ca1060a1ef2',
'info_dict': {
'id': '763211',
'ext': 'mp4',
'title': 'Knochen',
'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
'description': 'md5:3bdd80e2ce20227c47aab1df2a79a519',
'duration': 3344,
'series': 'Wilder',
'season': 'Season 1',
@@ -42,24 +43,33 @@ class PlaySuisseIE(InfoExtractor):
},
}, {
# film
'url': 'https://www.playsuisse.ch/watch/808675',
'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
'url': 'https://www.playsuisse.ch/detail/2573198',
'md5': '1f115bb0a5191477b1a5771643a4283d',
'info_dict': {
'id': '808675',
'id': '2573198',
'ext': 'mp4',
'title': 'Der Läufer',
'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
'duration': 5280,
'title': 'Azor',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'genres': ['Fiction'],
'creators': ['Andreas Fontana'],
'cast': ['Fabrizio Rongione', 'Stéphanie Cléau', 'Gilles Privat', 'Alexandre Trocki'],
'location': 'France; Argentine',
'release_year': 2021,
'duration': 5981,
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
},
}, {
# series (treated as a playlist)
'url': 'https://www.playsuisse.ch/detail/1115687',
'info_dict': {
'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
'id': '1115687',
'series': 'They all came out to Montreux',
'title': 'They all came out to Montreux',
'description': 'md5:0fefd8c5b4468a0bb35e916887681520',
'genres': ['Documentary'],
'creators': ['Oliver Murray'],
'location': 'Switzerland',
'release_year': 2021,
},
'playlist': [{
'info_dict': {
@@ -120,6 +130,12 @@ class PlaySuisseIE(InfoExtractor):
id
name
description
descriptionLong
year
contentTypes
directors
mainCast
productionCountries
duration
episodeNumber
seasonNumber
@@ -215,9 +231,7 @@ class PlaySuisseIE(InfoExtractor):
if not self._ID_TOKEN:
raise ExtractorError('Login failed')

def _get_media_data(self, media_id):
# NOTE: In the web app, the "locale" header is used to switch between languages;
# however, this doesn't seem to take effect when passing the header here.
def _get_media_data(self, media_id, locale=None):
response = self._download_json(
'https://www.playsuisse.ch/api/graphql',
media_id, data=json.dumps({
@@ -225,7 +239,7 @@ class PlaySuisseIE(InfoExtractor):
'query': self._GRAPHQL_QUERY,
'variables': {'assetId': media_id},
}).encode(),
headers={'Content-Type': 'application/json', 'locale': 'de'})
headers={'Content-Type': 'application/json', 'locale': locale or 'de'})

return response['data']['assetV2']

@@ -234,7 +248,7 @@ class PlaySuisseIE(InfoExtractor):
self.raise_login_required(method='password')

media_id = self._match_id(url)
media_data = self._get_media_data(media_id)
media_data = self._get_media_data(media_id, traverse_obj(parse_qs(url), ('locale', 0)))
info = self._extract_single(media_data)
if media_data.get('episodes'):
info.update({
@@ -257,15 +271,22 @@ class PlaySuisseIE(InfoExtractor):
self._merge_subtitles(subs, target=subtitles)

return {
'id': media_data['id'],
'title': media_data.get('name'),
'description': media_data.get('description'),
'thumbnails': thumbnails,
'duration': int_or_none(media_data.get('duration')),
'formats': formats,
'subtitles': subtitles,
'series': media_data.get('seriesName'),
'season_number': int_or_none(media_data.get('seasonNumber')),
'episode': media_data.get('name') if media_data.get('episodeNumber') else None,
'episode_number': int_or_none(media_data.get('episodeNumber')),
**traverse_obj(media_data, {
'id': ('id', {str}),
'title': ('name', {str}),
'description': (('descriptionLong', 'description'), {str}, any),
'genres': ('contentTypes', ..., {str}),
'creators': ('directors', ..., {str}),
'cast': ('mainCast', ..., {str}),
'location': ('productionCountries', ..., {str}, all, {unpack(join_nonempty, delim='; ')}, filter),
'release_year': ('year', {str}, {lambda x: x[:4]}, {int_or_none}),
'duration': ('duration', {int_or_none}),
'series': ('seriesName', {str}),
'season_number': ('seasonNumber', {int_or_none}),
'episode': ('name', {str}, {lambda x: x if media_data['episodeNumber'] is not None else None}),
'episode_number': ('episodeNumber', {int_or_none}),
}),
}

@@ -5,11 +5,13 @@ from .common import InfoExtractor
from ..utils import (
OnDemandPagedList,
float_or_none,
int_or_none,
orderedSet,
str_or_none,
str_to_int,
traverse_obj,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import require, traverse_obj


class PodchaserIE(InfoExtractor):
@@ -21,24 +23,25 @@ class PodchaserIE(InfoExtractor):
'id': '104365585',
'title': 'Ep. 285 – freeze me off',
'description': 'cam ahn',
'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': r're:https?://.+/.+\.jpg',
'ext': 'mp3',
'categories': ['Comedy'],
'categories': ['Comedy', 'News', 'Politics', 'Arts'],
'tags': ['comedy', 'dark humor'],
'series': 'Cum Town',
'series': 'The Adam Friedland Show Podcast',
'duration': 3708,
'timestamp': 1636531259,
'upload_date': '20211110',
'average_rating': 4.0,
'series_id': '36924',
},
}, {
'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
'info_dict': {
'id': '28853',
'title': 'The Bone Zone',
'description': 'Podcast by The Bone Zone',
'description': r're:The official home of the Bone Zone podcast.+',
},
'playlist_count': 275,
'playlist_mincount': 275,
}, {
'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
'info_dict': {
@@ -51,19 +54,33 @@ class PodchaserIE(InfoExtractor):

@staticmethod
def _parse_episode(episode, podcast):
return {
'id': str(episode.get('id')),
'title': episode.get('title'),
'description': episode.get('description'),
'url': episode.get('audio_url'),
'thumbnail': episode.get('image_url'),
'duration': str_to_int(episode.get('length')),
'timestamp': unified_timestamp(episode.get('air_date')),
'average_rating': float_or_none(episode.get('rating')),
'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))),
'tags': traverse_obj(podcast, ('tags', ..., 'text')),
'series': podcast.get('title'),
}
info = traverse_obj(episode, {
'id': ('id', {int}, {str_or_none}, {require('episode ID')}),
'title': ('title', {str}),
'description': ('description', {str}),
'url': ('audio_url', {url_or_none}),
'thumbnail': ('image_url', {url_or_none}),
'duration': ('length', {int_or_none}),
'timestamp': ('air_date', {unified_timestamp}),
'average_rating': ('rating', {float_or_none}),
})
info.update(traverse_obj(podcast, {
'series': ('title', {str}),
'series_id': ('id', {int}, {str_or_none}),
'categories': (('summary', None), 'categories', ..., 'text', {str}, filter, all, {orderedSet}),
'tags': ('tags', ..., 'text', {str}),
}))
info['vcodec'] = 'none'

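# Rebuild the canonical episode URL from the slugs: each slug prefix is
# truncated to 30 characters and joined with its numeric ID.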
if info.get('series_id'):
podcast_slug = traverse_obj(podcast, ('slug', {str})) or 'podcast'
episode_slug = traverse_obj(episode, ('slug', {str})) or 'episode'
info['webpage_url'] = '/'.join((
'https://www.podchaser.com/podcasts',
'-'.join((podcast_slug[:30].rstrip('-'), info['series_id'])),
'-'.join((episode_slug[:30].rstrip('-'), info['id']))))

return info

def _call_api(self, path, *args, **kwargs):
return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs)
@@ -93,5 +110,5 @@ class PodchaserIE(InfoExtractor):
OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))

episode = self._call_api(f'episodes/{episode_id}', episode_id)
episode = self._call_api(f'podcasts/{podcast_id}/episodes/{episode_id}/player_ids', episode_id)
return self._parse_episode(episode, podcast)

@@ -321,6 +321,27 @@ class RaiPlayIE(RaiBaseIE):
'timestamp': 1348495020,
'upload_date': '20120924',
},
}, {
# checking program_info gives false positive for DRM
'url': 'https://www.raiplay.it/video/2022/10/Ad-ogni-costo---Un-giorno-in-Pretura---Puntata-del-15102022-1dfd1295-ea38-4bac-b51e-f87e2881693b.html',
'md5': '572c6f711b7c5f2d670ba419b4ae3b08',
'info_dict': {
'id': '1dfd1295-ea38-4bac-b51e-f87e2881693b',
'ext': 'mp4',
'title': 'Ad ogni costo - Un giorno in Pretura - Puntata del 15/10/2022',
'alt_title': 'St 2022/23 - Un giorno in pretura - Ad ogni costo',
'description': 'md5:4046d97b2687f74f06a8b8270ba5599f',
'uploader': 'Rai 3',
'duration': 3773.0,
'thumbnail': 'https://www.raiplay.it/dl/img/2022/10/12/1665586539957_2048x2048.png',
'creators': ['Rai 3'],
'series': 'Un giorno in pretura',
'season': '2022/23',
'episode': 'Ad ogni costo',
'timestamp': 1665507240,
'upload_date': '20221011',
'release_year': 2025,
},
}, {
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
'only_matching': True,
@@ -340,9 +361,8 @@ class RaiPlayIE(RaiBaseIE):
media = self._download_json(
f'{base}.json', video_id, 'Downloading video JSON')

if not self.get_param('allow_unplayable_formats'):
if traverse_obj(media, (('program_info', None), 'rights_management', 'rights', 'drm')):
self.report_drm(video_id)
if traverse_obj(media, ('rights_management', 'rights', 'drm')):
self.report_drm(video_id)

video = media['video']
relinker_info = self._extract_relinker_info(video['content_url'], video_id)

@@ -388,7 +388,8 @@ class RedditIE(InfoExtractor):
})
if entries:
return self.playlist_result(entries, video_id, **info)
raise ExtractorError('No media found', expected=True)
self.raise_no_formats('No media found', expected=True, video_id=video_id)
return {**info, 'id': video_id}

# Check if media is hosted on reddit:
reddit_video = traverse_obj(data, (

@@ -1,35 +1,142 @@
import base64
import io
import struct
import urllib.parse

from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
float_or_none,
make_archive_id,
parse_iso8601,
qualities,
remove_end,
remove_start,
try_get,
url_or_none,
)
from ..utils.traversal import subs_list_to_dict, traverse_obj


class RTVEALaCartaIE(InfoExtractor):
class RTVEBaseIE(InfoExtractor):
# Reimplementation of https://js2.rtve.es/pages/app-player/3.5.1/js/pf_video.js
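# The ztnr "thumbnail" endpoint returns a real PNG whose tEXt chunk hides the
# media URLs; _decrypt_url walks the PNG chunk stream (length, type, payload,
# CRC) by hand and yields each (quality, URL) pair it can decode.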
@staticmethod
def _decrypt_url(png):
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
while True:
length_data = encrypted_data.read(4)
length = struct.unpack('!I', length_data)[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
data = encrypted_data.read(length)
if chunk_type == b'tEXt':
data = bytes(filter(None, data))
alphabet_data, _, url_data = data.partition(b'#')
quality_str, _, url_data = url_data.rpartition(b'%%')
quality_str = quality_str.decode() or ''
alphabet = RTVEBaseIE._get_alphabet(alphabet_data)
url = RTVEBaseIE._get_url(alphabet, url_data)
yield quality_str, url
encrypted_data.read(4)  # CRC

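# The tEXt payload interleaves real characters with filler: _get_url reads
# each alphabet index as a tens digit and a units digit separated by a
# cycling run of filler characters, while _get_alphabet keeps one character
# and then skips a cycling 0-3 filler characters.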
@staticmethod
def _get_url(alphabet, url_data):
url = ''
f = 0
e = 3
b = 1
for char in url_data.decode('iso-8859-1'):
if f == 0:
l = int(char) * 10
f = 1
else:
if e == 0:
l += int(char)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
return url

@staticmethod
def _get_alphabet(alphabet_data):
alphabet = []
e = 0
d = 0
for char in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(char)
d = e = (e + 1) % 4
else:
d -= 1
return alphabet

def _extract_png_formats_and_subtitles(self, video_id, media_type='videos'):
formats, subtitles = [], {}
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
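# Try each known manager token in turn; a request may fail (fatal=False),
# so both are attempted and the decoded formats accumulated.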
for manager in ('rtveplayw', 'default'):
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{manager}/{media_type}/{video_id}.png',
video_id, 'Downloading url information', query={'q': 'v2'}, fatal=False)
if not png:
continue

for quality, video_url in self._decrypt_url(png):
ext = determine_ext(video_url)
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(
video_url, video_id, 'dash', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': video_url,
})
return formats, subtitles

def _parse_metadata(self, metadata):
return traverse_obj(metadata, {
'title': ('title', {str.strip}),
'alt_title': ('alt', {str.strip}),
'description': ('description', {clean_html}),
'timestamp': ('dateOfEmission', {parse_iso8601(delimiter=' ')}),
'release_timestamp': ('publicationDate', {parse_iso8601(delimiter=' ')}),
'modified_timestamp': ('modificationDate', {parse_iso8601(delimiter=' ')}),
'thumbnail': (('thumbnail', 'image', 'imageSEO'), {url_or_none}, any),
'duration': ('duration', {float_or_none(scale=1000)}),
'is_live': ('live', {bool}),
'series': (('programTitle', ('programInfo', 'title')), {clean_html}, any),
})


class RTVEALaCartaIE(RTVEBaseIE):
IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
IE_DESC = 'RTVE a la carta and Play'
_VALID_URL = [
r'https?://(?:www\.)?rtve\.es/(?:m/)?(?:(?:alacarta|play)/videos|filmoteca)/(?!directo)(?:[^/?#]+/){2}(?P<id>\d+)',
r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/?#]+/video/[^/?#]+/(?P<id>\d+)',
]

_TESTS = [{
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
'url': 'http://www.rtve.es/alacarta/videos/la-aventura-del-saber/aventuraentornosilla/3088905/',
'md5': 'a964547824359a5753aef09d79fe984b',
'info_dict': {
'id': '2491869',
'id': '3088905',
'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
'duration': 5024.566,
'series': 'Balonmano',
'title': 'En torno a la silla',
'duration': 1216.981,
'series': 'La aventura del Saber',
'thumbnail': 'https://img2.rtve.es/v/aventuraentornosilla_3088905.png',
},
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, {
'note': 'Live stream',
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
@@ -38,140 +145,88 @@ class RTVEALaCartaIE(InfoExtractor):
'ext': 'mp4',
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True,
'live_status': 'is_live',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
},
'params': {
'skip_download': 'live stream',
},
}, {
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
'md5': 'd850f3c8731ea53952ebab489cf81cbf',
'md5': 'f3cf0d1902d008c48c793e736706c174',
'info_dict': {
'id': '4236788',
'ext': 'mp4',
'title': 'Servir y proteger - Capítulo 104',
'duration': 3222.0,
'title': 'Episodio 104',
'duration': 3222.8,
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'series': 'Servir y proteger',
},
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, {
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
'only_matching': True,
}, {
'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
'only_matching': True,
}, {
'url': 'https://www.rtve.es/play/videos/saber-vivir/07-07-24/16177116/',
'md5': 'a5b24fcdfa3ff5cb7908aba53d22d4b6',
'info_dict': {
'id': '16177116',
'ext': 'mp4',
'title': 'Saber vivir - 07/07/24',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'duration': 2162.68,
'series': 'Saber vivir',
},
}, {
'url': 'https://www.rtve.es/infantil/serie/agus-lui-churros-crafts/video/gusano/7048976/',
'info_dict': {
'id': '7048976',
'ext': 'mp4',
'title': 'Gusano',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'duration': 292.86,
'series': 'Agus & Lui: Churros y Crafts',
'_old_archive_ids': ['rtveinfantil 7048976'],
},
}]

def _real_initialize(self):
user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8')
self._manager = self._download_json(
'http://www.rtve.es/odin/loki/' + user_agent_b64,
None, 'Fetching manager info')['manager']

@staticmethod
def _decrypt_url(png):
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
while True:
length = struct.unpack('!I', encrypted_data.read(4))[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
data = encrypted_data.read(length)
if chunk_type == b'tEXt':
alphabet_data, text = data.split(b'\0')
quality, url_data = text.split(b'%%')
alphabet = []
e = 0
d = 0
for l in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(l)
d = e = (e + 1) % 4
else:
d -= 1
url = ''
f = 0
e = 3
b = 1
for letter in url_data.decode('iso-8859-1'):
if f == 0:
l = int(letter) * 10
f = 1
else:
if e == 0:
l += int(letter)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1

yield quality.decode(), url
encrypted_data.read(4)  # CRC

def _extract_png_formats(self, video_id):
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
video_id, 'Downloading url information', query={'q': 'v2'})
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
formats = []
for quality, video_url in self._decrypt_url(png):
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, 'dash', fatal=False))
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': video_url,
})
return formats
def _get_subtitles(self, video_id):
subtitle_data = self._download_json(
f'https://api2.rtve.es/api/videos/{video_id}/subtitulos.json', video_id,
'Downloading subtitles info')
return traverse_obj(subtitle_data, ('page', 'items', ..., {
'id': ('lang', {str}),
'url': ('src', {url_or_none}),
}, all, {subs_list_to_dict(lang='es')}))

def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_json(
metadata = self._download_json(
f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
video_id)['page']['items'][0]
if info['state'] == 'DESPU':
if metadata['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True)
title = info['title'].strip()
formats = self._extract_png_formats(video_id)
formats, subtitles = self._extract_png_formats_and_subtitles(video_id)

subtitles = None
sbt_file = info.get('sbtFile')
if sbt_file:
subtitles = self.extract_subtitles(video_id, sbt_file)
self._merge_subtitles(self.extract_subtitles(video_id), target=subtitles)

is_live = info.get('live') is True
is_infantil = urllib.parse.urlparse(url).path.startswith('/infantil/')

return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': info.get('image'),
'subtitles': subtitles,
'duration': float_or_none(info.get('duration'), 1000),
'is_live': is_live,
'series': info.get('programTitle'),
**self._parse_metadata(metadata),
'_old_archive_ids': [make_archive_id('rtveinfantil', video_id)] if is_infantil else None,
}

def _get_subtitles(self, video_id, sub_file):
subs = self._download_json(
sub_file + '.json', video_id,
'Downloading subtitles info')['page']['items']
return dict(
(s['lang'], [{'ext': 'vtt', 'url': s['src']}])
for s in subs)


class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
class RTVEAudioIE(RTVEBaseIE):
IE_NAME = 'rtve.es:audio'
IE_DESC = 'RTVE audio'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/(?:[^/?#]+/){2}(?P<id>\d+)'

_TESTS = [{
'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
@@ -180,9 +235,11 @@ class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
'id': '5889192',
'ext': 'mp3',
'title': 'Códigos informáticos',
'thumbnail': r're:https?://.+/1598856591583.jpg',
'alt_title': 'Códigos informáticos - Escuchar ahora',
'duration': 349.440,
'series': 'A hombros de gigantes',
'description': 'md5:72b0d7c1ca20fd327bdfff7ac0171afb',
'thumbnail': 'https://img2.rtve.es/a/palabra-ingeniero-codigos-informaticos-270421_5889192.png',
},
}, {
'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
@@ -191,9 +248,11 @@ class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
'id': '5791165',
'ext': 'mp3',
'title': 'Ignatius Farray',
'alt_title': 'En Radio 3 - Ignatius Farray - 13/02/21 - escuchar ahora',
'thumbnail': r're:https?://.+/1613243011863.jpg',
'duration': 3559.559,
'series': 'En Radio 3',
'description': 'md5:124aa60b461e0b1724a380bad3bc4040',
},
}, {
'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
@@ -202,126 +261,101 @@ class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
'id': '6082623',
'ext': 'mp3',
'title': 'Capítulo 26 y último: La muerte de Victor',
'alt_title': 'Frankenstein o el moderno Prometeo - Capítulo 26 y último: La muerte de Victor',
'thumbnail': r're:https?://.+/1632147445707.jpg',
'duration': 3174.086,
'series': 'Frankenstein o el moderno Prometeo',
'description': 'md5:4ee6fcb82ebe2e46d267e1d1c1a8f7b5',
},
}]

def _extract_png_formats(self, audio_id):
"""
Retrieve the media-related PNG thumbnail, which obfuscates valuable
information about the media. This information is decrypted via the
base class's _decrypt_url function, yielding the media quality and
media URL.
"""
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png',
audio_id, 'Downloading url information', query={'q': 'v2'})
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
formats = []
for quality, audio_url in self._decrypt_url(png):
ext = determine_ext(audio_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
audio_url, audio_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
audio_url, audio_id, 'dash', fatal=False))
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': audio_url,
})
return formats

def _real_extract(self, url):
audio_id = self._match_id(url)
info = self._download_json(
f'https://www.rtve.es/api/audios/{audio_id}.json',
audio_id)['page']['items'][0]
metadata = self._download_json(
f'https://www.rtve.es/api/audios/{audio_id}.json', audio_id)['page']['items'][0]

formats, subtitles = self._extract_png_formats_and_subtitles(audio_id, media_type='audios')

return {
'id': audio_id,
'title': info['title'].strip(),
'thumbnail': info.get('thumbnail'),
'duration': float_or_none(info.get('duration'), 1000),
'series': try_get(info, lambda x: x['programInfo']['title']),
'formats': self._extract_png_formats(audio_id),
'formats': formats,
'subtitles': subtitles,
**self._parse_metadata(metadata),
}


class RTVEInfantilIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:infantil'
IE_DESC = 'RTVE infantil'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'

_TESTS = [{
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
'md5': '5747454717aedf9f9fdf212d1bcfc48d',
'info_dict': {
'id': '3040283',
'ext': 'mp4',
'title': 'Maneras de vivir',
'thumbnail': r're:https?://.+/1426182947956\.JPG',
'duration': 357.958,
},
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}]


class RTVELiveIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
class RTVELiveIE(RTVEBaseIE):
IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
_VALID_URL = [
r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)',
r'https?://(?:www\.)?rtve\.es/play/videos/directo/[^/?#]+/(?P<id>[a-zA-Z0-9-]+)',
]

_TESTS = [{
'url': 'http://www.rtve.es/directo/la-1/',
'info_dict': {
'id': 'la-1',
'ext': 'mp4',
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'live_status': 'is_live',
'title': str,
'description': str,
'thumbnail': r're:https://img\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
'timestamp': int,
'upload_date': str,
},
'params': {
'skip_download': 'live stream',
'params': {'skip_download': 'live stream'},
}, {
'url': 'https://www.rtve.es/play/videos/directo/deportes/tdp/',
'info_dict': {
'id': 'tdp',
'ext': 'mp4',
'live_status': 'is_live',
'title': str,
'description': str,
'thumbnail': r're:https://img2\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
'timestamp': int,
'upload_date': str,
},
'params': {'skip_download': 'live stream'},
}, {
'url': 'http://www.rtve.es/play/videos/directo/canales-lineales/la-1/',
'only_matching': True,
}]

def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')

video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
title = remove_start(title, 'Estoy viendo ')

vidplayer_id = self._search_regex(
(r'playerId=player([0-9]+)',
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
r'data-id=["\'](\d+)'),
webpage, 'internal video ID')
data_setup = self._search_json(
r'<div[^>]+class="[^"]*videoPlayer[^"]*"[^>]*data-setup=\'',
webpage, 'data_setup', video_id)

formats, subtitles = self._extract_png_formats_and_subtitles(data_setup['idAsset'])

return {
'id': video_id,
'title': title,
'formats': self._extract_png_formats(vidplayer_id),
**self._search_json_ld(webpage, video_id, fatal=False),
'title': self._html_extract_title(webpage),
'formats': formats,
'subtitles': subtitles,
'is_live': True,
}


class RTVETelevisionIE(InfoExtractor):
IE_NAME = 'rtve.es:television'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/?#]+/[^/?#]+/(?P<id>\d+).shtml'

_TEST = {
'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
'url': 'https://www.rtve.es/television/20091103/video-inedito-del-8o-programa/299020.shtml',
'info_dict': {
'id': '3069778',
'id': '572515',
'ext': 'mp4',
'title': 'Documentos TV - La revolución del móvil',
'duration': 3496.948,
'title': 'Clase inédita',
'duration': 335.817,
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'series': 'El coro de la cárcel',
},
'params': {
'skip_download': True,
@@ -332,11 +366,8 @@ class RTVETelevisionIE(InfoExtractor):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)

alacarta_url = self._search_regex(
r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&',
webpage, 'alacarta url', default=None)
if alacarta_url is None:
raise ExtractorError(
'The webpage doesn\'t contain any video', expected=True)
play_url = self._html_search_meta('contentUrl', webpage)
if play_url is None:
raise ExtractorError('The webpage doesn\'t contain any video', expected=True)

return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())

@@ -697,7 +697,7 @@ class SoundcloudIE(SoundcloudBaseIE):
        try:
            return self._extract_info_dict(info, full_title, token)
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
            if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
                raise
            self.report_warning(
                'You have reached the API rate limit, which is ~600 requests per '
@@ -1,61 +0,0 @@
from .adobepass import AdobePassIE
from ..utils import (
    int_or_none,
    smuggle_url,
    update_url_query,
)


class SproutIE(AdobePassIE):
    _VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
        'info_dict': {
            'id': 'bm0foJFaTKqb',
            'ext': 'mp4',
            'title': 'Robot Bike Race',
            'description': 'md5:436b1d97117cc437f54c383f4debc66d',
            'timestamp': 1606148940,
            'upload_date': '20201123',
            'uploader': 'NBCU-MPAT',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
        'only_matching': True,
    }, {
        'url': 'https://www.universalkids.com/watch/robot-bike-race',
        'only_matching': True,
    }]
    _GEO_COUNTRIES = ['US']

    def _real_extract(self, url):
        display_id = self._match_id(url)
        mpx_metadata = self._download_json(
            # http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
            'https://www.universalkids.com/_api/videos/' + display_id,
            display_id)['mpxMetadata']
        media_pid = mpx_metadata['mediaPid']
        theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
        query = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        if mpx_metadata.get('entitlement') == 'auth':
            query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
        theplatform_url = smuggle_url(
            update_url_query(theplatform_url, query), {
                'force_smil_url': True,
                'geo_countries': self._GEO_COUNTRIES,
            })
        return {
            '_type': 'url_transparent',
            'id': media_pid,
            'url': theplatform_url,
            'series': mpx_metadata.get('seriesName'),
            'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
            'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
            'ie_key': 'ThePlatform',
        }
@@ -471,8 +471,7 @@ class SVTPageIE(SVTBaseIE):
        webpage = self._download_webpage(url, display_id)
        title = self._og_search_title(webpage)

        urql_state = self._search_json(
            r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
        urql_state = self._search_json(r'urqlState\s*[=:]', webpage, 'json data', display_id)

        data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
@@ -4,7 +4,6 @@ import re
import time

from .adobepass import AdobePassIE
from .once import OnceIE
from ..networking import HEADRequest, Request
from ..utils import (
    ExtractorError,
@@ -26,7 +25,7 @@ default_ns = 'http://www.w3.org/2005/SMIL21/Language'
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})


class ThePlatformBaseIE(OnceIE):
class ThePlatformBaseIE(AdobePassIE):
    _TP_TLD = 'com'

    def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
@@ -54,16 +53,13 @@ class ThePlatformBaseIE(OnceIE):

        formats = []
        for _format in smil_formats:
            if OnceIE.suitable(_format['url']):
                formats.extend(self._extract_once_formats(_format['url']))
            else:
                media_url = _format['url']
                if determine_ext(media_url) == 'm3u8':
                    hdnea2 = self._get_cookies(media_url).get('hdnea2')
                    if hdnea2:
                        _format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value})
            media_url = _format['url']
            if determine_ext(media_url) == 'm3u8':
                hdnea2 = self._get_cookies(media_url).get('hdnea2')
                if hdnea2:
                    _format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value})

                formats.append(_format)
            formats.append(_format)

        return formats, subtitles

@@ -129,7 +125,7 @@ class ThePlatformBaseIE(OnceIE):
        return self._parse_theplatform_metadata(info)


class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
class ThePlatformIE(ThePlatformBaseIE):
    _VALID_URL = r'''(?x)
        (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
           (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
yt_dlp/extractor/toutiao.py (new file, 121 lines)
@@ -0,0 +1,121 @@
import json
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    float_or_none,
    int_or_none,
    str_or_none,
    try_call,
    url_or_none,
)
from ..utils.traversal import find_element, traverse_obj


class ToutiaoIE(InfoExtractor):
    IE_NAME = 'toutiao'
    IE_DESC = '今日头条'

    _VALID_URL = r'https?://www\.toutiao\.com/video/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.toutiao.com/video/7505382061495176511/',
        'info_dict': {
            'id': '7505382061495176511',
            'ext': 'mp4',
            'title': '新疆多地现不明飞行物，目击者称和月亮一样亮，几秒内突然加速消失，气象部门回应',
            'comment_count': int,
            'duration': 9.753,
            'like_count': int,
            'release_date': '20250517',
            'release_timestamp': 1747483344,
            'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
            'uploader': '极目新闻',
            'uploader_id': 'MS4wLjABAAAAeateBb9Su8I3MJOZozmvyzWktmba5LMlliRDz1KffnM',
            'view_count': int,
        },
    }, {
        'url': 'https://www.toutiao.com/video/7479446610359878153/',
        'info_dict': {
            'id': '7479446610359878153',
            'ext': 'mp4',
            'title': '小伙竟然利用两块磁铁制作成磁力减震器，简直太有创意了！',
            'comment_count': int,
            'duration': 118.374,
            'like_count': int,
            'release_date': '20250308',
            'release_timestamp': 1741444368,
            'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
            'uploader': '小莉创意发明',
            'uploader_id': 'MS4wLjABAAAA4f7d4mwtApALtHIiq-QM20dwXqe32NUz0DeWF7wbHKw',
            'view_count': int,
        },
    }]

    def _real_initialize(self):
        if self._get_cookies('https://www.toutiao.com').get('ttwid'):
            return

        urlh = self._request_webpage(
            'https://ttwid.bytedance.com/ttwid/union/register/', None,
            'Fetching ttwid', 'Unable to fetch ttwid', headers={
                'Content-Type': 'application/json',
            }, data=json.dumps({
                'aid': 24,
                'needFid': False,
                'region': 'cn',
                'service': 'www.toutiao.com',
                'union': True,
            }).encode(),
        )

        if ttwid := try_call(lambda: self._get_cookies(urlh.url)['ttwid'].value):
            self._set_cookie('.toutiao.com', 'ttwid', ttwid)
            return

        self.raise_login_required()

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        video_data = traverse_obj(webpage, (
            {find_element(tag='script', id='RENDER_DATA')},
            {urllib.parse.unquote}, {json.loads}, 'data', 'initialVideo',
        ))

        formats = []
        for video in traverse_obj(video_data, (
            'videoPlayInfo', 'video_list', lambda _, v: v['main_url'],
        )):
            formats.append({
                'url': video['main_url'],
                **traverse_obj(video, ('video_meta', {
                    'acodec': ('audio_profile', {str}),
                    'asr': ('audio_sample_rate', {int_or_none}),
                    'audio_channels': ('audio_channels', {float_or_none}, {int_or_none}),
                    'ext': ('vtype', {str}),
                    'filesize': ('size', {int_or_none}),
                    'format_id': ('definition', {str}),
                    'fps': ('fps', {int_or_none}),
                    'height': ('vheight', {int_or_none}),
                    'tbr': ('real_bitrate', {float_or_none(scale=1000)}),
                    'vcodec': ('codec_type', {str}),
                    'width': ('vwidth', {int_or_none}),
                })),
            })

        return {
            'id': video_id,
            'formats': formats,
            **traverse_obj(video_data, {
                'comment_count': ('commentCount', {int_or_none}),
                'duration': ('videoPlayInfo', 'video_duration', {float_or_none}),
                'like_count': ('repinCount', {int_or_none}),
                'release_timestamp': ('publishTime', {int_or_none}),
                'thumbnail': (('poster', 'coverUrl'), {url_or_none}, any),
                'title': ('title', {str}),
                'uploader': ('userInfo', 'name', {str}),
                'uploader_id': ('userInfo', 'userId', {str_or_none}),
                'view_count': ('playCount', {int_or_none}),
                'webpage_url': ('detailUrl', {url_or_none}),
            }),
        }
@@ -2,12 +2,13 @@ import json
import re

from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from ..utils import (
    determine_ext,
    extract_attributes,
    js_to_json,
    url_or_none,
)
from ..utils.traversal import find_element, traverse_obj


class TV2DKIE(InfoExtractor):
@@ -21,35 +22,46 @@ class TV2DKIE(InfoExtractor):
                            tv2fyn|
                            tv2east|
                            tv2lorry|
                            tv2nord
                            tv2nord|
                            tv2kosmopol
                        )\.dk/
                        (:[^/]+/)*
                        (?:[^/?#]+/)*
                        (?P<id>[^/?\#&]+)
                    '''
    _TESTS = [{
        'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
        'info_dict': {
            'id': '0_52jmwa0p',
            'id': 'sPp5z21q',
            'ext': 'mp4',
            'title': '19:30 - 28. okt. 2019',
            'timestamp': 1572290248,
            'description': '',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/sPp5z21q/poster.jpg?width=720',
            'timestamp': 1572287400,
            'upload_date': '20191028',
            'uploader_id': 'tvsyd',
            'duration': 1347,
            'view_count': int,
        },
        'add_ie': ['Kaltura'],
    }, {
        'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
        'info_dict': {
            'id': '1_7iwll9n0',
            'id': 'oD9cyq0m',
            'ext': 'mp4',
            'upload_date': '20211027',
            'title': 'Gadekamp #6 - Højhuse i København',
            'uploader_id': 'tv2lorry',
            'timestamp': 1635345229,
            'description': '',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/oD9cyq0m/poster.jpg?width=720',
            'timestamp': 1635348600,
            'upload_date': '20211027',
        },
        'add_ie': ['Kaltura'],
    }, {
        'url': 'https://www.tvsyd.dk/haderslev/x-factor-brodre-fulde-af-selvtillid-er-igen-hjemme-hos-mor-vores-diagnoser-har-vaeret-en-fordel',
        'info_dict': {
            'id': 'x-factor-brodre-fulde-af-selvtillid-er-igen-hjemme-hos-mor-vores-diagnoser-har-vaeret-en-fordel',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://www.tv2ostjylland.dk/aarhus/dom-kan-fa-alvorlige-konsekvenser',
        'info_dict': {
            'id': 'dom-kan-fa-alvorlige-konsekvenser',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
        'only_matching': True,
@@ -71,40 +83,22 @@ class TV2DKIE(InfoExtractor):
    }, {
        'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
        'only_matching': True,
    }, {
        'url': 'https://www.tv2kosmopol.dk/metropolen/chaufforer-beordres-til-at-kore-videre-i-ulovlige-busser-med-rode-advarselslamper',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        search_space = traverse_obj(webpage, {find_element(tag='article')}) or webpage

        entries = []
        player_ids = traverse_obj(
            re.findall(r'x-data="(?:video_player|simple_player)\(({[^"]+})', search_space),
            (..., {js_to_json}, {json.loads}, ('jwpMediaId', 'videoId'), {str}))

        def add_entry(partner_id, kaltura_id):
            entries.append(self.url_result(
                f'kaltura:{partner_id}:{kaltura_id}', 'Kaltura',
                video_id=kaltura_id))

        for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
            video = extract_attributes(video_el)
            kaltura_id = video.get('data-entryid')
            if not kaltura_id:
                continue
            partner_id = video.get('data-partnerid')
            if not partner_id:
                continue
            add_entry(partner_id, kaltura_id)
        if not entries:
            kaltura_id = self._search_regex(
                (r'entry_id\s*:\s*["\']([0-9a-z_]+)',
                 r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id')
            partner_id = self._search_regex(
                (r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
                'partner id')
            add_entry(partner_id, kaltura_id)
        if len(entries) == 1:
            return entries[0]
        return self.playlist_result(entries)
        return self.playlist_from_matches(
            player_ids, video_id, getter=lambda x: f'jwplatform:{x}', ie=JWPlatformIE)


class TV2DKBornholmPlayIE(InfoExtractor):
@@ -513,7 +513,7 @@ class TVPVODBaseIE(InfoExtractor):

class TVPVODVideoIE(TVPVODBaseIE):
    IE_NAME = 'tvp:vod'
    _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
    _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek--?\d+,S-?\d+E-?\d+)?,(?P<id>\d+)/?(?:[?#]|$)'

    _TESTS = [{
        'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
@@ -568,6 +568,9 @@ class TVPVODVideoIE(TVPVODBaseIE):
            'live_status': 'is_live',
            'thumbnail': 're:https?://.+',
        },
    }, {
        'url': 'https://vod.tvp.pl/informacje-i-publicystyka,205/konskie-2025-debata-przedwyborcza-odcinki,2028435/odcinek--1,S01E-1,2028419',
        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -1,13 +1,21 @@
import json

from .common import InfoExtractor
from ..utils import clean_html, remove_end, unified_timestamp, url_or_none
from ..utils.traversal import traverse_obj
from ..utils import (
    clean_html,
    extract_attributes,
    parse_qs,
    remove_end,
    require,
    unified_timestamp,
    url_or_none,
)
from ..utils.traversal import find_element, traverse_obj


class TvwIE(InfoExtractor):
    IE_NAME = 'tvw'
    _VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
        'md5': '9ceb94fe2bb7fd726f74f16356825703',
@@ -115,3 +123,43 @@ class TvwIE(InfoExtractor):
                'is_live': ('eventStatus', {lambda x: x == 'live'}),
            }),
        }


class TvwTvChannelsIE(InfoExtractor):
    IE_NAME = 'tvw:tvchannels'
    _VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://tvw.org/tvchannels/air/',
        'info_dict': {
            'id': 'air',
            'ext': 'mp4',
            'title': r're:TVW Cable Channel Live Stream',
            'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
            'live_status': 'is_live',
        },
    }, {
        'url': 'https://tvw.org/tvchannels/tvw2/',
        'info_dict': {
            'id': 'tvw2',
            'ext': 'mp4',
            'title': r're:TVW-2 Broadcast Channel',
            'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
            'live_status': 'is_live',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        m3u8_url = traverse_obj(webpage, (
            {find_element(id='invintus-persistent-stream-frame', html=True)}, {extract_attributes},
            'src', {parse_qs}, 'encoder', 0, {json.loads}, 'live247URI', {url_or_none}, {require('stream url')}))

        return {
            'id': video_id,
            'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True),
            'title': remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'is_live': True,
        }
@@ -1,4 +1,5 @@
import base64
import hashlib
import itertools
import re

@@ -14,12 +15,14 @@ from ..utils import (
    parse_duration,
    qualities,
    str_to_int,
    traverse_obj,
    try_get,
    unified_timestamp,
    update_url_query,
    url_or_none,
    urlencode_postdata,
    urljoin,
)
from ..utils.traversal import traverse_obj


class TwitCastingIE(InfoExtractor):
@@ -138,13 +141,7 @@ class TwitCastingIE(InfoExtractor):
                r'data-toggle="true"[^>]+datetime="([^"]+)"',
                webpage, 'datetime', None))

        stream_server_data = self._download_json(
            f'https://twitcasting.tv/streamserver.php?target={uploader_id}&mode=client', video_id,
            'Downloading live info', fatal=False)

        is_live = any(f'data-{x}' in webpage for x in ['is-onlive="true"', 'live-type="live"', 'status="online"'])
        if not traverse_obj(stream_server_data, 'llfmp4') and is_live:
            self.raise_login_required(method='cookies')

        base_dict = {
            'title': title,
@@ -165,30 +162,43 @@ class TwitCastingIE(InfoExtractor):
            return [data_movie_url]

        m3u8_urls = (try_get(webpage, find_dmu, list)
                     or traverse_obj(video_js_data, (..., 'source', 'url'))
                     or ([f'https://twitcasting.tv/{uploader_id}/metastream.m3u8'] if is_live else None))
        if not m3u8_urls:
            raise ExtractorError('Failed to get m3u8 playlist')
                     or traverse_obj(video_js_data, (..., 'source', 'url')))

        if is_live:
            m3u8_url = m3u8_urls[0]
            formats = self._extract_m3u8_formats(
                m3u8_url, video_id, ext='mp4', m3u8_id='hls',
                live=True, headers=self._M3U8_HEADERS)
            stream_data = self._download_json(
                'https://twitcasting.tv/streamserver.php',
                video_id, 'Downloading live info', query={
                    'target': uploader_id,
                    'mode': 'client',
                    'player': 'pc_web',
                })

            if traverse_obj(stream_server_data, ('hls', 'source')):
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, ext='mp4', m3u8_id='source',
                    live=True, query={'mode': 'source'},
                    note='Downloading source quality m3u8',
                    headers=self._M3U8_HEADERS, fatal=False))
            password_params = {
                'word': hashlib.md5(video_password.encode()).hexdigest(),
            } if video_password else None

            formats = []
            # low: 640x360, medium: 1280x720, high: 1920x1080
            qq = qualities(['low', 'medium', 'high'])
            for quality, m3u8_url in traverse_obj(stream_data, (
                'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
            )):
                formats.append({
                    'url': update_url_query(m3u8_url, password_params),
                    'format_id': f'hls-{quality}',
                    'ext': 'mp4',
                    'quality': qq(quality),
                    'protocol': 'm3u8',
                    'http_headers': self._M3U8_HEADERS,
                })

            if websockets:
                qq = qualities(['base', 'mobilesource', 'main'])
                streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {}
                for mode, ws_url in streams.items():
                for mode, ws_url in traverse_obj(stream_data, (
                    'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
                )):
                    formats.append({
                        'url': ws_url,
                        'url': update_url_query(ws_url, password_params),
                        'format_id': f'ws-{mode}',
                        'ext': 'mp4',
                        'quality': qq(mode),
@@ -197,10 +207,15 @@ class TwitCastingIE(InfoExtractor):
                        'protocol': 'websocket_frag',
                    })

            if not formats:
                self.raise_login_required()

            infodict = {
                'formats': formats,
                '_format_sort_fields': ('source', ),
            }
        elif not m3u8_urls:
            raise ExtractorError('Failed to get m3u8 playlist')
        elif len(m3u8_urls) == 1:
            formats = self._extract_m3u8_formats(
                m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS)
@@ -187,7 +187,7 @@ class TwitchBaseIE(InfoExtractor):
            'url': thumbnail,
        }] if thumbnail else None

    def _extract_twitch_m3u8_formats(self, path, video_id, token, signature):
    def _extract_twitch_m3u8_formats(self, path, video_id, token, signature, live_from_start=False):
        formats = self._extract_m3u8_formats(
            f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
                'allow_source': 'true',
@@ -204,7 +204,10 @@ class TwitchBaseIE(InfoExtractor):
        for fmt in formats:
            if fmt.get('vcodec') and fmt['vcodec'].startswith('av01'):
                # mpegts does not yet have proper support for av1
                fmt['downloader_options'] = {'ffmpeg_args_out': ['-f', 'mp4']}
                fmt.setdefault('downloader_options', {}).update({'ffmpeg_args_out': ['-f', 'mp4']})
            if live_from_start:
                fmt.setdefault('downloader_options', {}).update({'ffmpeg_args': ['-live_start_index', '0']})
                fmt['is_from_start'] = True

        return formats

@@ -550,7 +553,8 @@ class TwitchVodIE(TwitchBaseIE):
        access_token = self._download_access_token(vod_id, 'video', 'id')

        formats = self._extract_twitch_m3u8_formats(
            'vod', vod_id, access_token['value'], access_token['signature'])
            'vod', vod_id, access_token['value'], access_token['signature'],
            live_from_start=self.get_param('live_from_start'))
        formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration')))

        self._prefer_source(formats)
@@ -633,6 +637,10 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
    _PAGE_LIMIT = 100

    def _entries(self, channel_name, *args):
        """
        Subclasses must define _make_variables() and _extract_entry(),
        as well as set _OPERATION_NAME, _ENTRY_KIND, _EDGE_KIND, and _NODE_KIND
        """
        cursor = None
        variables_common = self._make_variables(channel_name, *args)
        entries_key = f'{self._ENTRY_KIND}s'
@@ -672,7 +680,22 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
            break


class TwitchVideosIE(TwitchPlaylistBaseIE):
class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
    _OPERATION_NAME = 'FilterableVideoTower_Videos'
    _ENTRY_KIND = 'video'
    _EDGE_KIND = 'VideoEdge'
    _NODE_KIND = 'Video'

    @staticmethod
    def _make_variables(channel_name, broadcast_type, sort):
        return {
            'channelOwnerLogin': channel_name,
            'broadcastType': broadcast_type,
            'videoSort': sort.upper(),
        }


class TwitchVideosIE(TwitchVideosBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'

    _TESTS = [{
@@ -751,11 +774,6 @@ class TwitchVideosIE(TwitchPlaylistBaseIE):
        'views': 'Popular',
    }

    _OPERATION_NAME = 'FilterableVideoTower_Videos'
    _ENTRY_KIND = 'video'
    _EDGE_KIND = 'VideoEdge'
    _NODE_KIND = 'Video'

    @classmethod
    def suitable(cls, url):
        return (False
@@ -764,14 +782,6 @@ class TwitchVideosIE(TwitchPlaylistBaseIE):
                    TwitchVideosCollectionsIE))
                else super().suitable(url))

    @staticmethod
    def _make_variables(channel_name, broadcast_type, sort):
        return {
            'channelOwnerLogin': channel_name,
            'broadcastType': broadcast_type,
            'videoSort': sort.upper(),
        }

    @staticmethod
    def _extract_entry(node):
        return _make_video_result(node)
@@ -919,7 +929,7 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
        playlist_title=f'{channel_name} - Collections')


class TwitchStreamIE(TwitchBaseIE):
class TwitchStreamIE(TwitchVideosBaseIE):
    IE_NAME = 'twitch:stream'
    _VALID_URL = r'''(?x)
        https?://
@@ -982,6 +992,7 @@ class TwitchStreamIE(TwitchBaseIE):
            'skip_download': 'Livestream',
        },
    }]
    _PAGE_LIMIT = 1

    @classmethod
    def suitable(cls, url):
@@ -995,6 +1006,20 @@ class TwitchStreamIE(TwitchBaseIE):
                    TwitchClipsIE))
                else super().suitable(url))

    @staticmethod
    def _extract_entry(node):
        if not isinstance(node, dict) or not node.get('id'):
            return None
        video_id = node['id']
        return {
            '_type': 'url',
            'ie_key': TwitchVodIE.ie_key(),
            'id': 'v' + video_id,
            'url': f'https://www.twitch.tv/videos/{video_id}',
            'title': node.get('title'),
            'timestamp': unified_timestamp(node.get('publishedAt')) or 0,
        }

    def _real_extract(self, url):
        channel_name = self._match_id(url).lower()

@@ -1029,6 +1054,16 @@ class TwitchStreamIE(TwitchBaseIE):
        if not stream:
            raise UserNotLive(video_id=channel_name)

        timestamp = unified_timestamp(stream.get('createdAt'))

        if self.get_param('live_from_start'):
            self.to_screen(f'{channel_name}: Extracting VOD to download live from start')
            entry = next(self._entries(channel_name, None, 'time'), None)
            if entry and entry.pop('timestamp') >= (timestamp or float('inf')):
                return entry
            self.report_warning(
                'Unable to extract the VOD associated with this livestream', video_id=channel_name)

        access_token = self._download_access_token(
            channel_name, 'stream', 'channelName')

@@ -1038,7 +1073,6 @@ class TwitchStreamIE(TwitchBaseIE):
        self._prefer_source(formats)

        view_count = stream.get('viewers')
        timestamp = unified_timestamp(stream.get('createdAt'))

        sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
        uploader = sq_user.get('displayName')
@@ -1225,8 +1259,8 @@ class TwitchClipsIE(TwitchBaseIE):
                'channel_id': ('broadcaster', 'id', {str}),
                'channel_follower_count': ('broadcaster', 'followers', 'totalCount', {int_or_none}),
                'channel_is_verified': ('broadcaster', 'isPartner', {bool}),
                'uploader': ('broadcaster', 'displayName', {str}),
                'uploader_id': ('broadcaster', 'id', {str}),
                'uploader': ('curator', 'displayName', {str}),
                'uploader_id': ('curator', 'id', {str}),
                'categories': ('game', 'displayName', {str}, filter, all, filter),
            }),
        }
@@ -20,7 +20,6 @@ from ..utils import (
    remove_end,
    str_or_none,
    strip_or_none,
    traverse_obj,
    truncate_string,
    try_call,
    try_get,
@@ -29,6 +28,7 @@ from ..utils import (
    url_or_none,
    xpath_text,
)
from ..utils.traversal import require, traverse_obj


class TwitterBaseIE(InfoExtractor):
@@ -1221,20 +1221,10 @@ class TwitterIE(TwitterBaseIE):
    }]

    _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')

    @property
    def _GRAPHQL_ENDPOINT(self):
        if self.is_logged_in:
            return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
        return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
    _GRAPHQL_ENDPOINT = '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'

    def _graphql_to_legacy(self, data, twid):
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False) if self.is_logged_in else traverse_obj(
            data, ('tweetResult', 'result', {dict}), default={})
        result = traverse_obj(data, ('tweetResult', 'result', {dict})) or {}

        typename = result.get('__typename')
        if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
@@ -1278,37 +1268,6 @@ class TwitterIE(TwitterBaseIE):

    def _build_graphql_query(self, media_id):
        return {
            'variables': {
                'focalTweetId': media_id,
                'includePromotedContent': True,
                'with_rux_injections': False,
                'withBirdwatchNotes': True,
                'withCommunity': True,
                'withDownvotePerspective': False,
                'withQuickPromoteEligibilityTweetFields': True,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withSuperFollowsTweetFields': True,
                'withSuperFollowsUserFields': True,
                'withV2Timeline': True,
                'withVoice': True,
            },
            'features': {
                'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_graphql_timeline_navigation_enabled': False,
                'responsive_web_text_conversations_enabled': False,
                'responsive_web_uc_gql_enabled': True,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'tweetypie_unmention_optimization_enabled': True,
                'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
                'verified_phone_label_enabled': False,
                'vibe_api_enabled': True,
            },
        } if self.is_logged_in else {
            'variables': {
                'tweetId': media_id,
                'withCommunity': False,
@@ -1383,7 +1342,7 @@ class TwitterIE(TwitterBaseIE):
                    'tweet_mode': 'extended',
                })
            except ExtractorError as e:
                if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
                if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
                    raise
                self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
                status = self._call_syndication_api(twid)
@@ -1637,8 +1596,8 @@ class TwitterAmplifyIE(TwitterBaseIE):

class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?P<type>broadcasts|events)/(?P<id>\w+)'
    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
@@ -1646,6 +1605,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'display_id': '1yNGaQLWpejGj',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
@@ -1653,6 +1613,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
            'concurrent_view_count': int,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
@@ -1660,6 +1622,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'display_id': '1ZkKzeyrPbaxv',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
@@ -1667,6 +1630,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
            'concurrent_view_count': int,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
@@ -1674,6 +1639,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'display_id': '1OyKAVQrgzwGb',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
@@ -1681,21 +1647,58 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
            'concurrent_view_count': int,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://x.com/i/events/1910629646300762112',
        'info_dict': {
            'id': '1LyxBWDRNqyKN',
            'ext': 'mp4',
            'title': '#ガンニバル ウォッチパーティー',
            'concurrent_view_count': int,
            'display_id': '1910629646300762112',
            'live_status': 'was_live',
            'release_date': '20250423',
            'release_timestamp': 1745409000,
            'tags': ['ガンニバル'],
            'thumbnail': r're:https?://[^?#]+\.jpg\?token=',
            'timestamp': 1745403328,
            'upload_date': '20250423',
            'uploader': 'ディズニープラス公式',
            'uploader_id': 'DisneyPlusJP',
            'uploader_url': 'https://twitter.com/DisneyPlusJP',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        broadcast_id = self._match_id(url)
        broadcast_type, display_id = self._match_valid_url(url).group('type', 'id')

        if broadcast_type == 'events':
            timeline = self._call_api(
                f'live_event/1/{display_id}/timeline.json', display_id)
            broadcast_id = traverse_obj(timeline, (
                'twitter_objects', 'broadcasts', ..., ('id', 'broadcast_id'),
                {str}, any, {require('broadcast ID')}))
        else:
            broadcast_id = display_id

        broadcast = self._call_api(
            'broadcasts/show.json', broadcast_id,
            {'ids': broadcast_id})['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)
        info = self._parse_broadcast_data(broadcast, broadcast_id)
        info['title'] = broadcast.get('status') or info.get('title')
        info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
        info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
        info.update({
            'display_id': display_id,
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(
                broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            self.raise_no_formats('This live broadcast has not yet started', expected=True)
            return info

        media_key = broadcast['media_key']
@@ -1717,21 +1720,22 @@ class TwitterSpacesIE(TwitterBaseIE):
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'url': 'https://twitter.com/i/spaces/1OwxWwQOPlNxQ',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'id': '1OwxWwQOPlNxQ',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'title': 'Everybody in: @mtbarra & @elonmusk discuss the future of EV charging',
            'description': 'Twitter Space participated by Elon Musk',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
            'release_date': '20230608',
            'release_timestamp': 1686256230,
            'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
            'timestamp': 1686254250,
            'upload_date': '20230608',
            'uploader': 'Mary Barra',
            'uploader_id': 'mtbarra',
        },
        'skip': 'No longer available',
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
@@ -1743,9 +1747,10 @@ class TwitterSpacesIE(TwitterBaseIE):
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_timestamp': 1681839082,
            'release_date': '20230418',
            'protocol': 'm3u8',  # ffmpeg is forced
            'container': 'm4a_dash',  # audio-only format fixup is applied
@@ -1762,6 +1767,9 @@ class TwitterSpacesIE(TwitterBaseIE):
            'uploader': '息根とめる',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'release_date': '20230601',
            'release_timestamp': 1685617200,
            'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
            'timestamp': 1685617198,
            'upload_date': '20230601',
            'protocol': 'm3u8',  # ffmpeg is forced
@@ -1779,9 +1787,10 @@ class TwitterSpacesIE(TwitterBaseIE):
            'uploader': 'Candace Owens',
            'uploader_id': 'RealCandaceO',
            'live_status': 'was_live',
            'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
            'timestamp': 1723931351,
            'upload_date': '20240817',
            'release_timestamp': 1723932000,
            'release_timestamp': 1723932056,
            'release_date': '20240817',
            'protocol': 'm3u8_native',  # not ffmpeg, detected as video space
        },
@@ -1861,18 +1870,21 @@ class TwitterSpacesIE(TwitterBaseIE):

        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
            'live_status': live_status,
            **traverse_obj(metadata, {
                'title': ('title', {str}),
                # started_at is None when stream is_upcoming so fallback to scheduled_start for --wait-for-video
                'release_timestamp': (('started_at', 'scheduled_start'), {int_or_none(scale=1000)}, any),
                'timestamp': ('created_at', {int_or_none(scale=1000)}),
            }),
            **traverse_obj(metadata, ('creator_results', 'result', 'legacy', {
                'uploader': ('name', {str}),
                'uploader_id': ('screen_name', {str_or_none}),
                'thumbnail': ('profile_image_url_https', {lambda x: x.replace('_normal', '_400x400')}, {url_or_none}),
            })),
        }
@@ -3,6 +3,7 @@ import functools
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -13,10 +14,12 @@ from ..utils import (
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
jwt_decode_hs256,
|
||||
merge_dicts,
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
@@ -39,6 +42,18 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
_LOGIN_REQUIRED = False
|
||||
_LOGIN_URL = 'https://vimeo.com/log_in'
|
||||
_REFERER_HINT = (
|
||||
'Cannot download embed-only video without embedding URL. Please call yt-dlp '
|
||||
'with the URL of the page that embeds this video.')
|
||||
_IOS_CLIENT_AUTH = 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw=='
|
||||
_IOS_CLIENT_HEADERS = {
|
||||
'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
|
||||
'Accept-Language': 'en',
|
||||
'User-Agent': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0',
|
||||
}
|
||||
_IOS_OAUTH_CACHE_KEY = 'oauth-token-ios'
|
||||
_ios_oauth_token = None
|
||||
_viewer_info = None
|
||||
|
||||
@staticmethod
|
||||
def _smuggle_referrer(url, referrer_url):
|
||||
@@ -52,8 +67,21 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
headers['Referer'] = data['referer']
|
||||
return url, data, headers
|
||||
|
||||
def _jwt_is_expired(self, token):
|
||||
return jwt_decode_hs256(token)['exp'] - time.time() < 120
|
||||
|
||||
def _fetch_viewer_info(self, display_id=None, fatal=True):
|
||||
if self._viewer_info and not self._jwt_is_expired(self._viewer_info['jwt']):
|
||||
return self._viewer_info
|
||||
|
||||
self._viewer_info = self._download_json(
|
||||
'https://vimeo.com/_next/viewer', display_id, 'Downloading web token info',
|
||||
'Failed to download web token info', fatal=fatal, headers={'Accept': 'application/json'})
|
||||
|
||||
return self._viewer_info
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
|
||||
viewer = self._fetch_viewer_info()
|
||||
data = {
|
||||
'action': 'login',
|
||||
'email': username,
|
||||
@@ -88,13 +116,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
expected=True)
|
||||
return password
|
||||
|
||||
def _verify_video_password(self, video_id, password, token):
|
||||
url = f'https://vimeo.com/{video_id}'
|
||||
def _verify_video_password(self, video_id, path=None):
|
||||
video_password = self._get_video_password()
|
||||
token = self._fetch_viewer_info(video_id)['xsrft']
|
||||
url = join_nonempty('https://vimeo.com', path, video_id, delim='/')
|
||||
try:
|
||||
return self._download_webpage(
|
||||
self._request_webpage(
|
||||
f'{url}/password', video_id,
|
||||
'Submitting video password', data=json.dumps({
|
||||
'password': password,
|
||||
'password': video_password,
|
||||
'token': token,
|
||||
}, separators=(',', ':')).encode(), headers={
|
||||
'Accept': '*/*',
|
||||
@@ -106,6 +136,10 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
raise ExtractorError('Wrong password', expected=True)
|
||||
raise
|
||||
|
||||
def _extract_config_url(self, webpage, **kwargs):
|
||||
return self._html_search_regex(
|
||||
r'\bdata-config-url="([^"]+)"', webpage, 'config URL', **kwargs)
|
||||
|
||||
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
|
||||
vimeo_config = self._search_regex(
|
||||
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
|
||||
@@ -153,6 +187,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
sep_pattern = r'/sep/video/'
|
||||
for files_type in ('hls', 'dash'):
|
||||
for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
|
||||
# TODO: Also extract 'avc_url'? Investigate if there are 'hevc_url', 'av1_url'?
|
||||
manifest_url = cdn_data.get('url')
|
||||
if not manifest_url:
|
||||
continue
|
||||
@@ -233,26 +268,48 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'live_status': live_status,
|
||||
'release_timestamp': traverse_obj(live_event, ('ingest', 'scheduled_start_time', {parse_iso8601})),
|
||||
'release_timestamp': traverse_obj(live_event, ('ingest', (
|
||||
('scheduled_start_time', {parse_iso8601}),
|
||||
('start_time', {int_or_none}),
|
||||
), any)),
|
||||
# Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
|
||||
# at the same time without actual units specified.
|
||||
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
|
||||
}
|
||||
|
||||
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None, **kwargs):
|
||||
def _fetch_oauth_token(self):
|
||||
if not self._ios_oauth_token:
|
||||
self._ios_oauth_token = self.cache.load(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY)
|
||||
|
||||
if not self._ios_oauth_token:
|
||||
self._ios_oauth_token = self._download_json(
|
||||
'https://api.vimeo.com/oauth/authorize/client', None,
|
||||
'Fetching OAuth token', 'Failed to fetch OAuth token',
|
||||
headers={
|
||||
'Authorization': f'Basic {self._IOS_CLIENT_AUTH}',
|
||||
**self._IOS_CLIENT_HEADERS,
|
||||
}, data=urlencode_postdata({
|
||||
'grant_type': 'client_credentials',
|
||||
'scope': 'private public create edit delete interact upload purchased stats',
|
||||
}, quote_via=urllib.parse.quote))['access_token']
|
||||
self.cache.store(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY, self._ios_oauth_token)
|
||||
|
||||
return self._ios_oauth_token
|
||||
|
||||
def _call_videos_api(self, video_id, unlisted_hash=None, **kwargs):
|
||||
return self._download_json(
|
||||
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
|
||||
video_id, 'Downloading API JSON', headers={
|
||||
'Authorization': f'jwt {jwt_token}',
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._fetch_oauth_token()}',
|
||||
**self._IOS_CLIENT_HEADERS,
|
||||
}, query={
|
||||
'fields': ','.join((
|
||||
'config_url', 'created_time', 'description', 'download', 'license',
|
||||
'metadata.connections.comments.total', 'metadata.connections.likes.total',
|
||||
'release_time', 'stats.plays')),
|
||||
'config_url', 'embed_player_config_url', 'player_embed_url', 'download', 'play',
|
||||
'files', 'description', 'license', 'release_time', 'created_time', 'stats.plays',
|
||||
'metadata.connections.comments.total', 'metadata.connections.likes.total')),
|
||||
}, **kwargs)
|
||||
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None, api_data=None):
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None, api_data=None):
|
||||
# Original/source formats are only available when logged in
|
||||
if not self._get_cookies('https://vimeo.com/').get('vimeo'):
|
||||
return
|
||||
@@ -283,12 +340,8 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'quality': 1,
|
||||
}
|
||||
|
||||
jwt = jwt or traverse_obj(self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id, 'Downloading jwt token', fatal=False), ('jwt', {str}))
|
||||
if not jwt:
|
||||
return
|
||||
original_response = api_data or self._call_videos_api(
|
||||
video_id, jwt, unlisted_hash, fatal=False, expected_status=(403, 404))
|
||||
video_id, unlisted_hash, fatal=False, expected_status=(403, 404))
|
||||
for download_data in traverse_obj(original_response, ('download', ..., {dict})):
|
||||
download_url = download_data.get('link')
|
||||
if not download_url or download_data.get('quality') != 'source':
|
||||
@@ -327,7 +380,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
(?:
|
||||
(?P<u>user)|
|
||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||
(?:.*?/)??
|
||||
(?:(?!event/).*?/)??
|
||||
(?P<q>
|
||||
(?:
|
||||
play_redirect_hls|
|
||||
@@ -410,6 +463,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'duration': 10,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d',
|
||||
},
|
||||
'params': {
|
||||
@@ -500,15 +554,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'The DMCI',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
|
||||
'uploader_id': 'dmci',
|
||||
'timestamp': 1324343742,
|
||||
'timestamp': 1324361742,
|
||||
'upload_date': '20111220',
|
||||
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
|
||||
'description': 'md5:f37b4ad0f3ded6fa16f38ecde16c3c44',
|
||||
'duration': 60,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d',
|
||||
'like_count': int,
|
||||
'tags': 'count:11',
|
||||
'release_timestamp': 1324361742,
|
||||
'release_date': '20111220',
|
||||
},
|
||||
# 'params': {'format': 'Original'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
@@ -521,15 +576,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'id': '393756517',
|
||||
# 'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1582642091,
|
||||
'timestamp': 1582660091,
|
||||
'uploader_id': 'frameworkla',
|
||||
'title': 'Straight To Hell - Sabrina: Netflix',
|
||||
'uploader': 'Framework Studio',
|
||||
'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
|
||||
'upload_date': '20200225',
|
||||
'duration': 176,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d',
|
||||
'uploader_url': 'https://vimeo.com/frameworkla',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'release_timestamp': 1582660091,
|
||||
'release_date': '20200225',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
@@ -630,7 +688,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'description': str, # FIXME: Dynamic SEO spam description
|
||||
'upload_date': '20150209',
|
||||
'timestamp': 1423518307,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/default',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/default',
|
||||
'duration': 10,
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/user20132939',
|
||||
@@ -667,6 +725,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/aliniamedia',
|
||||
'release_date': '20160329',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
@@ -678,18 +737,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
# 'ext': 'm4v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
|
||||
'description': 'md5:5967e090768a831488f6e74b7821b3c1',
|
||||
'description': 'md5:9441e6829ae94f380cc6417d982f63ac',
|
||||
'uploader_id': 'fireworkchampions',
|
||||
'uploader': 'Firework Champions',
|
||||
'upload_date': '20150910',
|
||||
'timestamp': 1441901895,
|
||||
'timestamp': 1441916295,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d',
|
||||
'uploader_url': 'https://vimeo.com/fireworkchampions',
|
||||
'tags': 'count:6',
|
||||
'duration': 229,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1441916295,
|
||||
'release_date': '20150910',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -820,7 +880,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'Raja Virdi',
|
||||
'uploader_id': 'rajavirdi',
|
||||
'uploader_url': 'https://vimeo.com/rajavirdi',
|
||||
'duration': 309,
|
||||
'duration': 300,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
@@ -860,12 +920,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
return checked
|
||||
|
||||
def _extract_from_api(self, video_id, unlisted_hash=None):
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
|
||||
|
||||
for retry in (False, True):
|
||||
try:
|
||||
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
|
||||
video = self._call_videos_api(video_id, unlisted_hash)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
|
||||
@@ -873,15 +930,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
|
||||
({json.loads}, 'invalid_parameters', ..., 'field'),
|
||||
)):
|
||||
self._verify_video_password(
|
||||
video_id, self._get_video_password(), viewer['xsrft'])
|
||||
self._verify_video_password(video_id)
|
||||
continue
|
||||
raise
|
||||
|
||||
info = self._parse_config(self._download_json(
|
||||
video['config_url'], video_id), video_id)
|
||||
source_format = self._extract_original_format(
|
||||
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, jwt=viewer['jwt'], api_data=video)
|
||||
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, api_data=video)
|
||||
if source_format:
|
||||
info['formats'].append(source_format)
|
||||
|
||||
@@ -904,8 +960,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None)
|
||||
if not album_id:
|
||||
return
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', album_id, fatal=False)
|
||||
viewer = self._fetch_viewer_info(album_id, fatal=False)
|
||||
if not viewer:
|
||||
webpage = self._download_webpage(url, album_id)
|
||||
viewer = self._parse_json(self._search_regex(
|
||||
@@ -963,9 +1018,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                    raise
                errmsg = error.cause.response.read()
                if b'Because of its privacy settings, this video cannot be played here' in errmsg:
                    raise ExtractorError(
                        'Cannot download embed-only video without embedding URL. Please call yt-dlp '
                        'with the URL of the page that embeds this video.', expected=True)
                    raise ExtractorError(self._REFERER_HINT, expected=True)
                # 403 == vimeo.com TLS fingerprint or DC IP block; 429 == player.vimeo.com TLS FP block
                status = error.cause.status
                dcip_msg = 'If you are using a data center IP or VPN/proxy, your IP may be blocked'
@@ -1010,8 +1063,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
        channel_id = self._search_regex(
            r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
        if channel_id:
            config_url = self._html_search_regex(
                r'\bdata-config-url="([^"]+)"', webpage, 'config URL', default=None)
            config_url = self._extract_config_url(webpage, default=None)
            video_description = clean_html(get_element_by_class('description', webpage))
            info_dict.update({
                'channel_id': channel_id,
@@ -1122,7 +1174,7 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
            'description': 'md5:aeeba3dbd4d04b0fa98a4fdc9c639998',
            'upload_date': '20140906',
            'timestamp': 1410032453,
            'thumbnail': 'https://i.vimeocdn.com/video/488238335-d7bf151c364cff8d467f1b73784668fe60aae28a54573a35d53a1210ae283bd8-d_1280',
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'comment_count': int,
            'license': 'https://creativecommons.org/licenses/by-nc-nd/3.0/',
            'duration': 53,
@@ -1132,7 +1184,7 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
        'params': {
            'format': 'best[protocol=https]',
        },
        'expected_warnings': ['Unable to download JSON metadata'],
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # requires Referer to be passed along with og:video:url
        'url': 'https://vimeo.com/ondemand/36938/126682985',
@@ -1149,13 +1201,14 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
            'duration': 121,
            'comment_count': int,
            'view_count': int,
            'thumbnail': 'https://i.vimeocdn.com/video/517077723-7066ae1d9a79d3eb361334fb5d58ec13c8f04b52f8dd5eadfbd6fb0bcf11f613-d_1280',
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'like_count': int,
            'tags': 'count:5',
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download JSON metadata'],
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        'url': 'https://vimeo.com/ondemand/nazmaalik',
        'only_matching': True,
@@ -1237,7 +1290,7 @@ class VimeoUserIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
    _TESTS = [{
        'url': 'https://vimeo.com/nkistudio/videos',
        'info_dict': {
            'title': 'Nki',
            'title': 'AKAMA',
            'id': 'nkistudio',
        },
        'playlist_mincount': 66,
@@ -1303,8 +1356,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):

    def _real_extract(self, url):
        album_id = self._match_id(url)
        viewer = self._download_json(
            'https://vimeo.com/_rv/viewer', album_id, fatal=False)
        viewer = self._fetch_viewer_info(album_id, fatal=False)
        if not viewer:
            webpage = self._download_webpage(url, album_id)
            viewer = self._parse_json(self._search_regex(
@@ -1370,10 +1422,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
            'uploader_id': 'user170863801',
            'uploader_url': 'https://vimeo.com/user170863801',
            'duration': 30,
            'thumbnail': 'https://i.vimeocdn.com/video/1912612821-09a43bd2e75c203d503aed89de7534f28fc4474a48f59c51999716931a246af5-d_1280',
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML'],
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
        'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -1423,12 +1475,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
        user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash')
        data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}'
        data = self._download_json(data_url, video_id)
        viewer = {}
        if data.get('isLocked') is True:
            video_password = self._get_video_password()
            viewer = self._download_json(
                'https://vimeo.com/_rv/viewer', video_id)
            self._verify_video_password(video_id, video_password, viewer['xsrft'])
            self._verify_video_password(video_id)
            data = self._download_json(data_url, video_id)
        clip_data = data['clipData']
        config_url = clip_data['configUrl']
@@ -1436,7 +1484,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
        info_dict = self._parse_config(config, video_id)
        source_format = self._extract_original_format(
            f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action',
            video_id, unlisted_hash=clip_data.get('unlistedHash'), jwt=viewer.get('jwt'))
            video_id, unlisted_hash=clip_data.get('unlistedHash'))
        if source_format:
            info_dict['formats'].append(source_format)
        info_dict['description'] = clean_html(clip_data.get('description'))
@@ -1528,20 +1576,22 @@ class VimeoProIE(VimeoBaseInfoExtractor):
            'uploader_id': 'openstreetmapus',
            'uploader': 'OpenStreetMap US',
            'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
            'description': 'md5:2c362968038d4499f4d79f88458590c1',
            'description': 'md5:8cf69a1a435f2d763f4adf601e9c3125',
            'duration': 1595,
            'upload_date': '20130610',
            'timestamp': 1370893156,
            'timestamp': 1370907556,
            'license': 'by',
            'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
            'tags': 'count:1',
            'release_timestamp': 1370907556,
            'release_date': '20130610',
        },
        'params': {
            'format': 'best[protocol=https]',
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # password-protected VimeoPro page with Vimeo player embed
        'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion',
@@ -1549,7 +1599,7 @@ class VimeoProIE(VimeoBaseInfoExtractor):
            'id': '764543723',
            'ext': 'mp4',
            'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben',
            'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280',
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420',
            'uploader': 'CADFEM',
            'uploader_id': 'cadfem',
@@ -1561,6 +1611,7 @@ class VimeoProIE(VimeoBaseInfoExtractor):
            'videopassword': 'Conference2022',
            'skip_download': True,
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }]

    def _real_extract(self, url):
@@ -1597,3 +1648,377 @@ class VimeoProIE(VimeoBaseInfoExtractor):

        return self.url_result(vimeo_url, VimeoIE, video_id, url_transparent=True,
                               description=description)


class VimeoEventIE(VimeoBaseInfoExtractor):
    IE_NAME = 'vimeo:event'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?vimeo\.com/event/(?P<id>\d+)(?:/
            (?:
                (?:embed/)?(?P<unlisted_hash>[\da-f]{10})|
                videos/(?P<video_id>\d+)
            )
        )?'''
    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>https?://vimeo\.com/event/\d+/embed(?:[/?][^"\']*)?)["\'][^>]*>']
    _TESTS = [{
        # stream_privacy.view: 'anybody'
        'url': 'https://vimeo.com/event/5116195',
        'info_dict': {
            'id': '1082194134',
            'ext': 'mp4',
            'display_id': '5116195',
            'title': 'Skidmore College Commencement 2025',
            'description': 'md5:1902dd5165d21f98aa198297cc729d23',
            'uploader': 'Skidmore College',
            'uploader_id': 'user116066434',
            'uploader_url': 'https://vimeo.com/user116066434',
            'comment_count': int,
            'like_count': int,
            'duration': 9810,
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'timestamp': 1747502974,
            'upload_date': '20250517',
            'release_timestamp': 1747502998,
            'release_date': '20250517',
            'live_status': 'was_live',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # stream_privacy.view: 'embed_only'
        'url': 'https://vimeo.com/event/5034253/embed',
        'info_dict': {
            'id': '1071439154',
            'ext': 'mp4',
            'display_id': '5034253',
            'title': 'Advancing Humans with AI',
            'description': r're:AI is here to stay, but how do we ensure that people flourish in a world of pervasive AI use.{322}$',
            'uploader': 'MIT Media Lab',
            'uploader_id': 'mitmedialab',
            'uploader_url': 'https://vimeo.com/mitmedialab',
            'duration': 23235,
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'chapters': 'count:37',
            'release_timestamp': 1744290000,
            'release_date': '20250410',
            'live_status': 'was_live',
        },
        'params': {
            'skip_download': 'm3u8',
            'http_headers': {'Referer': 'https://www.media.mit.edu/events/aha-symposium/'},
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # Last entry on 2nd page of the 37 video playlist, but use clip_to_play_id API param shortcut
        'url': 'https://vimeo.com/event/4753126/videos/1046153257',
        'info_dict': {
            'id': '1046153257',
            'ext': 'mp4',
            'display_id': '4753126',
            'title': 'January 12, 2025 The True Vine (Pastor John Mindrup)',
            'description': 'The True Vine (Pastor \tJohn Mindrup)',
            'uploader': 'Salem United Church of Christ',
            'uploader_id': 'user230181094',
            'uploader_url': 'https://vimeo.com/user230181094',
            'comment_count': int,
            'like_count': int,
            'duration': 4962,
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'timestamp': 1736702464,
            'upload_date': '20250112',
            'release_timestamp': 1736702543,
            'release_date': '20250112',
            'live_status': 'was_live',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # "24/7" livestream
        'url': 'https://vimeo.com/event/4768062',
        'info_dict': {
            'id': '1079901414',
            'ext': 'mp4',
            'display_id': '4768062',
            'title': r're:GRACELAND CAM \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'description': '24/7 camera at Graceland Mansion',
            'uploader': 'Elvis Presley\'s Graceland',
            'uploader_id': 'visitgraceland',
            'uploader_url': 'https://vimeo.com/visitgraceland',
            'release_timestamp': 1745975450,
            'release_date': '20250430',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'livestream'},
    }, {
        # stream_privacy.view: 'unlisted' with unlisted_hash in URL path (stream_privacy.embed: 'whitelist')
        'url': 'https://vimeo.com/event/4259978/3db517c479',
        'info_dict': {
            'id': '939104114',
            'ext': 'mp4',
            'display_id': '4259978',
            'title': 'Enhancing Credibility in Your Community Science Project',
            'description': 'md5:eab953341168b9c146bc3cfe3f716070',
            'uploader': 'NOAA Research',
            'uploader_id': 'noaaresearch',
            'uploader_url': 'https://vimeo.com/noaaresearch',
            'comment_count': int,
            'like_count': int,
            'duration': 3961,
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'timestamp': 1716408008,
            'upload_date': '20240522',
            'release_timestamp': 1716408062,
            'release_date': '20240522',
            'live_status': 'was_live',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # "done" event with video_id in URL and unlisted_hash in VimeoIE URL
        'url': 'https://vimeo.com/event/595460/videos/498149131/',
        'info_dict': {
            'id': '498149131',
            'ext': 'mp4',
            'display_id': '595460',
            'title': '2021 Eighth Annual John Cardinal Foley Lecture on Social Communications',
            'description': 'Replay: https://vimeo.com/catholicphilly/review/498149131/544f26a12f',
            'uploader': 'Kearns Media Consulting LLC',
            'uploader_id': 'kearnsmediaconsulting',
            'uploader_url': 'https://vimeo.com/kearnsmediaconsulting',
            'comment_count': int,
            'like_count': int,
            'duration': 4466,
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'timestamp': 1612228466,
            'upload_date': '20210202',
            'release_timestamp': 1612228538,
            'release_date': '20210202',
            'live_status': 'was_live',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # stream_privacy.view: 'password'; stream_privacy.embed: 'public'
        'url': 'https://vimeo.com/event/4940578',
        'info_dict': {
            'id': '1059263570',
            'ext': 'mp4',
            'display_id': '4940578',
            'title': 'TMAC AKC AGILITY 2-22-2025',
            'uploader': 'Paws \'N Effect',
            'uploader_id': 'pawsneffect',
            'uploader_url': 'https://vimeo.com/pawsneffect',
            'comment_count': int,
            'like_count': int,
            'duration': 33115,
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'timestamp': 1740261836,
            'upload_date': '20250222',
            'release_timestamp': 1740261873,
            'release_date': '20250222',
            'live_status': 'was_live',
        },
        'params': {
            'videopassword': '22',
            'skip_download': 'm3u8',
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # API serves a playlist of 37 videos, but the site only streams the newest one (changes every Sunday)
        'url': 'https://vimeo.com/event/4753126',
        'only_matching': True,
    }, {
        # Scheduled for 2025.05.15 but never started; "unavailable"; stream_privacy.view: "anybody"
        'url': 'https://vimeo.com/event/5120811/embed',
        'only_matching': True,
    }, {
        'url': 'https://vimeo.com/event/5112969/embed?muted=1',
        'only_matching': True,
    }, {
        'url': 'https://vimeo.com/event/5097437/embed/interaction?muted=1',
        'only_matching': True,
    }, {
        'url': 'https://vimeo.com/event/5113032/embed?autoplay=1&muted=1',
        'only_matching': True,
    }, {
        # Ended livestream with video_id
        'url': 'https://vimeo.com/event/595460/videos/507329569/',
        'only_matching': True,
    }, {
        # stream_privacy.view: 'unlisted' with unlisted_hash in URL path (stream_privacy.embed: 'public')
        'url': 'https://vimeo.com/event/4606123/embed/358d60ce2e',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        # Same result as https://vimeo.com/event/5034253/embed
        'url': 'https://www.media.mit.edu/events/aha-symposium/',
        'info_dict': {
            'id': '1071439154',
            'ext': 'mp4',
            'display_id': '5034253',
            'title': 'Advancing Humans with AI',
            'description': r're:AI is here to stay, but how do we ensure that people flourish in a world of pervasive AI use.{322}$',
            'uploader': 'MIT Media Lab',
            'uploader_id': 'mitmedialab',
            'uploader_url': 'https://vimeo.com/mitmedialab',
            'duration': 23235,
            'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
            'chapters': 'count:37',
            'release_timestamp': 1744290000,
            'release_date': '20250410',
            'live_status': 'was_live',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }]

    _EVENT_FIELDS = (
        'title', 'uri', 'schedule', 'stream_description', 'stream_privacy.embed', 'stream_privacy.view',
        'clip_to_play.name', 'clip_to_play.uri', 'clip_to_play.config_url', 'clip_to_play.live.status',
        'clip_to_play.privacy.embed', 'clip_to_play.privacy.view', 'clip_to_play.password',
        'streamable_clip.name', 'streamable_clip.uri', 'streamable_clip.config_url', 'streamable_clip.live.status',
    )
    _VIDEOS_FIELDS = ('items', 'uri', 'name', 'config_url', 'duration', 'live.status')

    def _call_events_api(
        self, event_id, ep=None, unlisted_hash=None, note=None,
        fields=(), referrer=None, query=None, headers=None,
    ):
        resource = join_nonempty('event', ep, note, 'API JSON', delim=' ')

        return self._download_json(
            join_nonempty(
                'https://api.vimeo.com/live_events',
                join_nonempty(event_id, unlisted_hash, delim=':'), ep, delim='/'),
            event_id, f'Downloading {resource}', f'Failed to download {resource}',
            query=filter_dict({
                'fields': ','.join(fields) or [],
                # Correct spelling with 4 R's is deliberate
                'referrer': referrer,
                **(query or {}),
            }), headers=filter_dict({
                'Accept': 'application/json',
                'Authorization': f'jwt {self._fetch_viewer_info(event_id)["jwt"]}',
                'Referer': referrer,
                **(headers or {}),
            }))

    @staticmethod
    def _extract_video_id_and_unlisted_hash(video):
        if not traverse_obj(video, ('uri', {lambda x: x.startswith('/videos/')})):
            return None, None
        video_id, _, unlisted_hash = video['uri'][8:].partition(':')
        return video_id, unlisted_hash or None

    def _vimeo_url_result(self, video_id, unlisted_hash=None, event_id=None):
        # VimeoIE can extract more metadata and formats for was_live event videos
        return self.url_result(
            join_nonempty('https://vimeo.com', video_id, unlisted_hash, delim='/'), VimeoIE,
            video_id, display_id=event_id, live_status='was_live', url_transparent=True)

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        for embed_url in super()._extract_embed_urls(url, webpage):
            yield cls._smuggle_referrer(embed_url, url)

    def _real_extract(self, url):
        url, _, headers = self._unsmuggle_headers(url)
        # XXX: Keep key name in sync with _unsmuggle_headers
        referrer = headers.get('Referer')
        event_id, unlisted_hash, video_id = self._match_valid_url(url).group('id', 'unlisted_hash', 'video_id')

        for retry in (False, True):
            try:
                live_event_data = self._call_events_api(
                    event_id, unlisted_hash=unlisted_hash, fields=self._EVENT_FIELDS,
                    referrer=referrer, query={'clip_to_play_id': video_id or '0'},
                    headers={'Accept': 'application/vnd.vimeo.*+json;version=3.4.9'})
                break
            except ExtractorError as e:
                if retry or not isinstance(e.cause, HTTPError) or e.cause.status not in (400, 403):
                    raise
                response = traverse_obj(e.cause.response.read(), ({json.loads}, {dict})) or {}
                error_code = response.get('error_code')
                if error_code == 2204:
                    self._verify_video_password(event_id, path='event')
                    continue
                if error_code == 3200:
                    raise ExtractorError(self._REFERER_HINT, expected=True)
                if error_msg := response.get('error'):
                    raise ExtractorError(f'Vimeo says: {error_msg}', expected=True)
                raise

        # stream_privacy.view can be: 'anybody', 'embed_only', 'nobody', 'password', 'unlisted'
        view_policy = live_event_data['stream_privacy']['view']
        if view_policy == 'nobody':
            raise ExtractorError('This event has not been made available to anyone', expected=True)

        clip_data = traverse_obj(live_event_data, ('clip_to_play', {dict})) or {}
        # live.status can be: 'streaming' (is_live), 'done' (was_live), 'unavailable' (is_upcoming OR dead)
        clip_status = traverse_obj(clip_data, ('live', 'status', {str}))
        start_time = traverse_obj(live_event_data, ('schedule', 'start_time', {str}))
        release_timestamp = parse_iso8601(start_time)

        if clip_status == 'unavailable' and release_timestamp and release_timestamp > time.time():
            self.raise_no_formats(f'This live event is scheduled for {start_time}', expected=True)
            live_status = 'is_upcoming'
            config_url = None

        elif view_policy == 'embed_only':
            webpage = self._download_webpage(
                join_nonempty('https://vimeo.com/event', event_id, 'embed', unlisted_hash, delim='/'),
                event_id, 'Downloading embed iframe webpage', impersonate=True, headers=headers)
            # The _parse_config result will overwrite live_status w/ 'is_live' if livestream is active
            live_status = 'was_live'
            config_url = self._extract_config_url(webpage)

        else: # view_policy in ('anybody', 'password', 'unlisted')
            if video_id:
                clip_id, clip_hash = self._extract_video_id_and_unlisted_hash(clip_data)
                if video_id == clip_id and clip_status == 'done' and (clip_hash or view_policy != 'unlisted'):
                    return self._vimeo_url_result(clip_id, clip_hash, event_id)

                video_filter = lambda _, v: self._extract_video_id_and_unlisted_hash(v)[0] == video_id
            else:
                video_filter = lambda _, v: v['live']['status'] in ('streaming', 'done')

            for page in itertools.count(1):
                videos_data = self._call_events_api(
                    event_id, 'videos', unlisted_hash=unlisted_hash, note=f'page {page}',
                    fields=self._VIDEOS_FIELDS, referrer=referrer, query={'page': page},
                    headers={'Accept': 'application/vnd.vimeo.*;version=3.4.1'})

                video = traverse_obj(videos_data, ('data', video_filter, any))
                if video or not traverse_obj(videos_data, ('paging', 'next', {str})):
                    break

            live_status = {
                'streaming': 'is_live',
                'done': 'was_live',
            }.get(traverse_obj(video, ('live', 'status', {str})))

            if not live_status: # requested video_id is unavailable or no videos are available
                raise ExtractorError('This event video is unavailable', expected=True)
            elif live_status == 'was_live':
                return self._vimeo_url_result(*self._extract_video_id_and_unlisted_hash(video), event_id)
            config_url = video['config_url']

        if config_url: # view_policy == 'embed_only' or live_status == 'is_live'
            info = filter_dict(self._parse_config(
                self._download_json(config_url, event_id, 'Downloading config JSON'), event_id))
        else: # live_status == 'is_upcoming'
            info = {'id': event_id}

        if info.get('live_status') == 'post_live':
            self.report_warning('This live event recently ended and some formats may not yet be available')

        return {
            **traverse_obj(live_event_data, {
                'title': ('title', {str}),
                'description': ('stream_description', {str}),
            }),
            'display_id': event_id,
            'live_status': live_status,
            'release_timestamp': release_timestamp,
            **info,
        }

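The `_VALID_URL` added above for `vimeo:event` accepts three URL shapes: a bare numeric event ID, an unlisted hash (optionally behind `embed/`), and an explicit `videos/<id>` path. A minimal sketch of how the named groups come out, using the same pattern standalone (sample URLs are taken from the `_TESTS` above):

    import re

    # Pattern copied from VimeoEventIE._VALID_URL; re.match() anchors at the start only
    _EVENT_URL = re.compile(r'''(?x)
        https?://(?:www\.)?vimeo\.com/event/(?P<id>\d+)(?:/
            (?:
                (?:embed/)?(?P<unlisted_hash>[\da-f]{10})|
                videos/(?P<video_id>\d+)
            )
        )?''')

    for url in ('https://vimeo.com/event/5116195',
                'https://vimeo.com/event/4259978/3db517c479',
                'https://vimeo.com/event/595460/videos/498149131/'):
        m = _EVENT_URL.match(url)
        print(m.group('id'), m.group('unlisted_hash'), m.group('video_id'))
    # 5116195 None None
    # 4259978 3db517c479 None
    # 595460 None 498149131
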
@@ -300,6 +300,24 @@ class VKIE(VKBaseIE):
                'upload_date': '20250130',
            },
        },
        {
            'url': 'https://vkvideo.ru/video-50883936_456244102',
            'info_dict': {
                'id': '-50883936_456244102',
                'ext': 'mp4',
                'title': 'Добивание Украины // Техник в коме // МОЯ ЗЛОСТЬ №140',
                'description': 'md5:a9bc46181e9ebd0fdd82cef6c0191140',
                'uploader': 'Стас Ай, Как Просто!',
                'uploader_id': '-50883936',
                'comment_count': int,
                'like_count': int,
                'duration': 4651,
                'thumbnail': r're:https?://.+\.jpg',
                'chapters': 'count:59',
                'timestamp': 1743333869,
                'upload_date': '20250330',
            },
        },
        {
            # live stream, hls and rtmp links, most likely already finished live
            # stream by the time you are reading this comment
@@ -540,7 +558,7 @@ class VKIE(VKBaseIE):
            'title': ('md_title', {unescapeHTML}),
            'description': ('description', {clean_html}, filter),
            'thumbnail': ('jpg', {url_or_none}),
            'uploader': ('md_author', {str}),
            'uploader': ('md_author', {unescapeHTML}),
            'uploader_id': (('author_id', 'authorId'), {str_or_none}, any),
            'duration': ('duration', {int_or_none}),
            'chapters': ('time_codes', lambda _, v: isinstance(v['time'], int), {

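The `md_author` mapping above switches from `{str}` to `{unescapeHTML}` because VK serves HTML-escaped display names. A minimal illustration with the stdlib equivalent (the sample name is hypothetical; yt-dlp's `unescapeHTML` behaves like `html.unescape` for this case):

    import html

    # Hypothetical HTML-escaped uploader name as VK would deliver it
    print(html.unescape('Tom &amp; Jerry&#39;s Channel'))  # Tom & Jerry's Channel
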
@@ -1,7 +1,6 @@
import urllib.parse

from .common import InfoExtractor
from .once import OnceIE
from ..utils import (
    ExtractorError,
    int_or_none,
@@ -10,7 +9,7 @@ from ..utils import (
)


class VoxMediaVolumeIE(OnceIE):
class VoxMediaVolumeIE(InfoExtractor):
    _VALID_URL = r'https?://volume\.vox-cdn\.com/embed/(?P<id>[0-9a-f]{9})'

    def _real_extract(self, url):
@@ -57,7 +56,8 @@ class VoxMediaVolumeIE(OnceIE):
            if not provider_video_id:
                continue
            if provider_video_type == 'brightcove':
                info['formats'] = self._extract_once_formats(provider_video_id)
                # TODO: Find embed example or confirm that Vox has stopped using Brightcove
                raise ExtractorError('Vox Brightcove embeds are currently unsupported')
            else:
                info.update({
                    '_type': 'url_transparent',
@@ -155,20 +155,6 @@ class VoxMediaIE(InfoExtractor):
            },
        }],
        'skip': 'Page no longer contain videos',
    }, {
        # volume embed, Brightcove Once
        'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya',
        'md5': '2dbc77b8b0bff1894c2fce16eded637d',
        'info_dict': {
            'id': '1231c973d',
            'ext': 'mp4',
            'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
            'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
            'timestamp': 1402938000,
            'upload_date': '20140616',
            'duration': 4114,
        },
        'add_ie': ['VoxMediaVolume'],
    }]

    def _real_extract(self, url):

@@ -2,9 +2,11 @@ from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    join_nonempty,
    try_get,
    unified_strdate,
)
from ..utils.traversal import traverse_obj


class WatIE(InfoExtractor):
@@ -70,8 +72,14 @@ class WatIE(InfoExtractor):

        error_desc = video_info.get('error_desc')
        if error_desc:
            if video_info.get('error_code') == 'GEOBLOCKED':
            error_code = video_info.get('error_code')
            if error_code == 'GEOBLOCKED':
                self.raise_geo_restricted(error_desc, video_info.get('geoList'))
            elif error_code == 'DELIVERY_ERROR':
                if traverse_obj(video_data, ('delivery', 'code')) == 500:
                    self.report_drm(video_id)
                error_desc = join_nonempty(
                    error_desc, traverse_obj(video_data, ('delivery', 'error', {str})), delim=': ')
            raise ExtractorError(error_desc, expected=True)

        title = video_info['title']

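The new `DELIVERY_ERROR` branch above only appends the delivery detail when one exists, which is exactly what `join_nonempty` provides. A simplified reimplementation for illustration (the real helper, imported from `..utils`, supports more options; the sample messages are hypothetical):

    def join_nonempty(*values, delim='-'):
        # Drop falsy parts, then join the rest
        return delim.join(str(v) for v in values if v)

    print(join_nonempty('This content is unavailable', 'stream not found', delim=': '))
    # This content is unavailable: stream not found
    print(join_nonempty('This content is unavailable', None, delim=': '))
    # This content is unavailable
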
@@ -290,12 +290,14 @@ class WeverseIE(WeverseBaseIE):

        elif live_status == 'is_live':
            video_info = self._call_api(
                f'/video/v1.2/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
                f'/video/v1.3/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
                video_id, note='Downloading live JSON')
            playback = self._parse_json(video_info['lipPlayback'], video_id)
            m3u8_url = traverse_obj(playback, (
                'media', lambda _, v: v['protocol'] == 'HLS', 'path', {url_or_none}), get_all=False)
            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True)
            # Live subtitles are not downloadable, but extract to silence "ignoring subs" warning
            formats, _ = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True)

        elif live_status == 'post_live':
            if availability in ('premium_only', 'subscriber_only'):

@@ -45,7 +45,7 @@ class XinpianchangIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id=video_id)
        webpage = self._download_webpage(url, video_id=video_id, headers={'Referer': url})
        video_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['detail']['video']

        data = self._download_json(

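The only functional change for Xinpianchang is sending the page's own URL as `Referer`. For reference, a bare-bones stdlib request doing the same would look like this (sketch; the URL is a hypothetical video page):

    import urllib.request

    url = 'https://www.xinpianchang.com/a10000001'  # hypothetical video page URL
    req = urllib.request.Request(url, headers={'Referer': url})
    webpage = urllib.request.urlopen(req).read().decode()
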
@@ -35,6 +35,7 @@ from ...utils import (
class _PoTokenContext(enum.Enum):
    PLAYER = 'player'
    GVS = 'gvs'
    SUBS = 'subs'


# any clients starting with _ cannot be explicitly requested by the user
@@ -417,6 +418,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):

    _NETRC_MACHINE = 'youtube'

    _COOKIE_HOWTO_WIKI_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies'

    def ucid_or_none(self, ucid):
        return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)

@@ -451,17 +454,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
        return preferred_lang

    def _initialize_consent(self):
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
        if self._has_auth_cookies:
            return
        socs = cookies.get('SOCS')
        socs = self._youtube_cookies.get('SOCS')
        if socs and not socs.value.startswith('CAA'): # not consented
            return
        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)

    def _initialize_pref(self):
        cookies = self._get_cookies('https://www.youtube.com/')
        pref_cookie = cookies.get('PREF')
        pref_cookie = self._youtube_cookies.get('PREF')
        pref = {}
        if pref_cookie:
            try:
@@ -472,8 +473,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
        self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

    def _initialize_cookie_auth(self):
        yt_sapisid, yt_1psapisid, yt_3psapisid = self._get_sid_cookies()
        if yt_sapisid or yt_1psapisid or yt_3psapisid:
        self._passed_auth_cookies = False
        if self._has_auth_cookies:
            self._passed_auth_cookies = True
            self.write_debug('Found YouTube account cookies')

    def _real_initialize(self):
@@ -492,8 +494,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):

    @property
    def _youtube_login_hint(self):
        return (f'{self._login_hint(method="cookies")}. Also see '
                'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies '
        return (f'{self._login_hint(method="cookies")}. Also see {self._COOKIE_HOWTO_WIKI_URL} '
                'for tips on effectively exporting YouTube cookies')

    def _check_login_required(self):
@@ -553,12 +554,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):

        return f'{scheme} {"_".join(parts)}'

    @property
    def _youtube_cookies(self):
        return self._get_cookies('https://www.youtube.com')

    def _get_sid_cookies(self):
        """
        Get SAPISID, 1PSAPISID, 3PSAPISID cookie values
        @returns sapisid, 1psapisid, 3psapisid
        """
        yt_cookies = self._get_cookies('https://www.youtube.com')
        yt_cookies = self._youtube_cookies
        yt_sapisid = try_call(lambda: yt_cookies['SAPISID'].value)
        yt_3papisid = try_call(lambda: yt_cookies['__Secure-3PAPISID'].value)
        yt_1papisid = try_call(lambda: yt_cookies['__Secure-1PAPISID'].value)
@@ -595,6 +600,31 @@ class YoutubeBaseInfoExtractor(InfoExtractor):

        return ' '.join(authorizations)

    @property
    def is_authenticated(self):
        return self._has_auth_cookies

    @property
    def _has_auth_cookies(self):
        yt_sapisid, yt_1psapisid, yt_3psapisid = self._get_sid_cookies()
        # YouTube doesn't appear to clear 3PSAPISID when rotating cookies (as of 2025-04-26)
        # But LOGIN_INFO is cleared and should exist if logged in
        has_login_info = 'LOGIN_INFO' in self._youtube_cookies
        return bool(has_login_info and (yt_sapisid or yt_1psapisid or yt_3psapisid))

    def _request_webpage(self, *args, **kwargs):
        response = super()._request_webpage(*args, **kwargs)

        # Check that we are still logged-in and cookies have not rotated after every request
        if getattr(self, '_passed_auth_cookies', None) and not self._has_auth_cookies:
            self.report_warning(
                'The provided YouTube account cookies are no longer valid. '
                'They have likely been rotated in the browser as a security measure. '
                f'For tips on how to effectively export YouTube cookies, refer to {self._COOKIE_HOWTO_WIKI_URL} .',
                only_once=False)

        return response

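The reworked cookie checks above hinge on one predicate: an account is considered logged in only if `LOGIN_INFO` is present alongside at least one SID cookie. A standalone sketch of the same logic over a plain dict (the cookie names are the ones read by `_get_sid_cookies`; the dict stands in for the real cookie jar):

    def has_auth_cookies(cookies: dict) -> bool:
        # LOGIN_INFO is cleared on logout, while 3PSAPISID can survive cookie rotation
        sid_names = ('SAPISID', '__Secure-1PAPISID', '__Secure-3PAPISID')
        return 'LOGIN_INFO' in cookies and any(cookies.get(name) for name in sid_names)

    print(has_auth_cookies({'LOGIN_INFO': 'x', 'SAPISID': 'y'}))   # True
    print(has_auth_cookies({'__Secure-3PAPISID': 'stale'}))        # False: LOGIN_INFO missing
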
    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='web'):
@@ -695,10 +725,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
            expected_type=str)

    @functools.cached_property
    def is_authenticated(self):
        return bool(self._get_sid_authorization_header())

    def extract_ytcfg(self, video_id, webpage):
        if not webpage:
            return {}
@@ -762,6 +788,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):

    def _download_ytcfg(self, client, video_id):
        url = {
            'mweb': 'https://m.youtube.com',
            'web': 'https://www.youtube.com',
            'web_music': 'https://music.youtube.com',
            'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',

@@ -37,6 +37,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
            'media_type': 'clip',
        },
    }]

@@ -59,6 +60,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'media_type': 'clip',
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
            '_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility

@@ -35,6 +35,7 @@ class YoutubeYtBeIE(YoutubeBaseInfoExtractor):
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
            'media_type': 'short',
        },
        'params': {
            'noplaylist': True,

@@ -524,10 +524,16 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
            response = self._extract_response(
                item_id=f'{item_id} page {page_num}',
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
                check_get_keys=(
                    'continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints',
                    # Playlist recommendations may return with no data - ignore
                    ('responseContext', 'serviceTrackingParams', ..., 'params', ..., lambda k, v: k == 'key' and v == 'GetRecommendedMusicPlaylists_rid'),
                ))

            if not response:
                break

            continuation = None
            # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
            # See: https://github.com/ytdl-org/youtube-dl/issues/28702
            visitor_data = self._extract_visitor_data(response) or visitor_data
@@ -564,7 +570,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
                yield from func(video_items_renderer)
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

            if not video_items_renderer:
                # In the case only a continuation is returned, try to follow it.
                # We extract this after trying to extract non-continuation items as otherwise this
                # may be prioritized over other continuations.
                # see: https://github.com/yt-dlp/yt-dlp/issues/12933
                continuation = continuation or self._extract_continuation({'contents': [continuation_item]})

            if not continuation and not video_items_renderer:
                break

    @staticmethod
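The `@@ -564` hunk above makes the pager follow a continuation even when a page carries no item renderer (yt-dlp/yt-dlp#12933). A simplified sketch of the resulting loop shape; every helper here is a hypothetical stand-in for the private machinery in `_entries`:

    def paged_entries(fetch_page, find_renderer, extract_items, find_continuation, first_continuation):
        continuation = first_continuation
        while continuation:
            response = fetch_page(continuation)
            if not response:
                break
            renderer = find_renderer(response)
            if renderer:
                yield from extract_items(renderer)
                continuation = find_continuation(renderer)
            else:
                # Empty intermediate page with only a continuation token: follow it
                continuation = find_continuation(response)
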
@@ -999,14 +1011,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_mincount': 94,
|
||||
'info_dict': {
|
||||
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'title': 'Igor Kleiner Ph.D. - Playlists',
|
||||
'title': 'Igor Kleiner - Playlists',
|
||||
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
|
||||
'uploader': 'Igor Kleiner Ph.D.',
|
||||
'uploader': 'Igor Kleiner ',
|
||||
'uploader_id': '@IgorDataScience',
|
||||
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
|
||||
'channel': 'Igor Kleiner Ph.D.',
|
||||
'channel': 'Igor Kleiner ',
|
||||
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
|
||||
'tags': 'count:23',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'channel_follower_count': int,
|
||||
},
|
||||
@@ -1016,18 +1028,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_mincount': 94,
|
||||
'info_dict': {
|
||||
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'title': 'Igor Kleiner Ph.D. - Playlists',
|
||||
'title': 'Igor Kleiner - Playlists',
|
||||
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
|
||||
'uploader': 'Igor Kleiner Ph.D.',
|
||||
'uploader': 'Igor Kleiner ',
|
||||
'uploader_id': '@IgorDataScience',
|
||||
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
|
||||
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
|
||||
'tags': 'count:23',
|
||||
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'channel': 'Igor Kleiner Ph.D.',
|
||||
'channel': 'Igor Kleiner ',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'channel_follower_count': int,
|
||||
},
|
||||
}, {
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'note': 'playlists, series',
|
||||
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
|
||||
'playlist_mincount': 5,
|
||||
@@ -1066,22 +1079,23 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: fix availability extraction
|
||||
'note': 'basic, single video playlist',
|
||||
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
|
||||
'info_dict': {
|
||||
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
||||
'title': 'youtube-dl public playlist',
|
||||
'id': 'PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
|
||||
'title': 'single video playlist',
|
||||
'description': '',
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'modified_date': '20201130',
|
||||
'channel': 'Sergey M.',
|
||||
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'modified_date': '20250417',
|
||||
'channel': 'cole-dlp-test-acc',
|
||||
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'availability': 'public',
|
||||
'uploader': 'Sergey M.',
|
||||
'uploader_url': 'https://www.youtube.com/@sergeym.6173',
|
||||
'uploader_id': '@sergeym.6173',
|
||||
'uploader': 'cole-dlp-test-acc',
|
||||
'uploader_url': 'https://www.youtube.com/@coletdjnz',
|
||||
'uploader_id': '@coletdjnz',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
@@ -1171,11 +1185,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 17,
|
||||
}, {
|
||||
'note': 'Community tab',
|
||||
'note': 'Posts tab',
|
||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
|
||||
'info_dict': {
|
||||
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'title': 'lex will - Community',
|
||||
'title': 'lex will - Posts',
|
||||
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
||||
'channel': 'lex will',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
@@ -1188,30 +1202,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 18,
|
||||
}, {
|
||||
'note': 'Channels tab',
|
||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
|
||||
'info_dict': {
|
||||
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'title': 'lex will - Channels',
|
||||
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
||||
'channel': 'lex will',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'tags': ['bible', 'history', 'prophesy'],
|
||||
'channel_follower_count': int,
|
||||
'uploader_url': 'https://www.youtube.com/@lexwill718',
|
||||
'uploader_id': '@lexwill718',
|
||||
'uploader': 'lex will',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'note': 'Search tab',
|
||||
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
|
||||
'playlist_mincount': 40,
|
||||
'info_dict': {
|
||||
'id': 'UCYO_jab_esuFRV4b17AJtAw',
|
||||
'title': '3Blue1Brown - Search - linear algebra',
|
||||
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
|
||||
'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
|
||||
'tags': ['Mathematics'],
|
||||
'channel': '3Blue1Brown',
|
||||
@@ -1232,6 +1230,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: fix availability extraction
|
||||
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||
'info_dict': {
|
||||
@@ -1294,24 +1293,25 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 21,
|
||||
}, {
|
||||
# TODO: fix availability extraction
|
||||
'note': 'Playlist with "show unavailable videos" button',
|
||||
'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
|
||||
'info_dict': {
|
||||
'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
|
||||
'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
|
||||
'title': 'The Memes Of 2010s.....',
|
||||
'id': 'PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
|
||||
'view_count': int,
|
||||
'channel': 'Phim Siêu Nhân Nhật Bản',
|
||||
'channel': "I'm Not JiNxEd",
|
||||
'tags': [],
|
||||
'description': '',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
|
||||
'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
|
||||
'description': 'md5:44dc3b315ba69394feaafa2f40e7b2a1',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC5H5H85D1QE5-fuWWQ1hdNg',
|
||||
'channel_id': 'UC5H5H85D1QE5-fuWWQ1hdNg',
|
||||
'modified_date': r're:\d{8}',
|
||||
'availability': 'public',
|
||||
'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
|
||||
'uploader_id': '@phimsieunhannhatban',
|
||||
'uploader': 'Phim Siêu Nhân Nhật Bản',
|
||||
'uploader_url': 'https://www.youtube.com/@imnotjinxed1998',
|
||||
'uploader_id': '@imnotjinxed1998',
|
||||
'uploader': "I'm Not JiNxEd",
|
||||
},
|
||||
'playlist_mincount': 200,
|
||||
'playlist_mincount': 150,
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
}, {
|
||||
'note': 'Playlist with unavailable videos in page 7',
|
||||
@@ -1334,6 +1334,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_mincount': 1000,
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
}, {
|
||||
# TODO: fix availability extraction
|
||||
'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
|
||||
'info_dict': {
|
||||
@@ -1384,7 +1385,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
|
||||
'info_dict': {
|
||||
'id': 'hGkQjiJLjWQ', # This will keep changing
|
||||
'id': 'YDvsBbKfLPA', # This will keep changing
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'upload_date': r're:\d{8}',
|
||||
@@ -1409,6 +1410,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'uploader_id': '@SkyNews',
|
||||
'uploader': 'Sky News',
|
||||
'channel_is_verified': True,
|
||||
'media_type': 'livestream',
|
||||
'timestamp': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -1496,6 +1499,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: fix availability extraction
|
||||
'note': 'VLPL, should redirect to playlist?list=PL...',
|
||||
'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
|
||||
'info_dict': {
|
||||
@@ -1537,6 +1541,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
}, {
|
||||
# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
|
||||
# Treat as a general feed
|
||||
# TODO: fix extraction
|
||||
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
|
||||
'info_dict': {
|
||||
'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
|
||||
@@ -1560,21 +1565,21 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'expected_warnings': ['YouTube Music is not directly supported'],
|
||||
}, {
|
||||
'note': 'unlisted single video playlist',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
|
||||
'info_dict': {
|
||||
'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
|
||||
'title': 'yt-dlp unlisted playlist test',
|
||||
'id': 'PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
|
||||
'title': 'unlisted playlist',
|
||||
'availability': 'unlisted',
|
||||
'tags': [],
|
||||
'modified_date': '20220418',
|
||||
'channel': 'colethedj',
|
||||
'modified_date': '20250417',
|
||||
'channel': 'cole-dlp-test-acc',
|
||||
'view_count': int,
|
||||
'description': '',
|
||||
'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
|
||||
'uploader_url': 'https://www.youtube.com/@colethedj1894',
|
||||
'uploader_id': '@colethedj1894',
|
||||
'uploader': 'colethedj',
|
||||
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'uploader_url': 'https://www.youtube.com/@coletdjnz',
|
||||
'uploader_id': '@coletdjnz',
|
||||
'uploader': 'cole-dlp-test-acc',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
@@ -1596,6 +1601,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_count': 1,
|
||||
'params': {'extract_flat': True},
|
||||
}, {
|
||||
# By default, recommended is always empty.
|
||||
'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
|
||||
'url': 'https://www.youtube.com/feed/recommended',
|
||||
'info_dict': {
|
||||
@@ -1603,7 +1609,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'title': 'recommended',
|
||||
'tags': [],
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
'playlist_count': 0,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'extractor_args': {'youtubetab': {'skip': ['webpage']}},
|
||||
@@ -1628,6 +1634,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
},
|
||||
'skip': 'Query for sorting no longer works',
|
||||
}, {
|
||||
# TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
|
||||
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
|
||||
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
|
||||
'info_dict': {
|
||||
@@ -1654,11 +1661,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: fix metadata extraction
|
||||
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
|
||||
'info_dict': {
|
||||
'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
|
||||
'modified_date': '20220407',
|
||||
'modified_date': '20250115',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
|
||||
'tags': [],
|
||||
'availability': 'unlisted',
|
||||
@@ -1692,6 +1700,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'expected_warnings': ['Preferring "ja"'],
|
||||
}, {
|
||||
# XXX: this should really check flat playlist entries, but the test suite doesn't support that
|
||||
# TODO: fix availability extraction
|
||||
'note': 'preferred lang set with playlist with translated video titles',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
|
||||
'info_dict': {
|
||||
@@ -1714,6 +1723,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
}, {
|
||||
# shorts audio pivot for 2GtVksBMYFM.
|
||||
'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
|
||||
# TODO: fix extraction
|
||||
'info_dict': {
|
||||
'id': 'sfv_audio_pivot',
|
||||
'title': 'sfv_audio_pivot',
|
||||
@@ -1751,6 +1761,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_mincount': 8,
|
||||
}, {
|
||||
# Should get three playlists for videos, shorts and streams tabs
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
|
||||
'info_dict': {
|
||||
'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
|
||||
@@ -1758,7 +1769,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'channel_follower_count': int,
|
||||
'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
|
||||
'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
|
||||
'description': 'md5:01e53f350ab8ad6fcf7c4fedb3c1b99f',
|
||||
'channel': 'Polka Ch. 尾丸ポルカ',
|
||||
'tags': 'count:35',
|
||||
'uploader_url': 'https://www.youtube.com/@OmaruPolka',
|
||||
@@ -1769,14 +1780,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# Shorts tab with channel with handle
|
||||
# TODO: fix channel description
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'url': 'https://www.youtube.com/@NotJustBikes/shorts',
|
||||
'info_dict': {
|
||||
'id': 'UC0intLFzLaudFG-xAvUEO-A',
|
||||
'title': 'Not Just Bikes - Shorts',
|
||||
'tags': 'count:10',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
|
||||
'description': 'md5:5e82545b3a041345927a92d0585df247',
|
||||
'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031',
|
||||
'channel_follower_count': int,
|
||||
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
|
||||
'channel': 'Not Just Bikes',
|
||||
@@ -1797,7 +1808,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
|
||||
'channel': '中村悠一',
|
||||
'channel_follower_count': int,
|
||||
'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
|
||||
'description': 'md5:e8fd705073a594f27d6d6d020da560dc',
|
||||
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
|
||||
'uploader_id': '@Yuichi-Nakamura',
|
||||
'uploader': '中村悠一',
|
||||
@@ -1815,6 +1826,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# No videos tab but has a shorts tab
|
||||
# TODO: fix metadata extraction
|
||||
'url': 'https://www.youtube.com/c/TKFShorts',
|
||||
'info_dict': {
|
||||
'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
|
||||
@@ -1851,6 +1863,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
}, {
|
||||
# Shorts url result in shorts tab
|
||||
# TODO: Fix channel id extraction
|
||||
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
|
||||
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA',
@@ -1879,6 +1892,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'params': {'extract_flat': True},
}, {
# Live video status should be extracted
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
'info_dict': {
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
@@ -1907,6 +1921,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 1,
}, {
# Channel renderer metadata. Contains number of videos on the channel
# TODO: channels tab removed, change this test to use another page with channel renderer
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA',
@@ -1940,7 +1955,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
},
}],
'params': {'extract_flat': True},
'skip': 'channels tab removed',
}, {
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@3blue1brown/about',
'info_dict': {
'id': '@3blue1brown',
@@ -1950,7 +1967,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
'channel': '3Blue1Brown',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
@@ -1976,6 +1993,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 5,
}, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@AHimitsu/releases',
'info_dict': {
'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
@@ -2015,6 +2033,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 100,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, {
# TODO: fix channel_is_verified extraction
'note': 'Tags containing spaces',
'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
'playlist_count': 3,
@@ -2035,6 +2054,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
'mark fischbach'],
},
}, {
# https://github.com/yt-dlp/yt-dlp/issues/12933
'note': 'streams tab, some scheduled streams. Empty intermediate response with only continuation - must follow',
'url': 'https://www.youtube.com/@sbcitygov/streams',
'playlist_mincount': 150,
'info_dict': {
'id': 'UCH6-qfQwlUgz9SAf05jvc_w',
'channel': 'sbcitygov',
'channel_id': 'UCH6-qfQwlUgz9SAf05jvc_w',
'title': 'sbcitygov - Live',
'channel_follower_count': int,
'description': 'md5:ca1a92059835c071e33b3db52f4a6d67',
'uploader_id': '@sbcitygov',
'uploader_url': 'https://www.youtube.com/@sbcitygov',
'uploader': 'sbcitygov',
'channel_url': 'https://www.youtube.com/channel/UCH6-qfQwlUgz9SAf05jvc_w',
'tags': [],
},
}]

@classmethod

@@ -23,6 +23,8 @@ from ._base import (
_split_innertube_client,
short_client_name,
)
from .pot._director import initialize_pot_director
from .pot.provider import PoTokenContext, PoTokenRequest
from ..openload import PhantomJSwrapper
from ...jsinterp import JSInterpreter
from ...networking.exceptions import HTTPError
@@ -66,9 +68,13 @@ from ...utils import (
urljoin,
variadic,
)
from ...utils.networking import clean_headers, clean_proxies, select_proxy

STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
STREAMING_DATA_INITIAL_PO_TOKEN = '__yt_dlp_po_token'
STREAMING_DATA_FETCH_SUBS_PO_TOKEN = '__yt_dlp_fetch_subs_po_token'
STREAMING_DATA_INNERTUBE_CONTEXT = '__yt_dlp_innertube_context'

PO_TOKEN_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide'


@@ -376,6 +382,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Afrojack',
'uploader_url': 'https://www.youtube.com/@Afrojack',
'uploader_id': '@Afrojack',
'media_type': 'video',
},
'params': {
'youtube_include_dash_manifest': True,
@@ -413,10 +420,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1401991663,
'media_type': 'video',
},
},
{
'note': 'Age-gate video with embed allowed in public site',
'note': 'Formerly an age-gate video with embed allowed in public site',
'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
'info_dict': {
'id': 'HsUATh_Nc2U',
@@ -424,8 +432,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Godzilla 2 (Official Video)',
'description': 'md5:bf77e03fcae5529475e500129b05668a',
'upload_date': '20200408',
'age_limit': 18,
'availability': 'needs_auth',
'age_limit': 0,
'availability': 'public',
'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
'channel': 'FlyingKitty',
'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
@@ -443,8 +451,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@FlyingKitty900',
'comment_count': int,
'channel_is_verified': True,
'media_type': 'video',
},
'skip': 'Age-restricted; requires authentication',
},
{
'note': 'Age-gate video embedable only with clientScreen=EMBED',
@@ -507,6 +515,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Herr Lurik',
'uploader_url': 'https://www.youtube.com/@HerrLurik',
'uploader_id': '@HerrLurik',
'media_type': 'video',
},
},
{
@@ -546,6 +555,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'deadmau5',
'uploader_url': 'https://www.youtube.com/@deadmau5',
'uploader_id': '@deadmau5',
'media_type': 'video',
},
'expected_warnings': [
'DASH manifest missing',
@@ -581,6 +591,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@Olympics',
'channel_is_verified': True,
'timestamp': 1440707674,
'media_type': 'livestream',
},
'params': {
'skip_download': 'requires avconv',
@@ -615,6 +626,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@AllenMeow',
'uploader_id': '@AllenMeow',
'timestamp': 1299776999,
'media_type': 'video',
},
},
# url_encoded_fmt_stream_map is empty string
@@ -809,6 +821,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'like_count': int,
'age_limit': 0,
'channel_follower_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -868,6 +881,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@BKCHarvard',
'uploader_url': 'https://www.youtube.com/@BKCHarvard',
'timestamp': 1422422076,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -904,6 +918,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1447987198,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -968,6 +983,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'timestamp': 1484761047,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1070,6 +1086,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tags': 'count:11',
'live_status': 'not_live',
'channel_follower_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1124,6 +1141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
'uploader_id': '@ElevageOrVert',
'timestamp': 1497343210,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1163,6 +1181,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1377976349,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1207,6 +1226,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_follower_count': int,
'uploader': 'The Cinematic Orchestra',
'comment_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1275,6 +1295,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
'uploader_id': '@walkaroundjapan7124',
'timestamp': 1605884416,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1371,6 +1392,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1395685455,
'media_type': 'video',
}, 'params': {'format': 'mhtml', 'skip_download': True},
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@@ -1401,6 +1423,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
'timestamp': 1641170939,
'media_type': 'video',
},
}, {
# date text is premiered video, ensure upload date in UTC (published 1641172509)
@@ -1434,6 +1457,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1641172509,
'media_type': 'video',
},
},
{ # continuous livestream.
@@ -1495,6 +1519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Lesmiscore',
'uploader_url': 'https://www.youtube.com/@lesmiscore',
'timestamp': 1648005313,
'media_type': 'short',
},
}, {
# Prefer primary title+description language metadata by default
@@ -1523,6 +1548,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
'timestamp': 1662677394,
'media_type': 'video',
},
'params': {'skip_download': True},
}, {
@@ -1551,6 +1577,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'cole-dlp-test-acc',
'timestamp': 1659073275,
'like_count': int,
'media_type': 'video',
},
'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
'expected_warnings': [r'Preferring "fr" translated fields'],
@@ -1587,6 +1614,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
'media_type': 'video',
},
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}, {
@@ -1687,6 +1715,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
'media_type': 'video',
},
'params': {
'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
@@ -1719,6 +1748,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_follower_count': int,
'categories': ['People & Blogs'],
'tags': [],
'media_type': 'short',
},
},
]
@@ -1754,6 +1784,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@ChristopherSykesDocumentaries',
'heatmap': 'count:100',
'timestamp': 1211825920,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1784,6 +1815,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
super().__init__(*args, **kwargs)
self._code_cache = {}
self._player_cache = {}
self._pot_director = None

def _real_initialize(self):
super()._real_initialize()
self._pot_director = initialize_pot_director(self)

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
lock = threading.Lock()
@@ -1819,6 +1855,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
continue

# Formats from ended premieres will be missing a manifest_url
# See https://github.com/yt-dlp/yt-dlp/issues/8543
if not f.get('manifest_url'):
break

return f['manifest_url'], f['manifest_stream_number'], is_live
return None

@@ -1982,7 +2024,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _player_js_cache_key(self, player_url):
player_id = self._extract_player_info(player_url)
player_path = remove_start(urllib.parse.urlparse(player_url).path, f'/s/player/{player_id}/')
variant = self._INVERSE_PLAYER_JS_VARIANT_MAP.get(player_path)
variant = self._INVERSE_PLAYER_JS_VARIANT_MAP.get(player_path) or next((
v for k, v in self._INVERSE_PLAYER_JS_VARIANT_MAP.items()
if re.fullmatch(re.escape(k).replace('en_US', r'[a-zA-Z0-9_]+'), player_path)), None)
if not variant:
self.write_debug(
f'Unable to determine player JS variant\n'
@@ -2120,23 +2164,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return ret
return inner

def _load_nsig_code_from_cache(self, player_url):
cache_id = ('youtube-nsig', self._player_js_cache_key(player_url))
def _load_player_data_from_cache(self, name, player_url):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))

if func_code := self._player_cache.get(cache_id):
return func_code
if data := self._player_cache.get(cache_id):
return data

func_code = self.cache.load(*cache_id, min_ver='2025.03.31')
if func_code:
self._player_cache[cache_id] = func_code
data = self.cache.load(*cache_id, min_ver='2025.03.31')
if data:
self._player_cache[cache_id] = data

return func_code
return data

def _store_nsig_code_to_cache(self, player_url, func_code):
cache_id = ('youtube-nsig', self._player_js_cache_key(player_url))
def _store_player_data_to_cache(self, name, player_url, data):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
if cache_id not in self._player_cache:
self.cache.store(*cache_id, func_code)
self._player_cache[cache_id] = func_code
self.cache.store(*cache_id, data)
self._player_cache[cache_id] = data

def _decrypt_signature(self, s, video_id, player_url):
"""Turn the encrypted s field into a working signature"""
@@ -2179,7 +2223,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

self.write_debug(f'Decrypted nsig {s} => {ret}')
# Only cache nsig func JS code to disk if successful, and only once
self._store_nsig_code_to_cache(player_url, func_code)
self._store_player_data_to_cache('nsig', player_url, func_code)
return ret

def _extract_n_function_name(self, jscode, player_url=None):
@@ -2298,7 +2342,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
func_code = self._load_nsig_code_from_cache(player_url)
func_code = self._load_player_data_from_cache('nsig', player_url)
jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode)

@@ -2334,23 +2378,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
Extract signatureTimestamp (sts)
Required to tell API what sig/player version is in use.
"""
sts = None
if isinstance(ytcfg, dict):
sts = int_or_none(ytcfg.get('STS'))
if sts := traverse_obj(ytcfg, ('STS', {int_or_none})):
return sts

if not player_url:
error_msg = 'Cannot extract signature timestamp without player url'
if fatal:
raise ExtractorError(error_msg)
self.report_warning(error_msg)
return None

sts = self._load_player_data_from_cache('sts', player_url)
if sts:
return sts

if code := self._load_player(video_id, player_url, fatal=fatal):
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
if sts:
self._store_player_data_to_cache('sts', player_url, sts)

if not sts:
# Attempt to extract from player
if player_url is None:
error_msg = 'Cannot extract signature timestamp without player_url.'
if fatal:
raise ExtractorError(error_msg)
self.report_warning(error_msg)
return
code = self._load_player(video_id, player_url, fatal=fatal)
if code:
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
return sts

def _mark_watched(self, video_id, player_responses):
@@ -2818,7 +2866,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue

def fetch_po_token(self, client='web', context=_PoTokenContext.GVS, ytcfg=None, visitor_data=None,
data_sync_id=None, session_index=None, player_url=None, video_id=None, **kwargs):
data_sync_id=None, session_index=None, player_url=None, video_id=None, webpage=None,
required=False, **kwargs):
"""
Fetch a PO Token for a given client and context. This function will validate required parameters for a given context and client.

@@ -2832,10 +2881,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
@param session_index: session index.
@param player_url: player URL.
@param video_id: video ID.
@param webpage: video webpage.
@param required: Whether the PO Token is required (i.e. try to fetch unless policy is "never").
@param kwargs: Additional arguments to pass down. May be more added in the future.
@return: The fetched PO Token. None if it could not be fetched.
"""

# TODO(future): This validation should be moved into pot framework.
# Some sort of middleware or validation provider perhaps?

# GVS WebPO Token is bound to visitor_data / Visitor ID when logged out.
# Must have visitor_data for it to function.
if player_url and context == _PoTokenContext.GVS and not visitor_data and not self.is_authenticated:
@@ -2857,6 +2911,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'Got a GVS PO Token for {client} client, but missing Data Sync ID for account. Formats may not work.'
f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')

self.write_debug(f'{video_id}: Retrieved a {context.value} PO Token for {client} client from config')
return config_po_token

# Require GVS WebPO Token if logged in for external fetching
@@ -2866,7 +2921,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
return

return self._fetch_po_token(
po_token = self._fetch_po_token(
client=client,
context=context.value,
ytcfg=ytcfg,
@@ -2875,11 +2930,68 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
session_index=session_index,
player_url=player_url,
video_id=video_id,
video_webpage=webpage,
required=required,
**kwargs,
)

if po_token:
self.write_debug(f'{video_id}: Retrieved a {context.value} PO Token for {client} client')
return po_token

def _fetch_po_token(self, client, **kwargs):
"""(Unstable) External PO Token fetch stub"""
context = kwargs.get('context')

# Avoid fetching PO Tokens when not required
fetch_pot_policy = self._configuration_arg('fetch_pot', [''], ie_key=YoutubeIE)[0]
if fetch_pot_policy not in ('never', 'auto', 'always'):
fetch_pot_policy = 'auto'
if (
fetch_pot_policy == 'never'
or (
fetch_pot_policy == 'auto'
and _PoTokenContext(context) not in self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS']
and not kwargs.get('required', False)
)
):
return None

headers = self.get_param('http_headers').copy()
proxies = self._downloader.proxies.copy()
clean_headers(headers)
clean_proxies(proxies, headers)

innertube_host = self._select_api_hostname(None, default_client=client)

pot_request = PoTokenRequest(
context=PoTokenContext(context),
innertube_context=traverse_obj(kwargs, ('ytcfg', 'INNERTUBE_CONTEXT')),
innertube_host=innertube_host,
internal_client_name=client,
session_index=kwargs.get('session_index'),
player_url=kwargs.get('player_url'),
video_webpage=kwargs.get('video_webpage'),
is_authenticated=self.is_authenticated,
visitor_data=kwargs.get('visitor_data'),
data_sync_id=kwargs.get('data_sync_id'),
video_id=kwargs.get('video_id'),
request_cookiejar=self._downloader.cookiejar,

# All requests that would need to be proxied should be in the
# context of www.youtube.com or the innertube host
request_proxy=(
select_proxy('https://www.youtube.com', proxies)
or select_proxy(f'https://{innertube_host}', proxies)
),
request_headers=headers,
request_timeout=self.get_param('socket_timeout'),
request_verify_tls=not self.get_param('nocheckcertificate'),
request_source_address=self.get_param('source_address'),

bypass_cache=False,
)

return self._pot_director.get_po_token(pot_request)

@staticmethod
def _is_agegated(player_response):
@@ -3028,6 +3140,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_url = self._download_player_url(video_id)
tried_iframe_fallback = True

pr = initial_pr if client == 'web' else None

visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)

@@ -3037,16 +3151,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'video_id': video_id,
'data_sync_id': data_sync_id if self.is_authenticated else None,
'player_url': player_url if require_js_player else None,
'webpage': webpage,
'session_index': self._extract_session_index(master_ytcfg, player_ytcfg),
'ytcfg': player_ytcfg,
'ytcfg': player_ytcfg or self._get_default_ytcfg(client),
}

player_po_token = self.fetch_po_token(
# Don't need a player PO token for WEB if using player response from webpage
player_po_token = None if pr else self.fetch_po_token(
context=_PoTokenContext.PLAYER, **fetch_po_token_args)

gvs_po_token = self.fetch_po_token(
context=_PoTokenContext.GVS, **fetch_po_token_args)

fetch_subs_po_token_func = functools.partial(
self.fetch_po_token,
context=_PoTokenContext.SUBS,
**fetch_po_token_args,
)

required_pot_contexts = self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS']

if (
@@ -3073,7 +3195,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
only_once=True)
deprioritize_pr = True

pr = initial_pr if client == 'web' else None
try:
pr = pr or self._extract_player_response(
client, video_id,
@@ -3091,10 +3212,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if pr_id := self._invalid_player_response(pr, video_id):
skipped_clients[client] = pr_id
elif pr:
# Save client name for introspection later
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
# Save client details for introspection later
innertube_context = traverse_obj(player_ytcfg or self._get_default_ytcfg(client), 'INNERTUBE_CONTEXT')
sd = pr.setdefault('streamingData', {})
sd[STREAMING_DATA_CLIENT_NAME] = client
sd[STREAMING_DATA_INITIAL_PO_TOKEN] = gvs_po_token
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = client
f[STREAMING_DATA_INITIAL_PO_TOKEN] = gvs_po_token
@@ -3103,9 +3227,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
prs.append(pr)

# web_embedded can work around age-gate and age-verification for some embeddable videos
if self._is_agegated(pr) and variant != 'web_embedded':
append_client(f'web_embedded.{base_client}')
# Unauthenticated users will only get web_embedded client formats if age-gated
if self._is_agegated(pr) and not self.is_authenticated:
self.to_screen(
f'{video_id}: This video is age-restricted; some formats may be missing '
f'without authentication. {self._youtube_login_hint}', only_once=True)

# EU countries require age-verification for accounts to access age-restricted videos
# If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
if self.is_authenticated and self._is_agegated(pr):
embedding_is_disabled = variant == 'web_embedded' and self._is_unplayable(pr)
if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
self.to_screen(
f'{video_id}: This video is age-restricted and YouTube is requiring '
'account age-verification; some formats may be missing', only_once=True)
@@ -3146,6 +3280,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
self.report_warning(msg, only_once=True)

def _report_pot_subtitles_skipped(self, video_id, client_name, msg=None):
msg = msg or (
f'{video_id}: Some {client_name} client subtitles require a PO Token which was not provided. '
'They will be discarded since they are not downloadable as-is. '
f'You can manually pass a Subtitles PO Token for this client with '
f'--extractor-args "youtube:po_token={client_name}.subs+XXX" . '
f'For more information, refer to {PO_TOKEN_GUIDE_URL}')

subs_wanted = any((
self.get_param('writesubtitles'),
self.get_param('writeautomaticsub'),
self.get_param('listsubtitles')))

# Only raise a warning for non-default clients, to not confuse users.
if not subs_wanted or client_name in (*self._DEFAULT_CLIENTS, *self._DEFAULT_AUTHED_CLIENTS):
self.write_debug(msg, only_once=True)
else:
self.report_warning(msg, only_once=True)

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
CHUNK_SIZE = 10 << 20
PREFERRED_LANG_VALUE = 10
@@ -3232,12 +3385,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
if not all((sc, fmt_url, player_url, encrypted_sig)):
self.report_warning(
f'Some {client_name} client https formats have been skipped as they are missing a url. '
f'{"Your account" if self.is_authenticated else "The current session"} may have '
f'the SSAP (server-side ads) experiment which interferes with yt-dlp. '
f'Please see https://github.com/yt-dlp/yt-dlp/issues/12482 for more details.',
video_id, only_once=True)
msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
if client_name == 'web':
msg += 'YouTube is forcing SABR streaming for this client. '
else:
msg += (
f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
f'{"your account" if self.is_authenticated else "the current session"}. '
)
msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
self.report_warning(msg, video_id, only_once=True)
continue
try:
fmt_url += '&{}={}'.format(
@@ -3324,8 +3481,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'width': int_or_none(fmt.get('width')),
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
# Strictly de-prioritize broken, damaged and 3gp formats
'preference': -20 if require_po_token else -10 if is_damaged else -2 if itag == '17' else None,
# Strictly de-prioritize damaged and 3gp formats
'preference': -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
@@ -3433,6 +3590,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
fmts, subs = self._extract_m3u8_formats_and_subtitles(
hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
for sub in traverse_obj(subs, (..., ..., {dict})):
# HLS subs (m3u8) do not need a PO token; save client name for debugging
sub[STREAMING_DATA_CLIENT_NAME] = client_name
subtitles = self._merge_subtitles(subs, subtitles)
for f in fmts:
if process_manifest_format(f, 'hls', client_name, self._search_regex(
@@ -3444,6 +3604,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if po_token:
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
for sub in traverse_obj(subs, (..., ..., {dict})):
# TODO: Investigate if DASH subs ever need a PO token; save client name for debugging
sub[STREAMING_DATA_CLIENT_NAME] = client_name
subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
for f in formats:
if process_manifest_format(f, 'dash', client_name, f['format_id'], po_token):
@@ -3635,7 +3798,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
subreason = clean_html(self._get_text(pemr, 'subreason') or '')
if subreason:
if subreason == 'The uploader has not made this video available in your country.':
if subreason.startswith('The uploader has not made this video available in your country'):
countries = get_first(microformats, 'availableCountries')
if not countries:
regions_allowed = search_meta('regionsAllowed')
@@ -3646,6 +3809,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if 'sign in' in reason.lower():
reason = remove_end(reason, 'This helps protect our community. Learn more')
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
reason += '. YouTube is requiring a captcha challenge before playback'
elif "This content isn't available, try again later" in reason:
reason = (
f'{remove_end(reason.strip(), ".")}. {"Your account" if self.is_authenticated else "The current session"} '
f'has been rate-limited by YouTube for up to an hour. It is recommended to use `-t sleep` to add a delay '
f'between video requests to avoid exceeding the rate limit. For more information, refer to '
f'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#this-content-isnt-available-try-again-later'
)
self.raise_no_formats(reason, expected=True)

keywords = get_first(video_details, 'keywords', expected_type=list) or []
@@ -3752,53 +3924,94 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tags': keywords,
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
'live_status': live_status,
'media_type': 'livestream' if get_first(video_details, 'isLiveContent') else None,
'media_type': (
'livestream' if get_first(video_details, 'isLiveContent')
else 'short' if get_first(microformats, 'isShortsEligible')
else 'video'),
'release_timestamp': live_start_time,
'_format_sort_fields': ( # source_preference is lower for potentially damaged formats
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang', 'proto'),
}

def get_lang_code(track):
return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
or track.get('languageCode'))

def process_language(container, base_url, lang_code, sub_name, client_name, query):
lang_subs = container.setdefault(lang_code, [])
for fmt in self._SUBTITLE_FORMATS:
query = {**query, 'fmt': fmt}
lang_subs.append({
'ext': fmt,
'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
'name': sub_name,
STREAMING_DATA_CLIENT_NAME: client_name,
})

subtitles = {}
pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
if pctr:
def get_lang_code(track):
return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
or track.get('languageCode'))
skipped_subs_clients = set()

# Converted into dicts to remove duplicates
captions = {
get_lang_code(sub): sub
for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
translation_languages = {
lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}
# Only web/mweb clients provide translationLanguages, so include initial_pr in the traversal
translation_languages = {
lang['languageCode']: self._get_text(lang['languageName'], max_runs=1)
for lang in traverse_obj(player_responses, (
..., 'captions', 'playerCaptionsTracklistRenderer', 'translationLanguages',
lambda _, v: v['languageCode'] and v['languageName']))
}
# NB: Constructing the full subtitle dictionary is slow
get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

def process_language(container, base_url, lang_code, sub_name, query):
lang_subs = container.setdefault(lang_code, [])
for fmt in self._SUBTITLE_FORMATS:
query.update({
'fmt': fmt,
})
lang_subs.append({
'ext': fmt,
'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
'name': sub_name,
})
# Filter out initial_pr which does not have streamingData (smuggled client context)
prs = traverse_obj(player_responses, (
lambda _, v: v['streamingData'] and v['captions']['playerCaptionsTracklistRenderer']))
all_captions = traverse_obj(prs, (
..., 'captions', 'playerCaptionsTracklistRenderer', 'captionTracks', ..., {dict}))
need_subs_langs = {get_lang_code(sub) for sub in all_captions if sub.get('kind') != 'asr'}
need_caps_langs = {
remove_start(get_lang_code(sub), 'a-')
for sub in all_captions if sub.get('kind') == 'asr'}

# NB: Constructing the full subtitle dictionary is slow
get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
for lang_code, caption_track in captions.items():
base_url = caption_track.get('baseUrl')
orig_lang = parse_qs(base_url).get('lang', [None])[-1]
if not base_url:
continue
for pr in prs:
pctr = pr['captions']['playerCaptionsTracklistRenderer']
client_name = pr['streamingData'][STREAMING_DATA_CLIENT_NAME]
innertube_client_name = pr['streamingData'][STREAMING_DATA_INNERTUBE_CONTEXT]['client']['clientName']
required_contexts = self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
fetch_subs_po_token_func = pr['streamingData'][STREAMING_DATA_FETCH_SUBS_PO_TOKEN]

pot_params = {}
already_fetched_pot = False

for caption_track in traverse_obj(pctr, ('captionTracks', lambda _, v: v['baseUrl'])):
base_url = caption_track['baseUrl']
qs = parse_qs(base_url)
lang_code = get_lang_code(caption_track)
requires_pot = (
# We can detect the experiment for now
any(e in traverse_obj(qs, ('exp', ...)) for e in ('xpe', 'xpv'))
or _PoTokenContext.SUBS in required_contexts)

if not already_fetched_pot:
already_fetched_pot = True
if subs_po_token := fetch_subs_po_token_func(required=requires_pot):
pot_params.update({
'pot': subs_po_token,
'potc': '1',
'c': innertube_client_name,
})

if not pot_params and requires_pot:
skipped_subs_clients.add(client_name)
self._report_pot_subtitles_skipped(video_id, client_name)
break

orig_lang = qs.get('lang', [None])[-1]
lang_name = self._get_text(caption_track, 'name', max_runs=1)
if caption_track.get('kind') != 'asr':
if not lang_code:
continue
process_language(
subtitles, base_url, lang_code, lang_name, {})
subtitles, base_url, lang_code, lang_name, client_name, pot_params)
if not caption_track.get('isTranslatable'):
continue
for trans_code, trans_name in translation_languages.items():
@@ -3818,10 +4031,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
process_language(
automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
automatic_captions, base_url, f'{trans_code}-orig',
f'{trans_name} (Original)', client_name, pot_params)
# Setting tlang=lang returns damaged subtitles.
process_language(automatic_captions, base_url, trans_code, trans_name,
{} if orig_lang == orig_trans_code else {'tlang': trans_code})
process_language(
automatic_captions, base_url, trans_code, trans_name, client_name,
pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params})

# Avoid duplication if we've already got everything we need
need_subs_langs.difference_update(subtitles)
need_caps_langs.difference_update(automatic_captions)
if not (need_subs_langs or need_caps_langs):
break

if skipped_subs_clients and (need_subs_langs or need_caps_langs):
self._report_pot_subtitles_skipped(video_id, True, msg=join_nonempty(
f'{video_id}: There are missing subtitles languages because a PO token was not provided.',
need_subs_langs and f'Subtitles for these languages are missing: {", ".join(need_subs_langs)}.',
need_caps_langs and f'Automatic captions for {len(need_caps_langs)} languages are missing.',
delim=' '))

info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles
@@ -3874,7 +4102,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not traverse_obj(initial_data, 'contents'):
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
initial_data = None
if not initial_data:
if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
query = {'videoId': video_id}
query.update(self._get_checkok_params())
initial_data = self._extract_response(

309	yt_dlp/extractor/youtube/pot/README.md	Normal file
@@ -0,0 +1,309 @@
# YoutubeIE PO Token Provider Framework

As part of the YouTube extractor, we have a framework for providing PO Tokens programmatically. This can be used by plugins.

Refer to the [PO Token Guide](https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide) for more information on PO Tokens.

> [!TIP]
> If publishing a PO Token Provider plugin to GitHub, add the [yt-dlp-pot-provider](https://github.com/topics/yt-dlp-pot-provider) topic to your repository to help users find it.


## Public APIs

- `yt_dlp.extractor.youtube.pot.cache`
- `yt_dlp.extractor.youtube.pot.provider`
- `yt_dlp.extractor.youtube.pot.utils`

Everything else is internal-only and no guarantees are made about the API stability.

> [!WARNING]
> We will try our best to maintain stability with the public APIs.
> However, due to the nature of extractors and YouTube, we may need to remove or change APIs in the future.
> If you are using these APIs outside yt-dlp plugins, please account for this by importing them safely.
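
A minimal sketch of such a guarded import (the `HAVE_YTDLP_POT` flag name is our own, not part of yt-dlp):

```python
# Hedged sketch: import the public API defensively when used outside yt-dlp plugins
try:
    from yt_dlp.extractor.youtube.pot.provider import PoTokenProvider, register_provider
    HAVE_YTDLP_POT = True
except ImportError:  # the API may move or change in a future yt-dlp release
    PoTokenProvider = register_provider = None
    HAVE_YTDLP_POT = False
```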

## PO Token Provider

`yt_dlp.extractor.youtube.pot.provider`

```python
from yt_dlp.extractor.youtube.pot.provider import (
    PoTokenRequest,
    PoTokenContext,
    PoTokenProvider,
    PoTokenResponse,
    PoTokenProviderError,
    PoTokenProviderRejectedRequest,
    register_provider,
    register_preference,
    ExternalRequestFeature,
)
from yt_dlp.networking.common import Request
from yt_dlp.extractor.youtube.pot.utils import get_webpo_content_binding
from yt_dlp.utils import traverse_obj
from yt_dlp.networking.exceptions import RequestError
import json


@register_provider
class MyPoTokenProviderPTP(PoTokenProvider):  # Provider class name must end with "PTP"
    PROVIDER_VERSION = '0.2.1'
    # Define a unique display name for the provider
    PROVIDER_NAME = 'my-provider'
    BUG_REPORT_LOCATION = 'https://issues.example.com/report'

    # -- Validation shortcuts. Set these to None to disable. --

    # Innertube Client Name.
    # For example, "WEB", "ANDROID", "TVHTML5".
    # For a list of WebPO client names,
    # see yt_dlp.extractor.youtube.pot.utils.WEBPO_CLIENTS.
    # Also see yt_dlp.extractor.youtube._base.INNERTUBE_CLIENTS
    # for a list of client names currently supported by the YouTube extractor.
    _SUPPORTED_CLIENTS = ('WEB', 'TVHTML5')

    _SUPPORTED_CONTEXTS = (
        PoTokenContext.GVS,
    )

    # If your provider makes external requests to websites (i.e. to youtube.com)
    # using another library or service (i.e., not _request_webpage),
    # set the request features that are supported here.
    # If only using _request_webpage to make external requests, set this to None.
    _SUPPORTED_EXTERNAL_REQUEST_FEATURES = (
        ExternalRequestFeature.PROXY_SCHEME_HTTP,
        ExternalRequestFeature.SOURCE_ADDRESS,
        ExternalRequestFeature.DISABLE_TLS_VERIFICATION
    )

    def is_available(self) -> bool:
        """
        Check if the provider is available (e.g. all required dependencies are available)
        This is used to determine if the provider should be used and to provide debug information.

        IMPORTANT: This method SHOULD NOT make any network requests or perform any expensive operations.

        Since this is called multiple times, we recommend caching the result.
        """
        return True

    def close(self):
        # Optional close hook, called when YoutubeDL is closed.
        pass

    def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
        # ℹ️ If you need to validate the request before making the request to the external source.
        # Raise yt_dlp.extractor.youtube.pot.provider.PoTokenProviderRejectedRequest if the request is not supported.
        if request.is_authenticated:
            raise PoTokenProviderRejectedRequest(
                'This provider does not support authenticated requests'
            )

        # ℹ️ Settings are pulled from extractor args passed to yt-dlp with the key `youtubepot-<PROVIDER_KEY>`.
        # For this example, the extractor arg would be:
        # `--extractor-args "youtubepot-mypotokenprovider:url=https://custom.example.com/get_pot"`
        external_provider_url = self._configuration_arg(
            'url', default=['https://provider.example.com/get_pot'])[0]

        # See below for logging guidelines
        self.logger.trace(f'Using external provider URL: {external_provider_url}')

        # You should use the internal HTTP client to make requests where possible,
        # as it will handle cookies and other networking settings passed to yt-dlp.
        try:
            # See docstring in _request_webpage method for request tips
            response = self._request_webpage(
                Request(external_provider_url, data=json.dumps({
                    'content_binding': get_webpo_content_binding(request),
                    'proxy': request.request_proxy,
                    'headers': request.request_headers,
                    'source_address': request.request_source_address,
                    'verify_tls': request.request_verify_tls,
                    # Important: If your provider has its own caching, please respect `bypass_cache`.
                    # This may be used in the future to request a fresh PO Token if required.
                    'do_not_cache': request.bypass_cache,
                }).encode(), proxies={'all': None}),
                pot_request=request,
                note=(
                    f'Requesting {request.context.value} PO Token '
                    f'for {request.internal_client_name} client from external provider'),
            )

        except RequestError as e:
            # ℹ️ If there is an error, raise PoTokenProviderError.
            # You can specify whether it is expected or not. If it is unexpected,
            # the log will include a link to the bug report location (BUG_REPORT_LOCATION).
            raise PoTokenProviderError(
                'Networking error while fetching PO Token from external provider',
                expected=True
            ) from e

        # Note: PO Token is expected to be base64url encoded
        po_token = traverse_obj(response, 'po_token')
        if not po_token:
            raise PoTokenProviderError(
                'Bad PO Token Response from external provider',
                expected=False
            )

        return PoTokenResponse(
            po_token=po_token,
            # Optional, add a custom expiration timestamp for the token. Use for caching.
            # By default, yt-dlp will use the default ttl from a registered cache spec (see below)
            # Set to 0 or -1 to not cache this response.
            expires_at=None,
        )


# If there are multiple PO Token Providers that can handle the same PoTokenRequest,
# you can define a preference function to increase/decrease the priority of providers.

@register_preference(MyPoTokenProviderPTP)
def my_provider_preference(provider: PoTokenProvider, request: PoTokenRequest) -> int:
    return 50
```
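
For illustration, the same `url` setting the provider reads via `_configuration_arg` can also be passed when embedding yt-dlp from Python (a sketch; it assumes the plugin defining `MyPoTokenProviderPTP` is installed where yt-dlp discovers plugins):

```python
# Hedged sketch: passing the provider's `url` setting from the Python API.
# Equivalent to --extractor-args "youtubepot-mypotokenprovider:url=..."
import yt_dlp

opts = {
    'extractor_args': {
        # values are lists of strings, mirroring repeated CLI arguments
        'youtubepot-mypotokenprovider': {'url': ['https://custom.example.com/get_pot']},
    },
}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```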

## Logging Guidelines

- Use the `self.logger` object to log messages.
- When making HTTP requests or any other expensive operation, use `self.logger.info` to log a message to standard non-verbose output.
  - This lets users know what is happening when a time-expensive operation is taking place.
  - It is recommended to include the PO Token context and internal client name in the message if possible.
    - For example, `self.logger.info(f'Requesting {request.context.value} PO Token for {request.internal_client_name} client from external provider')`.
- Use `self.logger.debug` to log a message to the verbose output (`--verbose`).
  - For debugging information visible to users posting verbose logs.
  - Try not to log too much; prefer trace logging for detailed debug messages.
- Use `self.logger.trace` to log a message to the PO Token debug output (`--extractor-args "youtube:pot_trace=true"`).
  - Log as much as you like here as needed for debugging your provider.
- Avoid logging PO Tokens or any sensitive information to debug or info output.
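
As a sketch, a provider method applying these guidelines end to end might look like this (`request` is the `PoTokenRequest` from the provider example above):

```python
# Hedged sketch of the three logging levels inside a provider method
def _real_request_pot(self, request):
    # info: visible by default; announce the expensive network operation
    self.logger.info(
        f'Requesting {request.context.value} PO Token '
        f'for {request.internal_client_name} client from external provider')
    # debug: shown with --verbose; keep it short
    self.logger.debug('Resolved external provider endpoint')
    # trace: shown with --extractor-args "youtube:pot_trace=true";
    # be as chatty as needed here, but never log the token itself
    self.logger.trace(f'PoTokenRequest context={request.context}, client={request.internal_client_name}')
```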

## Debugging

- Use `-v --extractor-args "youtube:pot_trace=true"` to enable PO Token debug output.

## Caching

> [!WARNING]
> The following describes more advanced features that most users/developers will not need to use.

> [!IMPORTANT]
> yt-dlp currently has a built-in LRU Memory Cache Provider and a cache spec provider for WebPO Tokens.
> You should only need to implement cache providers if you want an external cache, or a cache spec if you are handling non-WebPO Tokens.

### Cache Providers

`yt_dlp.extractor.youtube.pot.cache`

```python
from yt_dlp.extractor.youtube.pot.cache import (
    PoTokenCacheProvider,
    register_preference,
    register_provider
)

from yt_dlp.extractor.youtube.pot.provider import PoTokenRequest


@register_provider
class MyCacheProviderPCP(PoTokenCacheProvider):  # Provider class name must end with "PCP"
    PROVIDER_VERSION = '0.1.0'
    # Define a unique display name for the provider
    PROVIDER_NAME = 'my-cache-provider'
    BUG_REPORT_LOCATION = 'https://issues.example.com/report'

    def is_available(self) -> bool:
        """
        Check if the provider is available (e.g. all required dependencies are available)
        This is used to determine if the provider should be used and to provide debug information.

        IMPORTANT: This method SHOULD NOT make any network requests or perform any expensive operations.

        Since this is called multiple times, we recommend caching the result.
        """
        return True

    def get(self, key: str):
        # ℹ️ Similar to PO Token Providers, Cache Providers and Cache Spec Providers
        # are passed down extractor args matching key youtubepot-<PROVIDER_KEY>.
        some_setting = self._configuration_arg('some_setting', default=['default_value'])[0]
        return self.my_cache.get(key)

    def store(self, key: str, value: str, expires_at: int):
        # ⚠ expires_at MUST be respected.
        # Cache entries should not be returned if they have expired.
        self.my_cache.store(key, value, expires_at)

    def delete(self, key: str):
        self.my_cache.delete(key)

    def close(self):
        # Optional close hook, called when the YoutubeDL instance is closed.
        pass

# If there are multiple PO Token Cache Providers available, you can
# define a preference function to increase/decrease the priority of providers.

# IMPORTANT: Providers should be in preference of cache lookup time.
# For example, a memory cache should have a higher preference than a disk cache.

# VERY IMPORTANT: yt-dlp has a built-in memory cache with a priority of 10000.
# Your cache provider should be lower than this.


@register_preference(MyCacheProviderPCP)
def my_cache_preference(provider: PoTokenCacheProvider, request: PoTokenRequest) -> int:
    return 50
```

### Cache Specs

`yt_dlp.extractor.youtube.pot.cache`

These are used to provide information on how to cache a particular PO Token Request.
You might have a different cache spec for different kinds of PO Tokens.

```python
from yt_dlp.extractor.youtube.pot.cache import (
    PoTokenCacheSpec,
    PoTokenCacheSpecProvider,
    CacheProviderWritePolicy,
    register_spec,
)
from yt_dlp.utils import traverse_obj
from yt_dlp.extractor.youtube.pot.provider import PoTokenRequest


@register_spec
class MyCacheSpecProviderPCSP(PoTokenCacheSpecProvider):  # Provider class name must end with "PCSP"
    PROVIDER_VERSION = '0.1.0'
    # Define a unique display name for the provider
    PROVIDER_NAME = 'mycachespec'
    BUG_REPORT_LOCATION = 'https://issues.example.com/report'

    def generate_cache_spec(self, request: PoTokenRequest):

        client_name = traverse_obj(request.innertube_context, ('client', 'clientName'))
        if client_name != 'ANDROID':
            # ℹ️ If the request is not supported by the cache spec, return None
            return None

        # Generate a cache spec for the request
        return PoTokenCacheSpec(
            # Key bindings to uniquely identify the request. These are used to generate a cache key.
            key_bindings={
                'client_name': client_name,
                'content_binding': 'unique_content_binding',
                'ip': traverse_obj(request.innertube_context, ('client', 'remoteHost')),
                'source_address': request.request_source_address,
                'proxy': request.request_proxy,
            },
            # Default Cache TTL in seconds
            default_ttl=21600,

            # Optional: Specify a write policy.
            # WRITE_FIRST will write to the highest priority provider only,
            # whereas WRITE_ALL will write to all providers.
            # WRITE_FIRST may be useful if the PO Token is short-lived
            # and there is no use writing to all providers.
            write_policy=CacheProviderWritePolicy.WRITE_ALL,
        )
```

3	yt_dlp/extractor/youtube/pot/__init__.py	Normal file
@@ -0,0 +1,3 @@
# Trigger import of built-in providers
from ._builtin.memory_cache import MemoryLRUPCP as _MemoryLRUPCP  # noqa: F401
from ._builtin.webpo_cachespec import WebPoPCSP as _WebPoPCSP  # noqa: F401

0	yt_dlp/extractor/youtube/pot/_builtin/__init__.py	Normal file

78	yt_dlp/extractor/youtube/pot/_builtin/memory_cache.py	Normal file
@@ -0,0 +1,78 @@
from __future__ import annotations

import datetime as dt
import typing
from threading import Lock

from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
from yt_dlp.extractor.youtube.pot._registry import _pot_memory_cache
from yt_dlp.extractor.youtube.pot.cache import (
    PoTokenCacheProvider,
    register_preference,
    register_provider,
)


def initialize_global_cache(max_size: int):
    if _pot_memory_cache.value.get('cache') is None:
        _pot_memory_cache.value['cache'] = {}
        _pot_memory_cache.value['lock'] = Lock()
        _pot_memory_cache.value['max_size'] = max_size

    if _pot_memory_cache.value['max_size'] != max_size:
        raise ValueError('Cannot change max_size of initialized global memory cache')

    return (
        _pot_memory_cache.value['cache'],
        _pot_memory_cache.value['lock'],
        _pot_memory_cache.value['max_size'],
    )


@register_provider
class MemoryLRUPCP(PoTokenCacheProvider, BuiltinIEContentProvider):
    PROVIDER_NAME = 'memory'
    DEFAULT_CACHE_SIZE = 25

    def __init__(
        self,
        *args,
        initialize_cache: typing.Callable[[int], tuple[dict[str, tuple[str, int]], Lock, int]] = initialize_global_cache,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)
        self.cache, self.lock, self.max_size = initialize_cache(self.DEFAULT_CACHE_SIZE)

    def is_available(self) -> bool:
        return True

    def get(self, key: str) -> str | None:
        with self.lock:
            if key not in self.cache:
                return None
            value, expires_at = self.cache.pop(key)
            if expires_at < int(dt.datetime.now(dt.timezone.utc).timestamp()):
                return None
            self.cache[key] = (value, expires_at)
            return value

    def store(self, key: str, value: str, expires_at: int):
        with self.lock:
            if expires_at < int(dt.datetime.now(dt.timezone.utc).timestamp()):
                return
            if key in self.cache:
                self.cache.pop(key)
            self.cache[key] = (value, expires_at)
            if len(self.cache) > self.max_size:
                oldest_key = next(iter(self.cache))
                self.cache.pop(oldest_key)

    def delete(self, key: str):
        with self.lock:
            self.cache.pop(key, None)


@register_preference(MemoryLRUPCP)
def memorylru_preference(*_, **__):
    # Memory LRU Cache SHOULD be the highest priority
    return 10000

48	yt_dlp/extractor/youtube/pot/_builtin/webpo_cachespec.py	Normal file
@@ -0,0 +1,48 @@
from __future__ import annotations
|
||||
|
||||
from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
|
||||
from yt_dlp.extractor.youtube.pot.cache import (
|
||||
CacheProviderWritePolicy,
|
||||
PoTokenCacheSpec,
|
||||
PoTokenCacheSpecProvider,
|
||||
register_spec,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot.provider import (
|
||||
PoTokenRequest,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot.utils import ContentBindingType, get_webpo_content_binding
|
||||
from yt_dlp.utils import traverse_obj
|
||||
|
||||
|
||||
@register_spec
|
||||
class WebPoPCSP(PoTokenCacheSpecProvider, BuiltinIEContentProvider):
|
||||
PROVIDER_NAME = 'webpo'
|
||||
|
||||
def generate_cache_spec(self, request: PoTokenRequest) -> PoTokenCacheSpec | None:
|
||||
bind_to_visitor_id = self._configuration_arg(
|
||||
'bind_to_visitor_id', default=['true'])[0] == 'true'
|
||||
|
||||
content_binding, content_binding_type = get_webpo_content_binding(
|
||||
request, bind_to_visitor_id=bind_to_visitor_id)
|
||||
|
||||
if not content_binding or not content_binding_type:
|
||||
return None
|
||||
|
||||
write_policy = CacheProviderWritePolicy.WRITE_ALL
|
||||
if content_binding_type == ContentBindingType.VIDEO_ID:
|
||||
write_policy = CacheProviderWritePolicy.WRITE_FIRST
|
||||
|
||||
return PoTokenCacheSpec(
|
||||
key_bindings={
|
||||
't': 'webpo',
|
||||
'cb': content_binding,
|
||||
'cbt': content_binding_type.value,
|
||||
'ip': traverse_obj(request.innertube_context, ('client', 'remoteHost')),
|
||||
'sa': request.request_source_address,
|
||||
'px': request.request_proxy,
|
||||
},
|
||||
# Integrity token response usually states it has a ttl of 12 hours (43200 seconds).
|
||||
# We will default to 6 hours to be safe.
|
||||
default_ttl=21600,
|
||||
write_policy=write_policy,
|
||||
)
|
||||
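For a concrete sense of what this spec feeds into the cache layer, the key_bindings dict for an unauthenticated GVS request bound to a visitor ID looks roughly like this (all values are illustrative placeholders, not real data):

# Illustrative shape only; every value below is a placeholder.
key_bindings = {
    't': 'webpo',           # spec type tag
    'cb': 'q1w2e3r4t5y',    # content binding: a visitor id here; may instead be
                            # visitor_data, a datasync id, or a video id
    'cbt': 'visitor_id',    # ContentBindingType.value
    'ip': '203.0.113.7',    # innertube client remoteHost, if known
    'sa': None,             # --source-address; None entries are stripped later
    'px': None,             # proxy; likewise stripped
}

PoTokenCache._generate_key_bindings() in _director.py below then drops the None entries and adds '_dlp_cache' and '_p' before the dict is hashed into the cache key.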
468 yt_dlp/extractor/youtube/pot/_director.py Normal file
@@ -0,0 +1,468 @@
from __future__ import annotations

import base64
import binascii
import dataclasses
import datetime as dt
import hashlib
import json
import typing
import urllib.parse
from collections.abc import Iterable

from yt_dlp.extractor.youtube.pot._provider import (
    BuiltinIEContentProvider,
    IEContentProvider,
    IEContentProviderLogger,
)
from yt_dlp.extractor.youtube.pot._registry import (
    _pot_cache_provider_preferences,
    _pot_cache_providers,
    _pot_pcs_providers,
    _pot_providers,
    _ptp_preferences,
)
from yt_dlp.extractor.youtube.pot.cache import (
    CacheProviderWritePolicy,
    PoTokenCacheProvider,
    PoTokenCacheProviderError,
    PoTokenCacheSpec,
    PoTokenCacheSpecProvider,
)
from yt_dlp.extractor.youtube.pot.provider import (
    PoTokenProvider,
    PoTokenProviderError,
    PoTokenProviderRejectedRequest,
    PoTokenRequest,
    PoTokenResponse,
    provider_bug_report_message,
)
from yt_dlp.utils import bug_reports_message, format_field, join_nonempty

if typing.TYPE_CHECKING:
    from yt_dlp.extractor.youtube.pot.cache import CacheProviderPreference
    from yt_dlp.extractor.youtube.pot.provider import Preference


class YoutubeIEContentProviderLogger(IEContentProviderLogger):
    def __init__(self, ie, prefix, log_level: IEContentProviderLogger.LogLevel | None = None):
        self.__ie = ie
        self.prefix = prefix
        self.log_level = log_level if log_level is not None else self.LogLevel.INFO

    def _format_msg(self, message: str):
        prefixstr = format_field(self.prefix, None, '[%s] ')
        return f'{prefixstr}{message}'

    def trace(self, message: str):
        if self.log_level <= self.LogLevel.TRACE:
            self.__ie.write_debug(self._format_msg('TRACE: ' + message))

    def debug(self, message: str):
        if self.log_level <= self.LogLevel.DEBUG:
            self.__ie.write_debug(self._format_msg(message))

    def info(self, message: str):
        if self.log_level <= self.LogLevel.INFO:
            self.__ie.to_screen(self._format_msg(message))

    def warning(self, message: str, *, once=False):
        if self.log_level <= self.LogLevel.WARNING:
            self.__ie.report_warning(self._format_msg(message), only_once=once)

    def error(self, message: str):
        if self.log_level <= self.LogLevel.ERROR:
            self.__ie._downloader.report_error(self._format_msg(message), is_error=False)


class PoTokenCache:

    def __init__(
        self,
        logger: IEContentProviderLogger,
        cache_providers: list[PoTokenCacheProvider],
        cache_spec_providers: list[PoTokenCacheSpecProvider],
        cache_provider_preferences: list[CacheProviderPreference] | None = None,
    ):
        self.cache_providers: dict[str, PoTokenCacheProvider] = {
            provider.PROVIDER_KEY: provider for provider in (cache_providers or [])}
        self.cache_provider_preferences: list[CacheProviderPreference] = cache_provider_preferences or []
        self.cache_spec_providers: dict[str, PoTokenCacheSpecProvider] = {
            provider.PROVIDER_KEY: provider for provider in (cache_spec_providers or [])}
        self.logger = logger

    def _get_cache_providers(self, request: PoTokenRequest) -> Iterable[PoTokenCacheProvider]:
        """Sorts available cache providers by preference, given a request"""
        preferences = {
            provider: sum(pref(provider, request) for pref in self.cache_provider_preferences)
            for provider in self.cache_providers.values()
        }
        if self.logger.log_level <= self.logger.LogLevel.TRACE:
            # calling is_available() for every PO Token cache provider upfront may have some overhead
            self.logger.trace(f'PO Token Cache Providers: {provider_display_list(self.cache_providers.values())}')
            self.logger.trace('Cache Provider preferences for this request: {}'.format(', '.join(
                f'{provider.PROVIDER_KEY}={pref}' for provider, pref in preferences.items())))

        return (
            provider for provider in sorted(
                self.cache_providers.values(), key=preferences.get, reverse=True) if provider.is_available())

    def _get_cache_spec(self, request: PoTokenRequest) -> PoTokenCacheSpec | None:
        for provider in self.cache_spec_providers.values():
            if not provider.is_available():
                continue
            try:
                spec = provider.generate_cache_spec(request)
                if not spec:
                    continue
                if not validate_cache_spec(spec):
                    self.logger.error(
                        f'PoTokenCacheSpecProvider "{provider.PROVIDER_KEY}" generate_cache_spec() '
                        f'returned invalid spec {spec}{provider_bug_report_message(provider)}')
                    continue
                spec = dataclasses.replace(spec, _provider=provider)
                self.logger.trace(
                    f'Retrieved cache spec {spec} from cache spec provider "{provider.PROVIDER_NAME}"')
                return spec
            except Exception as e:
                self.logger.error(
                    f'Error occurred with "{provider.PROVIDER_NAME}" PO Token cache spec provider: '
                    f'{e!r}{provider_bug_report_message(provider)}')
                continue
        return None

    def _generate_key_bindings(self, spec: PoTokenCacheSpec) -> dict[str, str]:
        bindings_cleaned = {
            **{k: v for k, v in spec.key_bindings.items() if v is not None},
            # Allow us to invalidate caches if such a need arises
            '_dlp_cache': 'v1',
        }
        if spec._provider:
            bindings_cleaned['_p'] = spec._provider.PROVIDER_KEY
        self.logger.trace(f'Generated cache key bindings: {bindings_cleaned}')
        return bindings_cleaned

    def _generate_key(self, bindings: dict) -> str:
        binding_string = ''.join(repr(dict(sorted(bindings.items()))))
        return hashlib.sha256(binding_string.encode()).hexdigest()

    def get(self, request: PoTokenRequest) -> PoTokenResponse | None:
        spec = self._get_cache_spec(request)
        if not spec:
            self.logger.trace('No cache spec available for this request, unable to fetch from cache')
            return None

        cache_key = self._generate_key(self._generate_key_bindings(spec))
        self.logger.trace(f'Attempting to access PO Token cache using key: {cache_key}')

        for idx, provider in enumerate(self._get_cache_providers(request)):
            try:
                self.logger.trace(
                    f'Attempting to fetch PO Token response from "{provider.PROVIDER_NAME}" cache provider')
                cache_response = provider.get(cache_key)
                if not cache_response:
                    continue
                try:
                    po_token_response = PoTokenResponse(**json.loads(cache_response))
                except (TypeError, ValueError, json.JSONDecodeError):
                    po_token_response = None
                if not validate_response(po_token_response):
                    self.logger.error(
                        f'Invalid PO Token response retrieved from cache provider "{provider.PROVIDER_NAME}": '
                        f'{cache_response}{provider_bug_report_message(provider)}')
                    provider.delete(cache_key)
                    continue
                self.logger.trace(
                    f'PO Token response retrieved from cache using "{provider.PROVIDER_NAME}" provider: '
                    f'{po_token_response}')
                if idx > 0:
                    # Write back to the highest priority cache provider,
                    # so we stop trying to fetch from lower priority providers
                    self.logger.trace('Writing PO Token response to highest priority cache provider')
                    self.store(request, po_token_response, write_policy=CacheProviderWritePolicy.WRITE_FIRST)

                return po_token_response
            except PoTokenCacheProviderError as e:
                self.logger.warning(
                    f'Error from "{provider.PROVIDER_NAME}" PO Token cache provider: '
                    f'{e!r}{provider_bug_report_message(provider) if not e.expected else ""}')
                continue
            except Exception as e:
                self.logger.error(
                    f'Error occurred with "{provider.PROVIDER_NAME}" PO Token cache provider: '
                    f'{e!r}{provider_bug_report_message(provider)}',
                )
                continue
        return None

    def store(
        self,
        request: PoTokenRequest,
        response: PoTokenResponse,
        write_policy: CacheProviderWritePolicy | None = None,
    ):
        spec = self._get_cache_spec(request)
        if not spec:
            self.logger.trace('No cache spec available for this request. Not caching.')
            return

        if not validate_response(response):
            self.logger.error(
                f'Invalid PO Token response provided to PoTokenCache.store(): '
                f'{response}{bug_reports_message()}')
            return

        cache_key = self._generate_key(self._generate_key_bindings(spec))
        self.logger.trace(f'Attempting to access PO Token cache using key: {cache_key}')

        default_expires_at = int(dt.datetime.now(dt.timezone.utc).timestamp()) + spec.default_ttl
        cache_response = dataclasses.replace(response, expires_at=response.expires_at or default_expires_at)

        write_policy = write_policy or spec.write_policy
        self.logger.trace(f'Using write policy: {write_policy}')

        for idx, provider in enumerate(self._get_cache_providers(request)):
            try:
                self.logger.trace(
                    f'Caching PO Token response in "{provider.PROVIDER_NAME}" cache provider '
                    f'(key={cache_key}, expires_at={cache_response.expires_at})')
                provider.store(
                    key=cache_key,
                    value=json.dumps(dataclasses.asdict(cache_response)),
                    expires_at=cache_response.expires_at)
            except PoTokenCacheProviderError as e:
                self.logger.warning(
                    f'Error from "{provider.PROVIDER_NAME}" PO Token cache provider: '
                    f'{e!r}{provider_bug_report_message(provider) if not e.expected else ""}')
            except Exception as e:
                self.logger.error(
                    f'Error occurred with "{provider.PROVIDER_NAME}" PO Token cache provider: '
                    f'{e!r}{provider_bug_report_message(provider)}')

            # WRITE_FIRST should not write to lower priority providers in the case the highest priority provider fails
            if idx == 0 and write_policy == CacheProviderWritePolicy.WRITE_FIRST:
                return

    def close(self):
        for provider in self.cache_providers.values():
            provider.close()
        for spec_provider in self.cache_spec_providers.values():
            spec_provider.close()


class PoTokenRequestDirector:

    def __init__(self, logger: IEContentProviderLogger, cache: PoTokenCache):
        self.providers: dict[str, PoTokenProvider] = {}
        self.preferences: list[Preference] = []
        self.cache = cache
        self.logger = logger

    def register_provider(self, provider: PoTokenProvider):
        self.providers[provider.PROVIDER_KEY] = provider

    def register_preference(self, preference: Preference):
        self.preferences.append(preference)

    def _get_providers(self, request: PoTokenRequest) -> Iterable[PoTokenProvider]:
        """Sorts available providers by preference, given a request"""
        preferences = {
            provider: sum(pref(provider, request) for pref in self.preferences)
            for provider in self.providers.values()
        }
        if self.logger.log_level <= self.logger.LogLevel.TRACE:
            # calling is_available() for every PO Token provider upfront may have some overhead
            self.logger.trace(f'PO Token Providers: {provider_display_list(self.providers.values())}')
            self.logger.trace('Provider preferences for this request: {}'.format(', '.join(
                f'{provider.PROVIDER_NAME}={pref}' for provider, pref in preferences.items())))

        return (
            provider for provider in sorted(
                self.providers.values(), key=preferences.get, reverse=True)
            if provider.is_available()
        )

    def _get_po_token(self, request) -> PoTokenResponse | None:
        for provider in self._get_providers(request):
            try:
                self.logger.trace(
                    f'Attempting to fetch a PO Token from "{provider.PROVIDER_NAME}" provider')
                response = provider.request_pot(request.copy())
            except PoTokenProviderRejectedRequest as e:
                self.logger.trace(
                    f'PO Token Provider "{provider.PROVIDER_NAME}" rejected this request, '
                    f'trying next available provider. Reason: {e}')
                continue
            except PoTokenProviderError as e:
                self.logger.warning(
                    f'Error fetching PO Token from "{provider.PROVIDER_NAME}" provider: '
                    f'{e!r}{provider_bug_report_message(provider) if not e.expected else ""}')
                continue
            except Exception as e:
                self.logger.error(
                    f'Unexpected error when fetching PO Token from "{provider.PROVIDER_NAME}" provider: '
                    f'{e!r}{provider_bug_report_message(provider)}')
                continue

            self.logger.trace(f'PO Token response from "{provider.PROVIDER_NAME}" provider: {response}')

            if not validate_response(response):
                self.logger.error(
                    f'Invalid PO Token response received from "{provider.PROVIDER_NAME}" provider: '
                    f'{response}{provider_bug_report_message(provider)}')
                continue

            return response

        self.logger.trace('No PO Token providers were able to provide a valid PO Token')
        return None

    def get_po_token(self, request: PoTokenRequest) -> str | None:
        if not request.bypass_cache:
            if pot_response := self.cache.get(request):
                return clean_pot(pot_response.po_token)

        if not self.providers:
            self.logger.trace('No PO Token providers registered')
            return None

        pot_response = self._get_po_token(request)
        if not pot_response:
            return None

        pot_response.po_token = clean_pot(pot_response.po_token)

        if pot_response.expires_at is None or pot_response.expires_at > 0:
            self.cache.store(request, pot_response)
        else:
            self.logger.trace(
                f'PO Token response will not be cached (expires_at={pot_response.expires_at})')

        return pot_response.po_token

    def close(self):
        for provider in self.providers.values():
            provider.close()
        self.cache.close()


EXTRACTOR_ARG_PREFIX = 'youtubepot'


def initialize_pot_director(ie):
    assert ie._downloader is not None, 'Downloader not set'

    enable_trace = ie._configuration_arg(
        'pot_trace', ['false'], ie_key='youtube', casesense=False)[0] == 'true'

    if enable_trace:
        log_level = IEContentProviderLogger.LogLevel.TRACE
    elif ie.get_param('verbose', False):
        log_level = IEContentProviderLogger.LogLevel.DEBUG
    else:
        log_level = IEContentProviderLogger.LogLevel.INFO

    def get_provider_logger_and_settings(provider, logger_key):
        logger_prefix = f'{logger_key}:{provider.PROVIDER_NAME}'
        extractor_key = f'{EXTRACTOR_ARG_PREFIX}-{provider.PROVIDER_KEY.lower()}'
        return (
            YoutubeIEContentProviderLogger(ie, logger_prefix, log_level=log_level),
            ie.get_param('extractor_args', {}).get(extractor_key, {}))

    cache_providers = []
    for cache_provider in _pot_cache_providers.value.values():
        logger, settings = get_provider_logger_and_settings(cache_provider, 'pot:cache')
        cache_providers.append(cache_provider(ie, logger, settings))
    cache_spec_providers = []
    for cache_spec_provider in _pot_pcs_providers.value.values():
        logger, settings = get_provider_logger_and_settings(cache_spec_provider, 'pot:cache:spec')
        cache_spec_providers.append(cache_spec_provider(ie, logger, settings))

    cache = PoTokenCache(
        logger=YoutubeIEContentProviderLogger(ie, 'pot:cache', log_level=log_level),
        cache_providers=cache_providers,
        cache_spec_providers=cache_spec_providers,
        cache_provider_preferences=list(_pot_cache_provider_preferences.value),
    )

    director = PoTokenRequestDirector(
        logger=YoutubeIEContentProviderLogger(ie, 'pot', log_level=log_level),
        cache=cache,
    )

    ie._downloader.add_close_hook(director.close)

    for provider in _pot_providers.value.values():
        logger, settings = get_provider_logger_and_settings(provider, 'pot')
        director.register_provider(provider(ie, logger, settings))

    for preference in _ptp_preferences.value:
        director.register_preference(preference)

    if director.logger.log_level <= director.logger.LogLevel.DEBUG:
        # calling is_available() for every PO Token provider upfront may have some overhead
        director.logger.debug(f'PO Token Providers: {provider_display_list(director.providers.values())}')
        director.logger.debug(f'PO Token Cache Providers: {provider_display_list(cache.cache_providers.values())}')
        director.logger.debug(f'PO Token Cache Spec Providers: {provider_display_list(cache.cache_spec_providers.values())}')
        director.logger.trace(f'Registered {len(director.preferences)} provider preferences')
        director.logger.trace(f'Registered {len(cache.cache_provider_preferences)} cache provider preferences')

    return director


def provider_display_list(providers: Iterable[IEContentProvider]):
    def provider_display_name(provider):
        display_str = join_nonempty(
            provider.PROVIDER_NAME,
            provider.PROVIDER_VERSION if not isinstance(provider, BuiltinIEContentProvider) else None)
        statuses = []
        if not isinstance(provider, BuiltinIEContentProvider):
            statuses.append('external')
        if not provider.is_available():
            statuses.append('unavailable')
        if statuses:
            display_str += f' ({", ".join(statuses)})'
        return display_str

    return ', '.join(provider_display_name(provider) for provider in providers) or 'none'


def clean_pot(po_token: str):
    # Clean and validate the PO Token. This will strip invalid characters off
    # (e.g. additional url params the user may accidentally include)
    try:
        return base64.urlsafe_b64encode(
            base64.urlsafe_b64decode(urllib.parse.unquote(po_token))).decode()
    except (binascii.Error, ValueError):
        raise ValueError('Invalid PO Token')


def validate_response(response: PoTokenResponse | None):
    if (
        not isinstance(response, PoTokenResponse)
        or not isinstance(response.po_token, str)
        or not response.po_token
    ):  # noqa: SIM103
        return False

    try:
        clean_pot(response.po_token)
    except ValueError:
        return False

    if not isinstance(response.expires_at, int):
        return response.expires_at is None

    return response.expires_at <= 0 or response.expires_at > int(dt.datetime.now(dt.timezone.utc).timestamp())


def validate_cache_spec(spec: PoTokenCacheSpec):
    return (
        isinstance(spec, PoTokenCacheSpec)
        and isinstance(spec.write_policy, CacheProviderWritePolicy)
        and isinstance(spec.default_ttl, int)
        and isinstance(spec.key_bindings, dict)
        and all(isinstance(k, str) for k in spec.key_bindings)
        and all(v is None or isinstance(v, str) for v in spec.key_bindings.values())
        and bool([v for v in spec.key_bindings.values() if v is not None])
    )
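One property of PoTokenCache._generate_key() above worth spelling out: the bindings are sorted before hashing, so two bindings dicts with the same items always yield the same SHA-256 cache key regardless of insertion order. A self-contained check that mirrors the method locally (rather than importing the private class):

import hashlib

def generate_key(bindings: dict) -> str:
    # mirrors PoTokenCache._generate_key() above
    binding_string = ''.join(repr(dict(sorted(bindings.items()))))
    return hashlib.sha256(binding_string.encode()).hexdigest()

a = generate_key({'t': 'webpo', 'cb': 'xyz', '_dlp_cache': 'v1'})
b = generate_key({'_dlp_cache': 'v1', 'cb': 'xyz', 't': 'webpo'})
assert a == b  # insertion order does not affect the resulting key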
156 yt_dlp/extractor/youtube/pot/_provider.py Normal file
@@ -0,0 +1,156 @@
from __future__ import annotations

import abc
import enum
import functools

from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.utils import NO_DEFAULT, bug_reports_message, classproperty, traverse_obj
from yt_dlp.version import __version__

# xxx: these could be generalized outside YoutubeIE eventually


class IEContentProviderLogger(abc.ABC):

    class LogLevel(enum.IntEnum):
        TRACE = 0
        DEBUG = 10
        INFO = 20
        WARNING = 30
        ERROR = 40

        @classmethod
        def _missing_(cls, value):
            if isinstance(value, str):
                value = value.upper()
                if value in dir(cls):
                    return cls[value]

            return cls.INFO

    log_level = LogLevel.INFO

    @abc.abstractmethod
    def trace(self, message: str):
        pass

    @abc.abstractmethod
    def debug(self, message: str):
        pass

    @abc.abstractmethod
    def info(self, message: str):
        pass

    @abc.abstractmethod
    def warning(self, message: str, *, once=False):
        pass

    @abc.abstractmethod
    def error(self, message: str):
        pass


class IEContentProviderError(Exception):
    def __init__(self, msg=None, expected=False):
        super().__init__(msg)
        self.expected = expected


class IEContentProvider(abc.ABC):
    PROVIDER_VERSION: str = '0.0.0'
    BUG_REPORT_LOCATION: str = '(developer has not provided a bug report location)'

    def __init__(
        self,
        ie: InfoExtractor,
        logger: IEContentProviderLogger,
        settings: dict[str, list[str]], *_, **__,
    ):
        self.ie = ie
        self.settings = settings or {}
        self.logger = logger
        super().__init__()

    @classmethod
    def __init_subclass__(cls, *, suffix=None, **kwargs):
        if suffix:
            cls._PROVIDER_KEY_SUFFIX = suffix
        return super().__init_subclass__(**kwargs)

    @classproperty
    def PROVIDER_NAME(cls) -> str:
        return cls.__name__[:-len(cls._PROVIDER_KEY_SUFFIX)]

    @classproperty
    def BUG_REPORT_MESSAGE(cls):
        return f'please report this issue to the provider developer at {cls.BUG_REPORT_LOCATION} .'

    @classproperty
    def PROVIDER_KEY(cls) -> str:
        assert hasattr(cls, '_PROVIDER_KEY_SUFFIX'), 'Content Provider implementation must define a suffix for the provider key'
        assert cls.__name__.endswith(cls._PROVIDER_KEY_SUFFIX), f'PoTokenProvider class names must end with "{cls._PROVIDER_KEY_SUFFIX}"'
        return cls.__name__[:-len(cls._PROVIDER_KEY_SUFFIX)]

    @abc.abstractmethod
    def is_available(self) -> bool:
        """
        Check if the provider is available (e.g. all required dependencies are available)
        This is used to determine if the provider should be used and to provide debug information.

        IMPORTANT: This method should not make any network requests or perform any expensive operations.
        It is called multiple times.
        """
        raise NotImplementedError

    def close(self):  # noqa: B027
        pass

    def _configuration_arg(self, key, default=NO_DEFAULT, *, casesense=False):
        """
        @returns A list of values for the setting given by "key"
                 or "default" if no such key is present
        @param default The default value to return when the key is not present (default: [])
        @param casesense When false, the values are converted to lower case
        """
        val = traverse_obj(self.settings, key)
        if val is None:
            return [] if default is NO_DEFAULT else default
        return list(val) if casesense else [x.lower() for x in val]


class BuiltinIEContentProvider(IEContentProvider, abc.ABC):
    PROVIDER_VERSION = __version__
    BUG_REPORT_MESSAGE = bug_reports_message(before='')


def register_provider_generic(
    provider,
    base_class,
    registry,
):
    """Generic function to register a provider class"""
    assert issubclass(provider, base_class), f'{provider} must be a subclass of {base_class.__name__}'
    assert provider.PROVIDER_KEY not in registry, f'{base_class.__name__} {provider.PROVIDER_KEY} already registered'
    registry[provider.PROVIDER_KEY] = provider
    return provider


def register_preference_generic(
    base_class,
    registry,
    *providers,
):
    """Generic function to register a preference for a provider"""
    assert all(issubclass(provider, base_class) for provider in providers)

    def outer(preference):
        @functools.wraps(preference)
        def inner(provider, *args, **kwargs):
            if not providers or isinstance(provider, providers):
                return preference(provider, *args, **kwargs)
            return 0
        registry.add(inner)
        return preference
    return outer
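The suffix class keyword together with the PROVIDER_KEY/PROVIDER_NAME classproperties enforces a naming convention: a concrete class must end with its family's suffix, and the registry key is the class name minus that suffix. A hedged sketch with a hypothetical provider family (assumes a yt-dlp build containing this patch is importable; note _provider is an internal module):

import abc

from yt_dlp.extractor.youtube.pot._provider import IEContentProvider

# Hypothetical provider family declared with suffix 'XYZ'; names are illustrative.
class XyzProviderBase(IEContentProvider, abc.ABC, suffix='XYZ'):
    pass

class MyBackendXYZ(XyzProviderBase):
    def is_available(self) -> bool:
        return True

print(MyBackendXYZ.PROVIDER_KEY)   # 'MyBackend' (class name minus the 'XYZ' suffix)
print(MyBackendXYZ.PROVIDER_NAME)  # 'MyBackend' (unless the class overrides it)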
8 yt_dlp/extractor/youtube/pot/_registry.py Normal file
@@ -0,0 +1,8 @@
from yt_dlp.globals import Indirect

_pot_providers = Indirect({})
_ptp_preferences = Indirect(set())
_pot_pcs_providers = Indirect({})
_pot_cache_providers = Indirect({})
_pot_cache_provider_preferences = Indirect(set())
_pot_memory_cache = Indirect({})
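The registries are Indirect holders from yt_dlp.globals: all readers and writers go through .value, so the underlying dict or set can be swapped out (for example, reset in tests) without rebinding the module attribute. A minimal sketch of the pattern, with a local stand-in class so the snippet is self-contained (yt_dlp.globals.Indirect behaves like this holder):

# Local stand-in for the Indirect holder pattern used by the registries above.
class Indirect:
    def __init__(self, initial):
        self.value = initial

_providers = Indirect({})
_providers.value['Example'] = object()          # registration mutates .value in place
saved, _providers.value = _providers.value, {}  # swap the registry, e.g. for a test
assert not _providers.value and 'Example' in saved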
97 yt_dlp/extractor/youtube/pot/cache.py Normal file
@@ -0,0 +1,97 @@
"""PUBLIC API"""

from __future__ import annotations

import abc
import dataclasses
import enum
import typing

from yt_dlp.extractor.youtube.pot._provider import (
    IEContentProvider,
    IEContentProviderError,
    register_preference_generic,
    register_provider_generic,
)
from yt_dlp.extractor.youtube.pot._registry import (
    _pot_cache_provider_preferences,
    _pot_cache_providers,
    _pot_pcs_providers,
)
from yt_dlp.extractor.youtube.pot.provider import PoTokenRequest


class PoTokenCacheProviderError(IEContentProviderError):
    """An error occurred while fetching a PO Token"""


class PoTokenCacheProvider(IEContentProvider, abc.ABC, suffix='PCP'):
    @abc.abstractmethod
    def get(self, key: str) -> str | None:
        pass

    @abc.abstractmethod
    def store(self, key: str, value: str, expires_at: int):
        pass

    @abc.abstractmethod
    def delete(self, key: str):
        pass


class CacheProviderWritePolicy(enum.Enum):
    WRITE_ALL = enum.auto()    # Write to all cache providers
    WRITE_FIRST = enum.auto()  # Write to only the first cache provider


@dataclasses.dataclass
class PoTokenCacheSpec:
    key_bindings: dict[str, str | None]
    default_ttl: int
    write_policy: CacheProviderWritePolicy = CacheProviderWritePolicy.WRITE_ALL

    # Internal
    _provider: PoTokenCacheSpecProvider | None = None


class PoTokenCacheSpecProvider(IEContentProvider, abc.ABC, suffix='PCSP'):

    def is_available(self) -> bool:
        return True

    @abc.abstractmethod
    def generate_cache_spec(self, request: PoTokenRequest) -> PoTokenCacheSpec | None:
        """Generate a cache spec for the given request"""
        pass


def register_provider(provider: type[PoTokenCacheProvider]):
    """Register a PoTokenCacheProvider class"""
    return register_provider_generic(
        provider=provider,
        base_class=PoTokenCacheProvider,
        registry=_pot_cache_providers.value,
    )


def register_spec(provider: type[PoTokenCacheSpecProvider]):
    """Register a PoTokenCacheSpecProvider class"""
    return register_provider_generic(
        provider=provider,
        base_class=PoTokenCacheSpecProvider,
        registry=_pot_pcs_providers.value,
    )


def register_preference(
        *providers: type[PoTokenCacheProvider]) -> typing.Callable[[CacheProviderPreference], CacheProviderPreference]:
    """Register a preference for a PoTokenCacheProvider"""
    return register_preference_generic(
        PoTokenCacheProvider,
        _pot_cache_provider_preferences.value,
        *providers,
    )


if typing.TYPE_CHECKING:
    CacheProviderPreference = typing.Callable[[PoTokenCacheProvider, PoTokenRequest], int]
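Putting this public cache API together, here is a hedged sketch of what an external cache provider could look like. The class name must end in 'PCP'; the provider name, version, URL and the dict-backed storage are all illustrative, and expiry handling is omitted for brevity (a real provider should honour expires_at, a unix timestamp):

from __future__ import annotations

from yt_dlp.extractor.youtube.pot.cache import (
    PoTokenCacheProvider,
    register_preference,
    register_provider,
)


@register_provider
class InMemoryDictPCP(PoTokenCacheProvider):  # hypothetical example provider
    PROVIDER_NAME = 'in-memory-dict'
    PROVIDER_VERSION = '0.1.0'
    BUG_REPORT_LOCATION = 'https://example.com/issues'  # placeholder

    _store: dict[str, tuple[str, int]] = {}  # shared class-level dict; sketch only

    def is_available(self) -> bool:
        return True

    def get(self, key: str) -> str | None:
        entry = self._store.get(key)
        return entry[0] if entry else None

    def store(self, key: str, value: str, expires_at: int):
        self._store[key] = (value, expires_at)

    def delete(self, key: str):
        self._store.pop(key, None)


@register_preference(InMemoryDictPCP)
def inmemorydict_preference(provider, request) -> int:
    return 50  # well below the builtin memory cache's 10000, so it is tried later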
281 yt_dlp/extractor/youtube/pot/provider.py Normal file
@@ -0,0 +1,281 @@
"""PUBLIC API"""

from __future__ import annotations

import abc
import copy
import dataclasses
import enum
import functools
import typing
import urllib.parse

from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.extractor.youtube.pot._provider import (
    IEContentProvider,
    IEContentProviderError,
    register_preference_generic,
    register_provider_generic,
)
from yt_dlp.extractor.youtube.pot._registry import _pot_providers, _ptp_preferences
from yt_dlp.networking import Request, Response
from yt_dlp.utils import traverse_obj
from yt_dlp.utils.networking import HTTPHeaderDict

__all__ = [
    'ExternalRequestFeature',
    'PoTokenContext',
    'PoTokenProvider',
    'PoTokenProviderError',
    'PoTokenProviderRejectedRequest',
    'PoTokenRequest',
    'PoTokenResponse',
    'provider_bug_report_message',
    'register_preference',
    'register_provider',
]


class PoTokenContext(enum.Enum):
    GVS = 'gvs'
    PLAYER = 'player'
    SUBS = 'subs'


@dataclasses.dataclass
class PoTokenRequest:
    # YouTube parameters
    context: PoTokenContext
    innertube_context: InnertubeContext
    innertube_host: str | None = None
    session_index: str | None = None
    player_url: str | None = None
    is_authenticated: bool = False
    video_webpage: str | None = None
    internal_client_name: str | None = None

    # Content binding parameters
    visitor_data: str | None = None
    data_sync_id: str | None = None
    video_id: str | None = None

    # Networking parameters
    request_cookiejar: YoutubeDLCookieJar = dataclasses.field(default_factory=YoutubeDLCookieJar)
    request_proxy: str | None = None
    request_headers: HTTPHeaderDict = dataclasses.field(default_factory=HTTPHeaderDict)
    request_timeout: float | None = None
    request_source_address: str | None = None
    request_verify_tls: bool = True

    # Generate a new token, do not use a cached token
    # The token should still be cached for future requests
    bypass_cache: bool = False

    def copy(self):
        return dataclasses.replace(
            self,
            request_headers=HTTPHeaderDict(self.request_headers),
            innertube_context=copy.deepcopy(self.innertube_context),
        )


@dataclasses.dataclass
class PoTokenResponse:
    po_token: str
    expires_at: int | None = None


class PoTokenProviderRejectedRequest(IEContentProviderError):
    """Reject the PoTokenRequest (cannot handle the request)"""


class PoTokenProviderError(IEContentProviderError):
    """An error occurred while fetching a PO Token"""


class ExternalRequestFeature(enum.Enum):
    PROXY_SCHEME_HTTP = enum.auto()
    PROXY_SCHEME_HTTPS = enum.auto()
    PROXY_SCHEME_SOCKS4 = enum.auto()
    PROXY_SCHEME_SOCKS4A = enum.auto()
    PROXY_SCHEME_SOCKS5 = enum.auto()
    PROXY_SCHEME_SOCKS5H = enum.auto()
    SOURCE_ADDRESS = enum.auto()
    DISABLE_TLS_VERIFICATION = enum.auto()


class PoTokenProvider(IEContentProvider, abc.ABC, suffix='PTP'):

    # Set to None to disable the check
    _SUPPORTED_CONTEXTS: tuple[PoTokenContext] | None = ()

    # Innertube Client Name.
    # For example, "WEB", "ANDROID", "TVHTML5".
    # For a list of WebPO client names, see yt_dlp.extractor.youtube.pot.utils.WEBPO_CLIENTS.
    # Also see yt_dlp.extractor.youtube._base.INNERTUBE_CLIENTS
    # for a list of client names currently supported by the YouTube extractor.
    _SUPPORTED_CLIENTS: tuple[str] | None = ()

    # If making external requests to websites (e.g. to youtube.com)
    # using another library or service (i.e., not _request_webpage),
    # add the request features that are supported.
    # If only using _request_webpage to make external requests, set this to None.
    _SUPPORTED_EXTERNAL_REQUEST_FEATURES: tuple[ExternalRequestFeature] | None = ()

    def __validate_request(self, request: PoTokenRequest):
        if not self.is_available():
            raise PoTokenProviderRejectedRequest(f'{self.PROVIDER_NAME} is not available')

        # Validate request using built-in settings
        if (
            self._SUPPORTED_CONTEXTS is not None
            and request.context not in self._SUPPORTED_CONTEXTS
        ):
            raise PoTokenProviderRejectedRequest(
                f'PO Token Context "{request.context}" is not supported by {self.PROVIDER_NAME}')

        if self._SUPPORTED_CLIENTS is not None:
            client_name = traverse_obj(
                request.innertube_context, ('client', 'clientName'))
            if client_name not in self._SUPPORTED_CLIENTS:
                raise PoTokenProviderRejectedRequest(
                    f'Client "{client_name}" is not supported by {self.PROVIDER_NAME}. '
                    f'Supported clients: {", ".join(self._SUPPORTED_CLIENTS) or "none"}')

        self.__validate_external_request_features(request)

    @functools.cached_property
    def _supported_proxy_schemes(self):
        return {
            scheme: feature
            for scheme, feature in {
                'http': ExternalRequestFeature.PROXY_SCHEME_HTTP,
                'https': ExternalRequestFeature.PROXY_SCHEME_HTTPS,
                'socks4': ExternalRequestFeature.PROXY_SCHEME_SOCKS4,
                'socks4a': ExternalRequestFeature.PROXY_SCHEME_SOCKS4A,
                'socks5': ExternalRequestFeature.PROXY_SCHEME_SOCKS5,
                'socks5h': ExternalRequestFeature.PROXY_SCHEME_SOCKS5H,
            }.items()
            if feature in (self._SUPPORTED_EXTERNAL_REQUEST_FEATURES or [])
        }

    def __validate_external_request_features(self, request: PoTokenRequest):
        if self._SUPPORTED_EXTERNAL_REQUEST_FEATURES is None:
            return

        if request.request_proxy:
            scheme = urllib.parse.urlparse(request.request_proxy).scheme
            if scheme.lower() not in self._supported_proxy_schemes:
                raise PoTokenProviderRejectedRequest(
                    f'External requests by "{self.PROVIDER_NAME}" provider do not '
                    f'support proxy scheme "{scheme}". Supported proxy schemes: '
                    f'{", ".join(self._supported_proxy_schemes) or "none"}')

        if (
            request.request_source_address
            and ExternalRequestFeature.SOURCE_ADDRESS not in self._SUPPORTED_EXTERNAL_REQUEST_FEATURES
        ):
            raise PoTokenProviderRejectedRequest(
                f'External requests by "{self.PROVIDER_NAME}" provider '
                f'do not support setting source address')

        if (
            not request.request_verify_tls
            and ExternalRequestFeature.DISABLE_TLS_VERIFICATION not in self._SUPPORTED_EXTERNAL_REQUEST_FEATURES
        ):
            raise PoTokenProviderRejectedRequest(
                f'External requests by "{self.PROVIDER_NAME}" provider '
                f'do not support ignoring TLS certificate failures')

    def request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
        self.__validate_request(request)
        return self._real_request_pot(request)

    @abc.abstractmethod
    def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
        """To be implemented by subclasses"""
        pass

    # Helper functions

    def _request_webpage(self, request: Request, pot_request: PoTokenRequest | None = None, note=None, **kwargs) -> Response:
        """Make a request using the internal HTTP Client.
        Use this instead of calling requests, urllib3 or other HTTP client libraries directly!

        YouTube cookies will be automatically applied if this request is made to YouTube.

        @param request: The request to make
        @param pot_request: The PoTokenRequest to use. Request parameters will be merged from it.
        @param note: Custom log message to display when making the request. Set to `False` to disable logging.

        Tips:
        - Disable proxy (e.g. if calling local service): Request(..., proxies={'all': None})
        - Set request timeout: Request(..., extensions={'timeout': 5.0})
        """
        req = request.copy()

        # Merge some ctx request settings into the request
        # Most of these will already be used by the configured ydl instance,
        # however, the YouTube extractor may override some.
        if pot_request is not None:
            req.headers = HTTPHeaderDict(pot_request.request_headers, req.headers)
            req.proxies = req.proxies or ({'all': pot_request.request_proxy} if pot_request.request_proxy else {})

            if pot_request.request_cookiejar is not None:
                req.extensions['cookiejar'] = req.extensions.get('cookiejar', pot_request.request_cookiejar)

        if note is not False:
            self.logger.info(str(note) if note else 'Requesting webpage')
        return self.ie._downloader.urlopen(req)


def register_provider(provider: type[PoTokenProvider]):
    """Register a PoTokenProvider class"""
    return register_provider_generic(
        provider=provider,
        base_class=PoTokenProvider,
        registry=_pot_providers.value,
    )


def provider_bug_report_message(provider: IEContentProvider, before=';'):
    msg = provider.BUG_REPORT_MESSAGE

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return f'{before} {msg}' if before else msg


def register_preference(*providers: type[PoTokenProvider]) -> typing.Callable[[Preference], Preference]:
    """Register a preference for a PoTokenProvider"""
    return register_preference_generic(
        PoTokenProvider,
        _ptp_preferences.value,
        *providers,
    )


if typing.TYPE_CHECKING:
    Preference = typing.Callable[[PoTokenProvider, PoTokenRequest], int]
    __all__.append('Preference')

    # Barebones innertube context. There may be more fields.
    class ClientInfo(typing.TypedDict, total=False):
        hl: str | None
        gl: str | None
        remoteHost: str | None
        deviceMake: str | None
        deviceModel: str | None
        visitorData: str | None
        userAgent: str | None
        clientName: str
        clientVersion: str
        osName: str | None
        osVersion: str | None

    class InnertubeContext(typing.TypedDict, total=False):
        client: ClientInfo
        request: dict
        user: dict
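And the provider side of the same public API: a hedged, minimal sketch of an external PO Token provider that only accepts GVS requests from the WEB client. The "generation" step is a stand-in hash so the snippet is self-contained; it is not a working token implementation:

import base64
import hashlib

from yt_dlp.extractor.youtube.pot.provider import (
    PoTokenContext,
    PoTokenProvider,
    PoTokenRequest,
    PoTokenResponse,
    register_provider,
)


@register_provider
class ExamplePTP(PoTokenProvider):  # class name must end in 'PTP'; all values illustrative
    PROVIDER_NAME = 'example'
    PROVIDER_VERSION = '0.1.0'
    BUG_REPORT_LOCATION = 'https://example.com/issues'  # placeholder

    _SUPPORTED_CONTEXTS = (PoTokenContext.GVS,)
    _SUPPORTED_CLIENTS = ('WEB',)
    # None = this provider only uses the internal HTTP client (_request_webpage)
    _SUPPORTED_EXTERNAL_REQUEST_FEATURES = None

    def is_available(self) -> bool:
        return True

    def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
        # Stand-in "generation": hash the visitor data into valid urlsafe base64.
        # A real provider would run an actual PO Token implementation here.
        token = base64.urlsafe_b64encode(
            hashlib.sha256((request.visitor_data or '').encode()).digest()).decode()
        # expires_at=None lets the cache layer apply the cache spec's default TTL
        return PoTokenResponse(po_token=token, expires_at=None)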
73 yt_dlp/extractor/youtube/pot/utils.py Normal file
@@ -0,0 +1,73 @@
"""PUBLIC API"""

from __future__ import annotations

import base64
import contextlib
import enum
import re
import urllib.parse

from yt_dlp.extractor.youtube.pot.provider import PoTokenContext, PoTokenRequest
from yt_dlp.utils import traverse_obj

__all__ = ['WEBPO_CLIENTS', 'ContentBindingType', 'get_webpo_content_binding']

WEBPO_CLIENTS = (
    'WEB',
    'MWEB',
    'TVHTML5',
    'WEB_EMBEDDED_PLAYER',
    'WEB_CREATOR',
    'WEB_REMIX',
    'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
)


class ContentBindingType(enum.Enum):
    VISITOR_DATA = 'visitor_data'
    DATASYNC_ID = 'datasync_id'
    VIDEO_ID = 'video_id'
    VISITOR_ID = 'visitor_id'


def get_webpo_content_binding(
    request: PoTokenRequest,
    webpo_clients=WEBPO_CLIENTS,
    bind_to_visitor_id=False,
) -> tuple[str | None, ContentBindingType | None]:

    client_name = traverse_obj(request.innertube_context, ('client', 'clientName'))
    if not client_name or client_name not in webpo_clients:
        return None, None

    if request.context == PoTokenContext.GVS or client_name in ('WEB_REMIX', ):
        if request.is_authenticated:
            return request.data_sync_id, ContentBindingType.DATASYNC_ID
        else:
            if bind_to_visitor_id:
                visitor_id = _extract_visitor_id(request.visitor_data)
                if visitor_id:
                    return visitor_id, ContentBindingType.VISITOR_ID
            return request.visitor_data, ContentBindingType.VISITOR_DATA

    elif request.context in (PoTokenContext.PLAYER, PoTokenContext.SUBS):
        return request.video_id, ContentBindingType.VIDEO_ID

    return None, None


def _extract_visitor_id(visitor_data):
    if not visitor_data:
        return None

    # Attempt to extract the visitor ID from the visitor_data protobuf
    # xxx: ideally should use a protobuf parser
    with contextlib.suppress(Exception):
        visitor_id = base64.urlsafe_b64decode(
            urllib.parse.unquote_plus(visitor_data))[2:13].decode()
        # check that the visitor id is an 11-character base64url string
        if re.fullmatch(r'[A-Za-z0-9_-]{11}', visitor_id):
            return visitor_id

    return None
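The [2:13] slice in _extract_visitor_id() assumes the visitor ID is an 11-character string sitting after a two-byte protobuf field header. A self-contained check with synthetic visitor data (the bytes below are constructed for illustration, not a real token):

import base64
import urllib.parse

# Build synthetic visitor_data: 2-byte protobuf-style header, an 11-character
# visitor id, then arbitrary trailing payload.
raw = b'\x0a\x0b' + b'Abc-123_XYZ' + b'\x18\x01'
visitor_data = urllib.parse.quote_plus(base64.urlsafe_b64encode(raw).decode())

decoded = base64.urlsafe_b64decode(urllib.parse.unquote_plus(visitor_data))
assert decoded[2:13].decode() == 'Abc-123_XYZ'  # matches the regex [A-Za-z0-9_-]{11}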
File diff suppressed because it is too large