1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2026-02-09 15:37:19 +00:00

Merge remote-tracking branch 'origin/master' into tvw-news

This commit is contained in:
Fries
2025-05-11 15:49:54 -07:00
49 changed files with 2013 additions and 1428 deletions

View File

@@ -338,7 +338,6 @@ from .canalc2 import Canalc2IE
from .canalplus import CanalplusIE
from .canalsurmas import CanalsurmasIE
from .caracoltv import CaracolTvPlayIE
from .cartoonnetwork import CartoonNetworkIE
from .cbc import (
CBCIE,
CBCGemIE,
@@ -929,7 +928,10 @@ from .jiocinema import (
)
from .jiosaavn import (
JioSaavnAlbumIE,
JioSaavnArtistIE,
JioSaavnPlaylistIE,
JioSaavnShowIE,
JioSaavnShowPlaylistIE,
JioSaavnSongIE,
)
from .joj import JojIE
@@ -1042,6 +1044,7 @@ from .limelight import (
LimelightMediaIE,
)
from .linkedin import (
LinkedInEventsIE,
LinkedInIE,
LinkedInLearningCourseIE,
LinkedInLearningIE,
@@ -1963,7 +1966,6 @@ from .spreaker import (
SpreakerShowIE,
)
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .sproutvideo import (
SproutVideoIE,
VidsIoIE,

View File

@@ -1,32 +1,24 @@
import re
from .theplatform import ThePlatformIE
from ..utils import (
int_or_none,
parse_age_limit,
try_get,
update_url_query,
)
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils.traversal import traverse_obj
class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
class AMCNetworksIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/?#]+)+)/[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
'url': 'https://www.amc.com/shows/dark-winds/videos/dark-winds-a-look-at-season-3--1072027',
'info_dict': {
'id': '4Lq1dzOnZGt0',
'id': '6369261343112',
'ext': 'mp4',
'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
'upload_date': '20201120',
'timestamp': 1605904350,
'uploader': 'AMCN',
'title': 'Dark Winds: A Look at Season 3',
'uploader_id': '6240731308001',
'duration': 176.427,
'thumbnail': r're:https://[^/]+\.boltdns\.net/.+/image\.jpg',
'tags': [],
'timestamp': 1740414792,
'upload_date': '20250224',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': '404 Not Found',
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
'only_matching': True,
@@ -52,96 +44,18 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
'only_matching': True,
}]
_REQUESTOR_ID_MAP = {
'amc': 'AMC',
'bbcamerica': 'BBCA',
'ifc': 'IFC',
'sundancetv': 'SUNDANCE',
'wetv': 'WETV',
}
def _real_extract(self, url):
site, display_id = self._match_valid_url(url).groups()
requestor_id = self._REQUESTOR_ID_MAP[site]
page_data = self._download_json(
f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
display_id)['data']
properties = page_data.get('properties') or {}
query = {
'mbr': 'true',
'manifest': 'm3u',
}
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
initial_data = self._search_json(
r'window\.initialData\s*=\s*JSON\.parse\(String\.raw`', webpage, 'initial data', display_id)
video_id = traverse_obj(initial_data, ('initialData', 'properties', 'videoId', {str}))
if not video_id: # All locked videos are now DRM-protected
self.report_drm(display_id)
account_id = initial_data['config']['brightcove']['accountId']
player_id = initial_data['config']['brightcove']['playerId']
video_player_count = 0
try:
for v in page_data['children']:
if v.get('type') == 'video-player':
release_pid = v['properties']['currentVideo']['meta']['releasePid']
tp_path = 'M_UwQC/' + release_pid
media_url = 'https://link.theplatform.com/s/' + tp_path
video_player_count += 1
except KeyError:
pass
if video_player_count > 1:
self.report_warning(
f'The JSON data has {video_player_count} video players. Only one will be extracted')
# Fall back to videoPid if releasePid not found.
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
if not video_player_count:
tp_path = 'M_UwQC/media/' + properties['videoPid']
media_url = 'https://link.theplatform.com/s/' + tp_path
theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
video_id = theplatform_metadata['pid']
title = theplatform_metadata['title']
rating = try_get(
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
video_category = properties.get('videoCategory')
if video_category and video_category.endswith('-Auth'):
resource = self._get_mvpd_resource(
requestor_id, title, video_id, rating)
query['auth'] = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
media_url = update_url_query(media_url, query)
formats, subtitles = self._extract_theplatform_smil(
media_url, video_id)
thumbnails = []
thumbnail_urls = [properties.get('imageDesktop')]
if 'thumbnail' in info:
thumbnail_urls.append(info.pop('thumbnail'))
for thumbnail_url in thumbnail_urls:
if not thumbnail_url:
continue
mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
thumbnails.append({
'url': thumbnail_url,
'width': int(mobj.group(1)) if mobj else None,
'height': int(mobj.group(2)) if mobj else None,
})
info.update({
'age_limit': parse_age_limit(rating),
'formats': formats,
'id': video_id,
'subtitles': subtitles,
'thumbnails': thumbnails,
})
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
if ns_keys:
ns = next(iter(ns_keys))
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
episode_number = int_or_none(
theplatform_metadata.get(ns + '$episode'))
season_number = int_or_none(
theplatform_metadata.get(ns + '$season'))
series = theplatform_metadata.get(ns + '$show') or None
info.update({
'episode': episode,
'episode_number': episode_number,
'season_number': season_number,
'series': series,
})
return info
return self.url_result(
f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
BrightcoveNewIE, video_id)

View File

@@ -1,30 +1,32 @@
import functools
import json
import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
OnDemandPagedList,
clean_html,
extract_attributes,
determine_ext,
format_field,
get_element_by_class,
get_element_by_id,
get_element_html_by_class,
get_elements_html_by_class,
int_or_none,
orderedSet,
parse_count,
parse_duration,
traverse_obj,
unified_strdate,
parse_iso8601,
url_or_none,
urlencode_postdata,
urljoin,
)
from ..utils.traversal import traverse_obj
class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/?#]+)/(?P<id>[^/?#&]+)'
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
@@ -34,12 +36,17 @@ class BitChuteIE(InfoExtractor):
'ext': 'mp4',
'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute',
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'view_count': int,
'duration': 16.0,
'timestamp': 1483425443,
},
}, {
# test case: video with different channel and uploader
@@ -49,13 +56,18 @@ class BitChuteIE(InfoExtractor):
'id': 'Yti_j9A-UZ4',
'ext': 'mp4',
'title': 'Israel at War | Full Measure',
'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:e60198b89971966d6030d22b3268f08f',
'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'sharylattkisson',
'upload_date': '20231106',
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
'channel': 'Full Measure with Sharyl Attkisson',
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
'uploader_id': '9K0kUWA9zmd9',
'channel_id': 'NpdxoCRv3ZLb',
'view_count': int,
'duration': 554.0,
'timestamp': 1699296106,
},
}, {
# video not downloadable in browser, but we can recover it
@@ -66,25 +78,21 @@ class BitChuteIE(InfoExtractor):
'ext': 'mp4',
'filesize': 71537926,
'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
'description': 'md5:228ee93bd840a24938f536aeac9cf749',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:2029c7c212ccd4b040f52bb2d036ef4e',
'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute',
'upload_date': '20181113',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'view_count': int,
'duration': 1701.0,
'tags': ['bitchute'],
'timestamp': 1542130287,
},
'params': {'check_formats': None},
}, {
# restricted video
'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
'info_dict': {
'id': 'WEnQU7XGcTdl',
'ext': 'mp4',
'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
},
'params': {'skip_download': True},
'skip': 'Georestricted in DE',
}, {
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
'only_matching': True,
@@ -96,11 +104,8 @@ class BitChuteIE(InfoExtractor):
'only_matching': True,
}]
_GEO_BYPASS = False
_HEADERS = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
'Referer': 'https://www.bitchute.com/',
}
_UPLOADER_URL_TMPL = 'https://www.bitchute.com/profile/%s/'
_CHANNEL_URL_TMPL = 'https://www.bitchute.com/channel/%s/'
def _check_format(self, video_url, video_id):
urls = orderedSet(
@@ -112,7 +117,7 @@ class BitChuteIE(InfoExtractor):
for url in urls:
try:
response = self._request_webpage(
HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS)
HEADRequest(url), video_id=video_id, note=f'Checking {url}')
except ExtractorError as e:
self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
continue
@@ -121,54 +126,79 @@ class BitChuteIE(InfoExtractor):
'filesize': int_or_none(response.headers.get('Content-Length')),
}
def _raise_if_restricted(self, webpage):
page_title = clean_html(get_element_by_class('page-title', webpage)) or ''
if re.fullmatch(r'(?:Channel|Video) Restricted', page_title):
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
self.raise_geo_restricted(reason)
@staticmethod
def _make_url(html):
path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
return urljoin('https://www.bitchute.com', path)
def _call_api(self, endpoint, data, display_id, fatal=True):
note = endpoint.rpartition('/')[2]
try:
return self._download_json(
f'https://api.bitchute.com/api/beta/{endpoint}', display_id,
f'Downloading {note} API JSON', f'Unable to download {note} API JSON',
data=json.dumps(data).encode(),
headers={
'Accept': 'application/json',
'Content-Type': 'application/json',
})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
errors = '. '.join(traverse_obj(e.cause.response.read().decode(), (
{json.loads}, 'errors', lambda _, v: v['context'] == 'reason', 'message', {str})))
if errors and 'location' in errors:
# Can always be fatal since the video/media call will reach this code first
self.raise_geo_restricted(errors)
if fatal:
raise
self.report_warning(e.msg)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
self._raise_if_restricted(webpage)
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
entries = self._parse_html5_media_entries(url, webpage, video_id)
data = {'video_id': video_id}
media_url = self._call_api('video/media', data, video_id)['media_url']
formats = []
for format_ in traverse_obj(entries, (0, 'formats', ...)):
if determine_ext(media_url) == 'm3u8':
formats.extend(
self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls', live=True))
else:
if self.get_param('check_formats') is not False:
format_.update(self._check_format(format_.pop('url'), video_id) or {})
if 'url' not in format_:
continue
formats.append(format_)
if fmt := self._check_format(media_url, video_id):
formats.append(fmt)
else:
formats.append({'url': media_url})
if not formats:
self.raise_no_formats(
'Video is unavailable. Please make sure this video is playable in the browser '
'before reporting this issue.', expected=True, video_id=video_id)
details = get_element_by_class('details', webpage) or ''
uploader_html = get_element_html_by_class('creator', details) or ''
channel_html = get_element_html_by_class('name', details) or ''
video = self._call_api('video', data, video_id, fatal=False)
channel = None
if channel_id := traverse_obj(video, ('channel', 'channel_id', {str})):
channel = self._call_api('channel', {'channel_id': channel_id}, video_id, fatal=False)
return {
**traverse_obj(video, {
'title': ('video_name', {str}),
'description': ('description', {str}),
'thumbnail': ('thumbnail_url', {url_or_none}),
'channel': ('channel', 'channel_name', {str}),
'channel_id': ('channel', 'channel_id', {str}),
'channel_url': ('channel', 'channel_url', {urljoin('https://www.bitchute.com/')}),
'uploader_id': ('profile_id', {str}),
'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
'timestamp': ('date_published', {parse_iso8601}),
'duration': ('duration', {parse_duration}),
'tags': ('hashtags', ..., {str}, filter, all, filter),
'view_count': ('view_count', {int_or_none}),
'is_live': ('state_id', {lambda x: x == 'live'}),
}),
**traverse_obj(channel, {
'channel': ('channel_name', {str}),
'channel_id': ('channel_id', {str}),
'channel_url': ('url_slug', {format_field(template=self._CHANNEL_URL_TMPL)}, filter),
'uploader': ('profile_name', {str}),
'uploader_id': ('profile_id', {str}),
'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
}),
'id': video_id,
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': clean_html(uploader_html),
'uploader_url': self._make_url(uploader_html),
'channel': clean_html(channel_html),
'channel_url': self._make_url(channel_html),
'upload_date': unified_strdate(self._search_regex(
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
'formats': formats,
}
@@ -190,7 +220,7 @@ class BitChuteChannelIE(InfoExtractor):
'ext': 'mp4',
'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute',
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
@@ -198,6 +228,9 @@ class BitChuteChannelIE(InfoExtractor):
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'duration': 16,
'view_count': int,
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'timestamp': 1483425443,
},
},
],
@@ -213,6 +246,7 @@ class BitChuteChannelIE(InfoExtractor):
'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:747724ef404eebdfc04277714f81863e',
},
'skip': '404 Not Found',
}, {
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
'only_matching': True,

View File

@@ -7,6 +7,7 @@ from ..utils import (
join_nonempty,
js_to_json,
mimetype2ext,
parse_resolution,
unified_strdate,
url_or_none,
urljoin,
@@ -110,24 +111,23 @@ class BpbIE(InfoExtractor):
return attributes
@staticmethod
def _process_source(source):
def _process_source(self, source):
url = url_or_none(source['src'])
if not url:
return None
source_type = source.get('type', '')
extension = mimetype2ext(source_type)
is_video = source_type.startswith('video')
note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
note = self._search_regex(r'[_-]([a-z]+)\.[\da-z]+(?:$|\?)', url, 'note', default=None)
return {
'url': url,
'ext': extension,
'vcodec': None if is_video else 'none',
'vcodec': None if source_type.startswith('video') else 'none',
'quality': 10 if note == 'high' else 0,
'format_note': note,
'format_id': join_nonempty(extension, note),
**parse_resolution(source.get('label')),
}
def _real_extract(self, url):

View File

@@ -1,59 +0,0 @@
from .turner import TurnerBaseIE
from ..utils import int_or_none
class CartoonNetworkIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
_TEST = {
'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
'info_dict': {
'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
'ext': 'mp4',
'title': 'How to Draw Upgrade',
'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
metadata_re = ''
if content_re:
metadata_re = r'|video_metadata\.content_' + content_re
return self._search_regex(
rf'(?:_cnglobal\.currentVideo\.{global_re}{metadata_re})\s*=\s*"({value_re})";',
webpage, name, fatal=fatal)
media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
info = self._extract_ngtv_info(
media_id, {'networkId': 'cartoonnetwork'}, {
'url': url,
'site_name': 'CartoonNetwork',
'auth_required': find_field('authType', 'auth type') != 'unauth',
})
series = find_field(
'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
info.update({
'id': media_id,
'display_id': display_id,
'title': title,
'description': self._html_search_meta('description', webpage),
'series': series,
'episode': title,
})
for field in ('season', 'episode'):
field_name = field + 'Number'
info[field + '_number'] = int_or_none(find_field(
field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
return info

View File

@@ -13,16 +13,17 @@ from ..compat import compat_ord
from ..utils import (
ExtractorError,
OnDemandPagedList,
determine_ext,
float_or_none,
int_or_none,
merge_dicts,
multipart_encode,
parse_duration,
traverse_obj,
try_call,
try_get,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class CDAIE(InfoExtractor):
@@ -290,34 +291,47 @@ class CDAIE(InfoExtractor):
if not video or 'file' not in video:
self.report_warning(f'Unable to extract {version} version information')
return
if video['file'].startswith('uggc'):
video['file'] = codecs.decode(video['file'], 'rot_13')
if video['file'].endswith('adc.mp4'):
video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
video_quality = video.get('quality')
qualities = video.get('qualities', {})
video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
info_dict['formats'].append({
'url': video['file'],
'format_id': video_quality,
'height': int_or_none(video_quality[:-1]),
})
if video.get('file'):
if video['file'].startswith('uggc'):
video['file'] = codecs.decode(video['file'], 'rot_13')
if video['file'].endswith('adc.mp4'):
video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
info_dict['formats'].append({
'url': video['file'],
'format_id': video_quality,
'height': int_or_none(video_quality[:-1]),
})
for quality, cda_quality in qualities.items():
if quality == video_quality:
continue
data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
data = json.dumps(data).encode()
video_url = self._download_json(
response = self._download_json(
f'https://www.cda.pl/video/{video_id}', video_id, headers={
'Content-Type': 'application/json',
'X-Requested-With': 'XMLHttpRequest',
}, data=data, note=f'Fetching {quality} url',
errnote=f'Failed to fetch {quality} url', fatal=False)
if try_get(video_url, lambda x: x['result']['status']) == 'ok':
video_url = try_get(video_url, lambda x: x['result']['resp'])
if (
traverse_obj(response, ('result', 'status')) != 'ok'
or not traverse_obj(response, ('result', 'resp', {url_or_none}))
):
continue
video_url = response['result']['resp']
ext = determine_ext(video_url)
if ext == 'mpd':
info_dict['formats'].extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
elif ext == 'm3u8':
info_dict['formats'].extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
else:
info_dict['formats'].append({
'url': video_url,
'format_id': quality,

View File

@@ -9,6 +9,7 @@ from ..utils import (
ExtractorError,
classproperty,
float_or_none,
parse_qs,
traverse_obj,
url_or_none,
)
@@ -91,11 +92,15 @@ class DacastVODIE(DacastBaseIE):
# Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
return self._search_regex(
r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js,
'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP'
'usp signing secret', group='secret', fatal=False) or 'hGDtqMKYVeFdofrAfFmBcrsakaZELajI'
def _real_extract(self, url):
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
query = {
'contentId': f'{user_id}-vod-{video_id}',
'provider': 'universe',
**traverse_obj(url, ({parse_qs}, 'uss_token', {'signedKey': -1})),
}
info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False)
access = self._download_json(
'https://playback.dacast.com/content/access', video_id,

View File

@@ -1,9 +1,15 @@
from .zdf import ZDFBaseIE
from ..utils import (
int_or_none,
merge_dicts,
parse_iso8601,
)
from ..utils.traversal import require, traverse_obj
class DreiSatIE(ZDFBaseIE):
IE_NAME = '3sat'
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/?#]+/)*(?P<id>[^/?#&]+)\.html'
_TESTS = [{
'url': 'https://www.3sat.de/dokumentation/reise/traumziele-suedostasiens-die-philippinen-und-vietnam-102.html',
'info_dict': {
@@ -12,40 +18,59 @@ class DreiSatIE(ZDFBaseIE):
'title': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
'description': 'md5:26329ce5197775b596773b939354079d',
'duration': 2625.0,
'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~2400x1350?cb=1699870351148',
'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~original?cb=1699870351148',
'episode': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
'episode_id': 'POS_cc7ff51c-98cf-4d12-b99d-f7a551de1c95',
'timestamp': 1738593000,
'upload_date': '20250203',
'timestamp': 1747920900,
'upload_date': '20250522',
},
}, {
# Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
'url': 'https://www.3sat.de/film/ab-18/ab-18---mein-fremdes-ich-100.html',
'md5': 'f92638413a11d759bdae95c9d8ec165c',
'info_dict': {
'id': '141007_ab18_10wochensommer_film',
'id': '221128_mein_fremdes_ich2_ab18',
'ext': 'mp4',
'title': 'Ab 18! - 10 Wochen Sommer',
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
'duration': 2660,
'timestamp': 1608604200,
'upload_date': '20201222',
'title': 'Ab 18! - Mein fremdes Ich',
'description': 'md5:cae0c0b27b7426d62ca0dda181738bf0',
'duration': 2625.0,
'thumbnail': 'https://www.3sat.de/assets/ab-18---mein-fremdes-ich-106~original?cb=1666081865812',
'episode': 'Ab 18! - Mein fremdes Ich',
'episode_id': 'POS_6225d1ca-a0d5-45e3-870b-e783ee6c8a3f',
'timestamp': 1695081600,
'upload_date': '20230919',
},
'skip': '410 Gone',
}, {
'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
'url': 'https://www.3sat.de/gesellschaft/37-grad-leben/aus-dem-leben-gerissen-102.html',
'md5': 'a903eaf8d1fd635bd3317cd2ad87ec84',
'info_dict': {
'id': '140913_sendung_schweizweit',
'id': '250323_0903_sendung_sgl',
'ext': 'mp4',
'title': 'Waidmannsheil',
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
'timestamp': 1410623100,
'upload_date': '20140913',
'title': 'Plötzlich ohne dich',
'description': 'md5:380cc10659289dd91510ad8fa717c66b',
'duration': 1620.0,
'thumbnail': 'https://www.3sat.de/assets/37-grad-leben-106~original?cb=1645537156810',
'episode': 'Plötzlich ohne dich',
'episode_id': 'POS_faa7a93c-c0f2-4d51-823f-ce2ac3ee191b',
'timestamp': 1743162540,
'upload_date': '20250328',
},
'params': {
'skip_download': True,
}, {
# Video with chapters
'url': 'https://www.3sat.de/kultur/buchmesse/dein-buch-das-beste-von-der-leipziger-buchmesse-2025-teil-1-100.html',
'md5': '6b95790ce52e75f0d050adcdd2711ee6',
'info_dict': {
'id': '250330_dein_buch1_bum',
'ext': 'mp4',
'title': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
'description': 'md5:bae51bfc22f15563ce3acbf97d2e8844',
'duration': 5399.0,
'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1743329640903',
'chapters': 'count:24',
'episode': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
'episode_id': 'POS_1ef236cc-b390-401e-acd0-4fb4b04315fb',
'timestamp': 1743327000,
'upload_date': '20250330',
},
'skip': '404 Not Found',
}, {
# Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
@@ -58,11 +83,42 @@ class DreiSatIE(ZDFBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player = self._search_json(
r'data-zdfplayer-jsb=(["\'])', webpage, 'player JSON', video_id)
player_url = player['content']
api_token = f'Bearer {player["apiToken"]}'
webpage = self._download_webpage(url, video_id, fatal=False)
if webpage:
player = self._extract_player(webpage, url, fatal=False)
if player:
return self._extract_regular(url, player, video_id)
content = self._call_api(player_url, video_id, 'video metadata', api_token)
return self._extract_mobile(video_id)
video_target = content['mainVideoContent']['http://zdf.de/rels/target']
ptmd_path = traverse_obj(video_target, (
(('streams', 'default'), None),
('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'),
{str}, any, {require('ptmd path')}))
ptmd_url = self._expand_ptmd_template(player_url, ptmd_path)
aspect_ratio = self._parse_aspect_ratio(video_target.get('aspectRatio'))
info = self._extract_ptmd(ptmd_url, video_id, api_token, aspect_ratio)
return merge_dicts(info, {
**traverse_obj(content, {
'title': (('title', 'teaserHeadline'), {str}, any),
'episode': (('title', 'teaserHeadline'), {str}, any),
'description': (('leadParagraph', 'teasertext'), {str}, any),
'timestamp': ('editorialDate', {parse_iso8601}),
}),
**traverse_obj(video_target, {
'duration': ('duration', {int_or_none}),
'chapters': ('streamAnchorTag', {self._extract_chapters}),
}),
'thumbnails': self._extract_thumbnails(traverse_obj(content, ('teaserImageRef', 'layouts', {dict}))),
**traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', {
'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}),
'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}),
'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}),
'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}),
'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}),
'episode_number': ('episodeNumber', {int_or_none}),
'episode_id': ('contentId', {str}),
})),
})

View File

@@ -16,7 +16,6 @@ from ..utils import (
MEDIA_EXTENSIONS,
ExtractorError,
UnsupportedError,
base_url,
determine_ext,
determine_protocol,
dict_get,
@@ -38,6 +37,7 @@ from ..utils import (
unescapeHTML,
unified_timestamp,
unsmuggle_url,
update_url,
update_url_query,
url_or_none,
urlhandle_detect_ext,
@@ -2538,12 +2538,13 @@ class GenericIE(InfoExtractor):
return self.playlist_result(
self._parse_xspf(
doc, video_id, xspf_url=url,
xspf_base_url=full_response.url),
xspf_base_url=new_url),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
doc,
mpd_base_url=base_url(full_response.url),
# Do not use yt_dlp.utils.base_url here since it will raise on file:// URLs
mpd_base_url=update_url(new_url, query=None, fragment=None).rpartition('/')[0],
mpd_url=url)
info_dict['live_status'] = 'is_live' if doc.get('type') == 'dynamic' else None
self._extra_manifest_info(info_dict, url)

View File

@@ -8,7 +8,7 @@ from ..utils.traversal import traverse_obj
class GetCourseRuPlayerIE(InfoExtractor):
_VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
_VALID_URL = r'https?://(?:player02\.getcourse\.ru|cf-api-2\.vhcdn\.com)/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
_TESTS = [{
'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
@@ -20,6 +20,16 @@ class GetCourseRuPlayerIE(InfoExtractor):
'duration': 1693,
},
'skip': 'JWT expired',
}, {
'url': 'https://cf-api-2.vhcdn.com/sign-player/?json=example',
'info_dict': {
'id': '435735291',
'title': '8afd7c489952108e00f019590f3711f3',
'ext': 'mp4',
'thumbnail': 'https://preview-htz.vhcdn.com/preview/8afd7c489952108e00f019590f3711f3/preview.jpg?version=1682170973&host=vh-72',
'duration': 777,
},
'skip': 'JWT expired',
}]
def _real_extract(self, url):
@@ -168,7 +178,7 @@ class GetCourseRuIE(InfoExtractor):
playlist_id = self._search_regex(
r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
title = self._og_search_title(webpage) or self._html_extract_title(webpage)
title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
return self.playlist_from_matches(
re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),

View File

@@ -1,23 +1,33 @@
import functools
import itertools
import math
import re
from .common import InfoExtractor
from ..utils import (
InAdvancePagedList,
ISO639Utils,
OnDemandPagedList,
clean_html,
int_or_none,
js_to_json,
make_archive_id,
orderedSet,
smuggle_url,
unified_strdate,
unified_timestamp,
unsmuggle_url,
url_basename,
url_or_none,
urlencode_postdata,
urljoin,
variadic,
)
from ..utils.traversal import traverse_obj
class JioSaavnBaseIE(InfoExtractor):
_URL_BASE_RE = r'https?://(?:www\.)?(?:jio)?saavn\.com'
_API_URL = 'https://www.jiosaavn.com/api.php'
_VALID_BITRATES = {'16', '32', '64', '128', '320'}
@@ -30,16 +40,20 @@ class JioSaavnBaseIE(InfoExtractor):
f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}')
return requested_bitrates
def _extract_formats(self, song_data):
def _extract_formats(self, item_data):
# Show/episode JSON data has a slightly different structure than song JSON data
if media_url := traverse_obj(item_data, ('more_info', 'encrypted_media_url', {str})):
item_data.setdefault('encrypted_media_url', media_url)
for bitrate in self.requested_bitrates:
media_data = self._download_json(
self._API_URL, song_data['id'],
self._API_URL, item_data['id'],
f'Downloading format info for {bitrate}',
fatal=False, data=urlencode_postdata({
'__call': 'song.generateAuthToken',
'_format': 'json',
'bitrate': bitrate,
'url': song_data['encrypted_media_url'],
'url': item_data['encrypted_media_url'],
}))
if not traverse_obj(media_data, ('auth_url', {url_or_none})):
self.report_warning(f'Unable to extract format info for {bitrate}')
@@ -53,24 +67,6 @@ class JioSaavnBaseIE(InfoExtractor):
'vcodec': 'none',
}
def _extract_song(self, song_data, url=None):
info = traverse_obj(song_data, {
'id': ('id', {str}),
'title': ('song', {clean_html}),
'album': ('album', {clean_html}),
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
'duration': ('duration', {int_or_none}),
'view_count': ('play_count', {int_or_none}),
'release_year': ('year', {int_or_none}),
'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
'webpage_url': ('perma_url', {url_or_none}),
})
if webpage_url := info.get('webpage_url') or url:
info['display_id'] = url_basename(webpage_url)
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
return info
def _call_api(self, type_, token, note='API', params={}):
return self._download_json(
self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
@@ -84,19 +80,89 @@ class JioSaavnBaseIE(InfoExtractor):
**params,
})
def _yield_songs(self, playlist_data):
for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
song_info = self._extract_song(song_data)
url = smuggle_url(song_info['webpage_url'], {
'id': song_data['id'],
'encrypted_media_url': song_data['encrypted_media_url'],
})
yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
@staticmethod
def _extract_song(song_data, url=None):
    """Parse a JioSaavn API song object into a yt-dlp info dict.

    song_data may come from either the legacy flat schema or the newer
    schema that nests fields under 'more_info'; the (None, 'more_info')
    traversal branches accept both. If *url* is given it is used as a
    fallback webpage_url for deriving display_id.
    """
    info = traverse_obj(song_data, {
        'id': ('id', {str}),
        'title': (('song', 'title'), {clean_html}, any),
        'album': ((None, 'more_info'), 'album', {clean_html}, any),
        'duration': ((None, 'more_info'), 'duration', {int_or_none}, any),
        'channel': ((None, 'more_info'), 'label', {str}, any),
        'channel_id': ((None, 'more_info'), 'label_id', {str}, any),
        'channel_url': ((None, 'more_info'), 'label_url', {urljoin('https://www.jiosaavn.com/')}, any),
        'release_date': ((None, 'more_info'), 'release_date', {unified_strdate}, any),
        'release_year': ('year', {int_or_none}),
        # Request a larger 500x500 rendition of the thumbnail
        'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
        'view_count': ('play_count', {int_or_none}),
        # API uses 2-letter language names; fall back to 'und' (undetermined)
        'language': ('language', {lambda x: ISO639Utils.short2long(x.casefold()) or 'und'}),
        'webpage_url': ('perma_url', {url_or_none}),
        'artists': ('more_info', 'artistMap', 'primary_artists', ..., 'name', {str}, filter, all),
    })
    if webpage_url := info.get('webpage_url') or url:
        info['display_id'] = url_basename(webpage_url)
        # Keep archive continuity with the old jiosaavn:song IDs
        info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]

    # Merge comma-separated artist strings from the legacy schema,
    # then de-duplicate while preserving order
    if primary_artists := traverse_obj(song_data, ('primary_artists', {lambda x: x.split(', ') if x else None})):
        info['artists'].extend(primary_artists)
    if featured_artists := traverse_obj(song_data, ('featured_artists', {str}, filter)):
        info['artists'].extend(featured_artists.split(', '))
    info['artists'] = orderedSet(info['artists']) or None

    return info
@staticmethod
def _extract_episode(episode_data, url=None):
    """Parse a JioSaavn API show-episode object into an info dict.

    Episodes share most fields with songs, so this reuses _extract_song
    and then layers on episode/series metadata from 'more_info'.
    """
    info = JioSaavnBaseIE._extract_song(episode_data, url)
    # Old archive IDs only apply to songs, not show episodes
    info.pop('_old_archive_ids', None)

    info.update(traverse_obj(episode_data, {
        'description': ('more_info', 'description', {str}),
        'timestamp': ('more_info', 'release_time', {unified_timestamp}),
        'series': ('more_info', 'show_title', {str}),
        'series_id': ('more_info', 'show_id', {str}),
        'season': ('more_info', 'season_title', {str}),
        'season_number': ('more_info', 'season_no', {int_or_none}),
        'season_id': ('more_info', 'season_id', {str}),
        'episode_number': ('more_info', 'episode_number', {int_or_none}),
        'cast': ('starring', {lambda x: x.split(', ') if x else None}),
    }))
    return info
def _extract_jiosaavn_result(self, url, endpoint, response_key, parse_func):
    """Shared single-item extraction for songs and show episodes.

    When the URL was smuggled by _yield_items it already carries the item
    id and encrypted media URL, so the metadata API call is skipped and
    only formats are resolved (metadata comes from the url_transparent
    parent). Otherwise the item is fetched from *endpoint* and parsed
    with *parse_func*.
    """
    url, smuggled_data = unsmuggle_url(url)
    data = traverse_obj(smuggled_data, ({
        'id': ('id', {str}),
        'encrypted_media_url': ('encrypted_media_url', {str}),
    }))

    if 'id' in data and 'encrypted_media_url' in data:
        result = {'id': data['id']}
    else:
        # only extract metadata if this is not a url_transparent result
        data = self._call_api(endpoint, self._match_id(url))[response_key][0]
        result = parse_func(data, url)

    result['formats'] = list(self._extract_formats(data))
    return result
def _yield_items(self, playlist_data, keys=None, parse_func=None):
    """Subclasses using this method must set _ENTRY_IE

    Yields url_transparent results for each item found under *keys* in
    *playlist_data*. The item id and encrypted media URL are smuggled
    into the entry URL so the entry extractor can skip its metadata
    API call. *parse_func* defaults to the song parser.
    """
    if parse_func is None:
        parse_func = self._extract_song

    # Only items that have both an id and a permalink are usable
    for item_data in traverse_obj(playlist_data, (
        *variadic(keys, (str, bytes, dict, set)), lambda _, v: v['id'] and v['perma_url'],
    )):
        info = parse_func(item_data)
        url = smuggle_url(info['webpage_url'], traverse_obj(item_data, {
            'id': ('id', {str}),
            'encrypted_media_url': ((None, 'more_info'), 'encrypted_media_url', {str}, any),
        }))
        yield self.url_result(url, self._ENTRY_IE, url_transparent=True, **info)
class JioSaavnSongIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:song'
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'(?:/song/[^/?#]+/|/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
'md5': '3b84396d15ed9e083c3106f1fa589c04',
@@ -106,12 +172,38 @@ class JioSaavnSongIE(JioSaavnBaseIE):
'ext': 'm4a',
'title': 'Leja Re',
'album': 'Leja Re',
'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
'thumbnail': r're:https?://.+/.+\.jpg',
'duration': 205,
'view_count': int,
'release_year': 2018,
'artists': ['Sandesh Shandilya', 'Dhvani Bhanushali', 'Tanishk Bagchi'],
'_old_archive_ids': ['jiosaavnsong OQsEfQFVUXk'],
'channel': 'T-Series',
'language': 'hin',
'channel_id': '34297',
'channel_url': 'https://www.jiosaavn.com/label/t-series-albums/6DLuXO3VoTo_',
'release_date': '20181124',
},
}, {
'url': 'https://www.jiosaavn.com/song/chuttamalle/P1FfWjZkQ0Q',
'md5': '96296c58d6ce488a417ef0728fd2d680',
'info_dict': {
'id': 'O94kBTtw',
'display_id': 'P1FfWjZkQ0Q',
'ext': 'm4a',
'title': 'Chuttamalle',
'album': 'Devara Part 1 - Telugu',
'thumbnail': r're:https?://.+/.+\.jpg',
'duration': 222,
'view_count': int,
'release_year': 2024,
'artists': 'count:3',
'_old_archive_ids': ['jiosaavnsong P1FfWjZkQ0Q'],
'channel': 'T-Series',
'language': 'tel',
'channel_id': '34297',
'channel_url': 'https://www.jiosaavn.com/label/t-series-albums/6DLuXO3VoTo_',
'release_date': '20240926',
},
}, {
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
@@ -119,26 +211,51 @@ class JioSaavnSongIE(JioSaavnBaseIE):
}]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url)
song_data = traverse_obj(smuggled_data, ({
'id': ('id', {str}),
'encrypted_media_url': ('encrypted_media_url', {str}),
}))
return self._extract_jiosaavn_result(url, 'song', 'songs', self._extract_song)
if 'id' in song_data and 'encrypted_media_url' in song_data:
result = {'id': song_data['id']}
else:
# only extract metadata if this is not a url_transparent result
song_data = self._call_api('song', self._match_id(url))['songs'][0]
result = self._extract_song(song_data, url)
result['formats'] = list(self._extract_formats(song_data))
return result
class JioSaavnShowIE(JioSaavnBaseIE):
    """Extract a single episode of a JioSaavn show/podcast."""
    IE_NAME = 'jiosaavn:show'
    # The episode token is at least 11 chars, which keeps season-listing
    # URLs (/shows/<name>/<season number>/...) out of this extractor
    _VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/shows/[^/?#]+/(?P<id>[^/?#]{11,})/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://www.jiosaavn.com/shows/non-food-ways-to-boost-your-energy/XFMcKICOCgc_',
        'md5': '0733cd254cfe74ef88bea1eaedcf1f4f',
        'info_dict': {
            'id': 'qqzh3RKZ',
            'display_id': 'XFMcKICOCgc_',
            'ext': 'mp3',
            'title': 'Non-Food Ways To Boost Your Energy',
            'description': 'md5:26e7129644b5c6aada32b8851c3997c8',
            'episode': 'Episode 1',
            'timestamp': 1640563200,
            'series': 'Holistic Lifestyle With Neha Ranglani',
            'series_id': '52397',
            'season': 'Holistic Lifestyle With Neha Ranglani',
            'season_number': 1,
            'season_id': '61273',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'duration': 311,
            'view_count': int,
            'release_year': 2021,
            'language': 'eng',
            'channel': 'Saavn OG',
            'channel_id': '1953876',
            'episode_number': 1,
            'upload_date': '20211227',
            'release_date': '20211227',
        },
    }, {
        'url': 'https://www.jiosaavn.com/shows/himesh-reshammiya/Kr8fmfSN4vo_',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Query the 'episode' API endpoint, take the first entry of its
        # 'episodes' list, and parse it with the episode metadata parser
        return self._extract_jiosaavn_result(url, 'episode', 'episodes', self._extract_episode)
class JioSaavnAlbumIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:album'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P<id>[^/?#]+)'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/album/[^/?#]+/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
'info_dict': {
@@ -147,18 +264,19 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
},
'playlist_count': 10,
}]
_ENTRY_IE = JioSaavnSongIE
def _real_extract(self, url):
display_id = self._match_id(url)
album_data = self._call_api('album', display_id)
return self.playlist_result(
self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
self._yield_items(album_data, 'songs'), display_id, traverse_obj(album_data, ('title', {str})))
class JioSaavnPlaylistIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:playlist'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
'info_dict': {
@@ -172,15 +290,16 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
'id': 'DVR,pFUOwyXqIp77B1JF,A__',
'title': 'Mood Hindi',
},
'playlist_mincount': 801,
'playlist_mincount': 750,
}, {
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
'info_dict': {
'id': 'Me5RridRfDk_',
'title': 'Taaza Tunes',
},
'playlist_mincount': 301,
'playlist_mincount': 50,
}]
_ENTRY_IE = JioSaavnSongIE
_PAGE_SIZE = 50
def _fetch_page(self, token, page):
@@ -189,7 +308,7 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
def _entries(self, token, first_page_data, page):
page_data = first_page_data if not page else self._fetch_page(token, page + 1)
yield from self._yield_songs(page_data)
yield from self._yield_items(page_data, 'songs')
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -199,3 +318,95 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
return self.playlist_result(InAdvancePagedList(
functools.partial(self._entries, display_id, playlist_data),
total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))
class JioSaavnShowPlaylistIE(JioSaavnBaseIE):
    """Extract one season of a JioSaavn show as a playlist of episodes."""
    IE_NAME = 'jiosaavn:show:playlist'
    _VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/shows/(?P<show>[^#/?]+)/(?P<season>\d+)/[^/?#]+'
    _TESTS = [{
        'url': 'https://www.jiosaavn.com/shows/talking-music/1/PjReFP-Sguk_',
        'info_dict': {
            'id': 'talking-music-1',
            'title': 'Talking Music',
        },
        'playlist_mincount': 11,
    }]
    # Entries are resolved by the single-episode extractor
    _ENTRY_IE = JioSaavnShowIE
    # Episodes returned per API page
    _PAGE_SIZE = 10

    def _fetch_page(self, show_id, season_id, page):
        # 'p' is 1-based for this endpoint; callers pass page + 1
        return self._call_api('show', show_id, f'show page {page}', {
            'p': page,
            '__call': 'show.getAllEpisodes',
            'show_id': show_id,
            'season_number': season_id,
            'api_version': '4',
            'sort_order': 'desc',
        })

    def _entries(self, show_id, season_id, page):
        page_data = self._fetch_page(show_id, season_id, page + 1)
        yield from self._yield_items(page_data, keys=None, parse_func=self._extract_episode)

    def _real_extract(self, url):
        show_slug, season_id = self._match_valid_url(url).group('show', 'season')
        playlist_id = f'{show_slug}-{season_id}'
        webpage = self._download_webpage(url, playlist_id)
        # The numeric show ID required by the API is only available from
        # the page's embedded JS state, not from the URL itself
        show_info = self._search_json(
            r'window\.__INITIAL_DATA__\s*=', webpage, 'initial data',
            playlist_id, transform_source=js_to_json)['showView']
        show_id = show_info['current_id']

        entries = OnDemandPagedList(functools.partial(self._entries, show_id, season_id), self._PAGE_SIZE)
        return self.playlist_result(
            entries, playlist_id, traverse_obj(show_info, ('show', 'title', 'text', {str})))
class JioSaavnArtistIE(JioSaavnBaseIE):
    """Extract an artist page's top songs as a playlist."""
    IE_NAME = 'jiosaavn:artist'
    _VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/artist/[^/?#]+/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.jiosaavn.com/artist/krsna-songs/rYLBEve2z3U_',
        'info_dict': {
            'id': 'rYLBEve2z3U_',
            'title': 'KR$NA',
        },
        'playlist_mincount': 38,
    }, {
        'url': 'https://www.jiosaavn.com/artist/sanam-puri-songs/SkNEv3qRhDE_',
        'info_dict': {
            'id': 'SkNEv3qRhDE_',
            'title': 'Sanam Puri',
        },
        'playlist_mincount': 51,
    }]
    _ENTRY_IE = JioSaavnSongIE
    _PAGE_SIZE = 50

    def _fetch_page(self, artist_id, page):
        # 'p' is 0-based for the artist endpoint; the note string is
        # 1-based only for human-readable progress output
        return self._call_api('artist', artist_id, f'artist page {page + 1}', {
            'p': page,
            'n_song': self._PAGE_SIZE,
            'n_album': self._PAGE_SIZE,
            'sub_type': '',
            'includeMetaTags': '',
            'api_version': '4',
            'category': 'alphabetical',
            'sort_order': 'asc',
        })

    def _entries(self, artist_id, first_page):
        for page in itertools.count():
            playlist_data = first_page if not page else self._fetch_page(artist_id, page)
            # Stop paging once a response has no song entries left
            if not traverse_obj(playlist_data, ('topSongs', ..., {dict})):
                break
            yield from self._yield_items(playlist_data, 'topSongs')

    def _real_extract(self, url):
        artist_id = self._match_id(url)
        # The first page is fetched eagerly because it also carries the
        # artist name used as the playlist title
        first_page = self._fetch_page(artist_id, 0)
        return self.playlist_result(
            self._entries(artist_id, first_page), artist_id,
            traverse_obj(first_page, ('name', {str})))

View File

@@ -1,4 +1,5 @@
import itertools
import json
import re
from .common import InfoExtractor
@@ -9,12 +10,12 @@ from ..utils import (
int_or_none,
mimetype2ext,
srt_subtitles_timecode,
traverse_obj,
try_get,
url_or_none,
urlencode_postdata,
urljoin,
)
from ..utils.traversal import find_elements, require, traverse_obj
class LinkedInBaseIE(InfoExtractor):
@@ -277,3 +278,110 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
entries, course_slug,
course_data.get('title'),
course_data.get('description'))
class LinkedInEventsIE(LinkedInBaseIE):
    """Extract video from LinkedIn event pages (live, recorded or upcoming).

    Requires a logged-in session (li_at cookie); metadata and player data
    are read from JSON embedded in hidden <code> elements on the page.
    """
    IE_NAME = 'linkedin:events'
    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/events/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.linkedin.com/events/7084656651378536448/comments/',
        'info_dict': {
            'id': '7084656651378536448',
            'ext': 'mp4',
            'title': '#37 Aprende a hacer una entrevista en inglés para tu próximo trabajo remoto',
            'description': '¡Agarra para anotar que se viene tremendo evento!',
            'duration': 1765,
            'timestamp': 1689113772,
            'upload_date': '20230711',
            'release_timestamp': 1689174012,
            'release_date': '20230712',
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://www.linkedin.com/events/27-02energyfreedombyenergyclub7295762520814874625/comments/',
        'info_dict': {
            'id': '27-02energyfreedombyenergyclub7295762520814874625',
            'ext': 'mp4',
            'title': '27.02 Energy Freedom by Energy Club',
            'description': 'md5:1292e6f31df998914c293787a02c3b91',
            'duration': 6420,
            'timestamp': 1739445333,
            'upload_date': '20250213',
            'release_timestamp': 1740657620,
            'release_date': '20250227',
            'live_status': 'was_live',
        },
    }]

    def _real_initialize(self):
        # Event pages are only served to authenticated sessions
        if not self._get_cookies('https://www.linkedin.com/').get('li_at'):
            self.raise_login_required()

    def _real_extract(self, url):
        event_id = self._match_id(url)
        webpage = self._download_webpage(url, event_id)
        # Page state is embedded as JSON inside hidden <code> elements;
        # collect every object from their 'included' arrays
        base_data = traverse_obj(webpage, (
            {find_elements(tag='code', attr='style', value='display: none')}, ..., {json.loads}, 'included', ...))
        meta_data = traverse_obj(base_data, (
            lambda _, v: v['$type'] == 'com.linkedin.voyager.dash.events.ProfessionalEvent', any)) or {}

        # Map LinkedIn lifecycle states onto yt-dlp live_status values
        live_status = {
            'PAST': 'was_live',
            'ONGOING': 'is_live',
            'FUTURE': 'is_upcoming',
        }.get(meta_data.get('lifecycleState'))
        if live_status == 'is_upcoming':
            # No player data exists yet for events that have not started
            player_data = {}
            if event_time := traverse_obj(meta_data, ('displayEventTime', {str})):
                message = f'This live event is scheduled for {event_time}'
            else:
                message = 'This live event has not yet started'
            self.raise_no_formats(message, expected=True, video_id=event_id)
        else:
            # TODO: Add support for audio-only live events
            player_data = traverse_obj(base_data, (
                lambda _, v: v['$type'] == 'com.linkedin.videocontent.VideoPlayMetadata',
                any, {require('video player data')}))

        formats, subtitles = [], {}
        # Progressive (direct-download) renditions
        for prog_fmts in traverse_obj(player_data, ('progressiveStreams', ..., {dict})):
            for fmt_url in traverse_obj(prog_fmts, ('streamingLocations', ..., 'url', {url_or_none})):
                formats.append({
                    'url': fmt_url,
                    **traverse_obj(prog_fmts, {
                        'width': ('width', {int_or_none}),
                        'height': ('height', {int_or_none}),
                        'tbr': ('bitRate', {int_or_none(scale=1000)}),
                        'filesize': ('size', {int_or_none}),
                        'ext': ('mediaType', {mimetype2ext}),
                    }),
                })

        # HLS adaptive renditions (and any embedded subtitles)
        for m3u8_url in traverse_obj(player_data, (
            'adaptiveStreams', lambda _, v: v['protocol'] == 'HLS', 'masterPlaylists', ..., 'url', {url_or_none},
        )):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, event_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'id': event_id,
            'formats': formats,
            'subtitles': subtitles,
            'live_status': live_status,
            **traverse_obj(meta_data, {
                'title': ('name', {str}),
                'description': ('description', 'text', {str}),
                'timestamp': ('createdAt', {int_or_none(scale=1000)}),
                # timeRange.start is available when the stream is_upcoming
                'release_timestamp': ('timeRange', 'start', {int_or_none(scale=1000)}),
            }),
            # player_data values win over meta_data ones (applied second)
            **traverse_obj(player_data, {
                'duration': ('duration', {int_or_none(scale=1000)}),
                # liveStreamCreatedAt is only available when the stream is_live or was_live
                'release_timestamp': ('liveStreamCreatedAt', {int_or_none(scale=1000)}),
            }),
        }

View File

@@ -365,13 +365,15 @@ mutation initPlaybackSession(
'All videos are only available to registered users', method='password')
def _set_device_id(self, username):
if not self._device_id:
self._device_id = self.cache.load(
self._NETRC_MACHINE, 'device_ids', default={}).get(username)
if self._device_id:
return
device_id_cache = self.cache.load(self._NETRC_MACHINE, 'device_ids', default={})
self._device_id = device_id_cache.get(username)
if self._device_id:
return
self._device_id = str(uuid.uuid4())
self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
device_id_cache[username] = self._device_id
self.cache.store(self._NETRC_MACHINE, 'device_ids', device_id_cache)
def _perform_login(self, username, password):
try:

View File

@@ -16,7 +16,7 @@ from ..utils import (
determine_ext,
float_or_none,
int_or_none,
join_nonempty,
parse_bitrate,
parse_duration,
parse_iso8601,
parse_qs,
@@ -24,8 +24,6 @@ from ..utils import (
qualities,
remove_start,
str_or_none,
traverse_obj,
try_get,
unescapeHTML,
unified_timestamp,
update_url_query,
@@ -34,13 +32,70 @@ from ..utils import (
urlencode_postdata,
urljoin,
)
from ..utils.traversal import find_element, traverse_obj
class NiconicoIE(InfoExtractor):
class NiconicoBaseIE(InfoExtractor):
    """Shared login and session handling for niconico extractors."""
    _GEO_BYPASS = False
    _GEO_COUNTRIES = ['JP']
    _LOGIN_BASE = 'https://account.nicovideo.jp'
    _NETRC_MACHINE = 'niconico'

    @property
    def is_logged_in(self):
        # The 'user_session' cookie is only present on a logged-in session
        return bool(self._get_cookies('https://www.nicovideo.jp').get('user_session'))

    def _raise_login_error(self, message, expected=True):
        """Raise a uniform login failure; expected=False marks it as a bug-worthy error."""
        raise ExtractorError(f'Unable to login: {message}', expected=expected)

    def _perform_login(self, username, password):
        """Log in with credentials, handling the optional TOTP (MFA) step.

        Returns early as soon as the session cookie appears; otherwise
        raises via _raise_login_error with the page's error message.
        """
        if self.is_logged_in:
            return

        # Initial GET primes the session cookies needed by the redirector
        self._request_webpage(
            f'{self._LOGIN_BASE}/login', None, 'Requesting session cookies')
        webpage = self._download_webpage(
            f'{self._LOGIN_BASE}/login/redirector', None,
            'Logging in', 'Unable to log in', headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Referer': f'{self._LOGIN_BASE}/login',
            }, data=urlencode_postdata({
                'mail_tel': username,
                'password': password,
            }))

        if self.is_logged_in:
            return
        elif err_msg := traverse_obj(webpage, (
            {find_element(cls='notice error')}, {find_element(cls='notice__text')}, {clean_html},
        )):
            # NOTE(review): err_msg is always truthy inside this branch
            # (walrus condition), so the fallback string is never used
            self._raise_login_error(err_msg or 'Invalid username or password')
        elif 'oneTimePw' in webpage:
            # Account has MFA enabled; submit the one-time password form
            post_url = self._search_regex(
                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage, 'post url', group='url')
            mfa, urlh = self._download_webpage_handle(
                urljoin(self._LOGIN_BASE, post_url), None,
                'Performing MFA', 'Unable to complete MFA', headers={
                    'Content-Type': 'application/x-www-form-urlencoded',
                }, data=urlencode_postdata({
                    'otp': self._get_tfa_info('6 digit number shown on app'),
                }))

            if self.is_logged_in:
                return
            elif 'error-code' in parse_qs(urlh.url):
                err_msg = traverse_obj(mfa, ({find_element(cls='pageMainMsg')}, {clean_html}))
                self._raise_login_error(err_msg or 'MFA session expired')
            elif 'formError' in mfa:
                err_msg = traverse_obj(mfa, (
                    {find_element(cls='formError')}, {find_element(tag='div')}, {clean_html}))
                self._raise_login_error(err_msg or 'MFA challenge failed')

        # No recognizable error marker found on the page
        self._raise_login_error('Unexpected login error', expected=False)
class NiconicoIE(NiconicoBaseIE):
IE_NAME = 'niconico'
IE_DESC = 'ニコニコ動画'
_GEO_COUNTRIES = ['JP']
_GEO_BYPASS = False
_TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215',
@@ -180,229 +235,6 @@ class NiconicoIE(InfoExtractor):
}]
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
_API_HEADERS = {
'X-Frontend-ID': '6',
'X-Frontend-Version': '0',
'X-Niconico-Language': 'en-us',
'Referer': 'https://www.nicovideo.jp/',
'Origin': 'https://www.nicovideo.jp',
}
def _perform_login(self, username, password):
    """Legacy niconico login (boolean-return protocol).

    Returns True on success, False on failure; MFA failures only emit a
    warning and return False rather than raising.
    """
    login_ok = True
    login_form_strs = {
        'mail_tel': username,
        'password': password,
    }
    # Initial GET primes the session cookies for the redirector endpoint
    self._request_webpage(
        'https://account.nicovideo.jp/login', None,
        note='Acquiring Login session')
    page = self._download_webpage(
        'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
        note='Logging in', errnote='Unable to log in',
        data=urlencode_postdata(login_form_strs),
        headers={
            'Referer': 'https://account.nicovideo.jp/login',
            'Content-Type': 'application/x-www-form-urlencoded',
        })
    if 'oneTimePw' in page:
        # Account has MFA enabled; submit the one-time password form
        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
        page = self._download_webpage(
            urljoin('https://account.nicovideo.jp', post_url), None,
            note='Performing MFA', errnote='Unable to complete MFA',
            data=urlencode_postdata({
                'otp': self._get_tfa_info('6 digits code'),
            }), headers={
                'Content-Type': 'application/x-www-form-urlencoded',
            })
        # The OTP form reappearing (or a form error) means MFA failed
        if 'oneTimePw' in page or 'formError' in page:
            err_msg = self._html_search_regex(
                r'formError["\']+>(.*?)</div>', page, 'form_error',
                default='There\'s an error but the message can\'t be parsed.',
                flags=re.DOTALL)
            self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
            return False
    # An error notice on the final page indicates bad credentials
    login_ok = 'class="notice error"' not in page
    if not login_ok:
        self.report_warning('Unable to log in: bad username or password')
    return login_ok
def _get_heartbeat_info(self, info_dict):
    """Open a DMC delivery session for a chosen format and build heartbeat data.

    info_dict['url'] is the 'niconico_dmc:<video>/<video_src>/<audio_src>'
    pseudo-URL produced by _extract_format_for_quality. Posts a session
    request to the DMC session API, rewrites info_dict['url']/'protocol'
    with the real content URI, and returns (info_dict, heartbeat_info)
    where heartbeat_info tells the downloader how to keep the session alive.
    """
    video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
    dmc_protocol = info_dict['expected_protocol']

    # Reuse API data cached on the format when available; otherwise
    # re-fetch the watch page and parse its data-api-data attribute
    api_data = (
        info_dict.get('_api_data')
        or self._parse_json(
            self._html_search_regex(
                'data-api-data="([^"]+)"',
                self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
                'API data', default='{}'),
            video_id))

    session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
    session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])

    def ping():
        # Called by the downloader; notifies the tracking endpoint that
        # playback is starting. Failure is non-fatal but may break download.
        tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
        if tracking_id:
            tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
            watch_request_response = self._download_json(
                tracking_url, video_id,
                note='Acquiring permission for downloading video', fatal=False,
                headers=self._API_HEADERS)
            if traverse_obj(watch_request_response, ('meta', 'status')) != 200:
                self.report_warning('Failed to acquire permission for playing video. Video download may fail.')

    # The session API expects literal 'yes'/'no' strings, not booleans
    yesno = lambda x: 'yes' if x else 'no'

    # Build protocol-specific session parameters
    if dmc_protocol == 'http':
        protocol = 'http'
        protocol_parameters = {
            'http_output_download_parameters': {
                'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
                'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
            },
        }
    elif dmc_protocol == 'hls':
        protocol = 'm3u8'
        segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
        parsed_token = self._parse_json(session_api_data['token'], video_id)
        encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
        protocol_parameters = {
            'hls_parameters': {
                'segment_duration': segment_duration,
                'transfer_preset': '',
                'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
                'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
            },
        }
        if 'hls_encryption' in parsed_token and encryption:
            protocol_parameters['hls_parameters']['encryption'] = {
                parsed_token['hls_encryption']: {
                    'encrypted_key': encryption['encryptedKey'],
                    'key_uri': encryption['keyUri'],
                },
            }
        else:
            # Unencrypted HLS can be handled by the native downloader
            protocol = 'm3u8_native'
    else:
        raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')

    # Open the delivery session; the response carries the content URI
    # and the session object that must be echoed back in heartbeats
    session_response = self._download_json(
        session_api_endpoint['url'], video_id,
        query={'_format': 'json'},
        headers={'Content-Type': 'application/json'},
        note='Downloading JSON metadata for {}'.format(info_dict['format_id']),
        data=json.dumps({
            'session': {
                'client_info': {
                    'player_id': session_api_data.get('playerId'),
                },
                'content_auth': {
                    'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
                    'content_key_timeout': session_api_data.get('contentKeyTimeout'),
                    'service_id': 'nicovideo',
                    'service_user_id': session_api_data.get('serviceUserId'),
                },
                'content_id': session_api_data.get('contentId'),
                'content_src_id_sets': [{
                    'content_src_ids': [{
                        'src_id_to_mux': {
                            'audio_src_ids': [audio_src_id],
                            'video_src_ids': [video_src_id],
                        },
                    }],
                }],
                'content_type': 'movie',
                'content_uri': '',
                'keep_method': {
                    'heartbeat': {
                        'lifetime': session_api_data.get('heartbeatLifetime'),
                    },
                },
                'priority': session_api_data['priority'],
                'protocol': {
                    'name': 'http',
                    'parameters': {
                        'http_parameters': {
                            'parameters': protocol_parameters,
                        },
                    },
                },
                'recipe_id': session_api_data.get('recipeId'),
                'session_operation_auth': {
                    'session_operation_auth_by_signature': {
                        'signature': session_api_data.get('signature'),
                        'token': session_api_data.get('token'),
                    },
                },
                'timing_constraint': 'unlimited',
            },
        }).encode())

    info_dict['url'] = session_response['data']['session']['content_uri']
    info_dict['protocol'] = protocol

    # get heartbeat info
    heartbeat_info_dict = {
        'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
        'data': json.dumps(session_response['data']),
        # interval, convert milliseconds to seconds, then halve to make a buffer.
        'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
        'ping': ping,
    }

    return info_dict, heartbeat_info_dict
def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
    """Build one DMC format dict for an (audio, video, protocol) triple.

    Returns None when either stream is flagged unavailable by the API.
    The 'niconico_dmc:' pseudo-URL is resolved later by
    _get_heartbeat_info via the session API.
    """
    if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
        return None

    format_id = '-'.join(
        [remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])

    vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))

    return {
        'url': 'niconico_dmc:{}/{}/{}'.format(video_id, video_quality['id'], audio_quality['id']),
        'format_id': format_id,
        'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '),
        'ext': 'mp4',  # Session API are used in HTML5, which always serves mp4
        'acodec': 'aac',
        'vcodec': 'h264',
        **traverse_obj(audio_quality, ('metadata', {
            'abr': ('bitrate', {float_or_none(scale=1000)}),
            'asr': ('samplingRate', {int_or_none}),
        })),
        **traverse_obj(video_quality, ('metadata', {
            'vbr': ('bitrate', {float_or_none(scale=1000)}),
            'height': ('resolution', 'height', {int_or_none}),
            'width': ('resolution', 'width', {int_or_none}),
        })),
        # Deprioritize 'low' variants below unranked formats
        'quality': -2 if 'low' in video_quality['id'] else None,
        'protocol': 'niconico_dmc',
        'expected_protocol': dmc_protocol,  # XXX: This is not a documented field
        'http_headers': {
            'Origin': 'https://www.nicovideo.jp',
            'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
        },
    }
def _yield_dmc_formats(self, api_data, video_id):
    """Yield a format for every available audio x video x protocol
    combination advertised by the DMC delivery data in api_data."""
    dmc_data = traverse_obj(api_data, ('media', 'delivery', 'movie'))
    audios = traverse_obj(dmc_data, ('audios', ..., {dict}))
    videos = traverse_obj(dmc_data, ('videos', ..., {dict}))
    protocols = traverse_obj(dmc_data, ('session', 'protocols', ..., {str}))
    # All three lists must be non-empty; otherwise DMC delivery is absent
    if not all((audios, videos, protocols)):
        return

    for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols):
        # Unavailable combinations yield None and are skipped
        if fmt := self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol):
            yield fmt
def _yield_dms_formats(self, api_data, video_id):
fmt_filter = lambda _, v: v['isAvailable'] and v['id']
@@ -485,8 +317,8 @@ class NiconicoIE(InfoExtractor):
'needs_premium': ('isPremium', {bool}),
'needs_subscription': ('isAdmission', {bool}),
})) or {'needs_auth': True}))
formats = [*self._yield_dmc_formats(api_data, video_id),
*self._yield_dms_formats(api_data, video_id)]
formats = list(self._yield_dms_formats(api_data, video_id))
if not formats:
fail_msg = clean_html(self._html_search_regex(
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
@@ -921,7 +753,7 @@ class NiconicoUserIE(InfoExtractor):
return self.playlist_result(self._entries(list_id), list_id)
class NiconicoLiveIE(InfoExtractor):
class NiconicoLiveIE(NiconicoBaseIE):
IE_NAME = 'niconico:live'
IE_DESC = 'ニコニコ生放送'
_VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
@@ -953,8 +785,6 @@ class NiconicoLiveIE(InfoExtractor):
'only_matching': True,
}]
_KNOWN_LATENCY = ('high', 'low')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
@@ -970,22 +800,19 @@ class NiconicoLiveIE(InfoExtractor):
})
hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
if latency not in self._KNOWN_LATENCY:
latency = 'high'
ws = self._request_webpage(
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
video_id=video_id, note='Connecting to WebSocket server')
self.write_debug('[debug] Sending HLS server request')
self.write_debug('Sending HLS server request')
ws.send(json.dumps({
'type': 'startWatching',
'data': {
'stream': {
'quality': 'abr',
'protocol': 'hls+fmp4',
'latency': latency,
'protocol': 'hls',
'latency': 'high',
'accessRightMethod': 'single_cookie',
'chasePlay': False,
},
@@ -1049,18 +876,29 @@ class NiconicoLiveIE(InfoExtractor):
for cookie in cookies:
self._set_cookie(
cookie['domain'], cookie['name'], cookie['value'],
expire_time=unified_timestamp(cookie['expires']), path=cookie['path'], secure=cookie['secure'])
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])
fmt_common = {
'live_latency': 'high',
'origin': hostname,
'protocol': 'niconico_live',
'video_id': video_id,
'ws': ws,
}
q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr'
a_map = {96: 'audio_low', 192: 'audio_high'}
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
for fmt, q in zip(formats, reversed(qualities[1:])):
fmt.update({
'format_id': q,
'protocol': 'niconico_live',
'ws': ws,
'video_id': video_id,
'live_latency': latency,
'origin': hostname,
})
for fmt in formats:
if fmt.get('acodec') == 'none':
fmt['format_id'] = next(q_iter, fmt['format_id'])
elif fmt.get('vcodec') == 'none':
abr = parse_bitrate(fmt['url'].lower())
fmt.update({
'abr': abr,
'format_id': a_map.get(abr, fmt['format_id']),
})
fmt.update(fmt_common)
return {
'id': video_id,

View File

@@ -181,6 +181,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 119.0,
},
'skip': 'HTTP Error 500: Internal Server Error',
}, {
# article with audio and no video
'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
@@ -190,13 +191,14 @@ class NYTimesArticleIE(NYTimesBaseIE):
'ext': 'mp3',
'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?',
'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e',
'timestamp': 1695960700,
'timestamp': 1696008129,
'upload_date': '20230929',
'creator': 'Stephanie Nolen, Natalija Gormalova',
'creators': ['Stephanie Nolen', 'Natalija Gormalova'],
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 1322,
},
}, {
# lede_media_block already has sourceId
'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
'md5': '3eb5ddb1d6f86254fe4f233826778737',
'info_dict': {
@@ -207,7 +209,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
'timestamp': 1701290997,
'upload_date': '20231129',
'uploader': 'By The New York Times',
'creator': 'Katie Rogers',
'creators': ['Katie Rogers'],
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 97.631,
},
@@ -222,10 +224,22 @@ class NYTimesArticleIE(NYTimesBaseIE):
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
'upload_date': '20231202',
'creator': 'Emily Steel, Sydney Ember',
'creators': ['Emily Steel', 'Sydney Ember'],
'timestamp': 1701511264,
},
'playlist_count': 3,
}, {
# lede_media_block does not have sourceId
'url': 'https://www.nytimes.com/2025/04/30/well/move/hip-mobility-routine.html',
'info_dict': {
'id': 'hip-mobility-routine',
'title': 'Tight Hips? These Moves Can Help.',
'description': 'Sitting all day is hard on your hips. Try this simple routine for better mobility.',
'creators': ['Alyssa Ages', 'Theodore Tae'],
'timestamp': 1746003629,
'upload_date': '20250430',
},
'playlist_count': 7,
}, {
'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html',
'only_matching': True,
@@ -256,14 +270,18 @@ class NYTimesArticleIE(NYTimesBaseIE):
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
webpage = self._download_webpage(url, page_id, impersonate=True)
art_json = self._search_json(
r'window\.__preloadedData\s*=', webpage, 'media details', page_id,
transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article']
content = art_json['sprinkledBody']['content']
blocks = traverse_obj(art_json, (
'sprinkledBody', 'content', ..., ('ledeMedia', None),
lambda _, v: v['__typename'] in ('Video', 'Audio')))
blocks = []
block_filter = lambda k, v: k == 'media' and v['__typename'] in ('Video', 'Audio')
if lede_media_block := traverse_obj(content, (..., 'ledeMedia', block_filter, any)):
lede_media_block.setdefault('sourceId', art_json.get('sourceId'))
blocks.append(lede_media_block)
blocks.extend(traverse_obj(content, (..., block_filter)))
if not blocks:
raise ExtractorError('Unable to extract any media blocks from webpage')
@@ -273,8 +291,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
'sprinkledBody', 'content', ..., 'summary', 'content', ..., 'text', {str}),
get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
'creator': ', '.join(
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))), # TODO: change to 'creators' (list)
'creators': traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName', {str})),
'thumbnails': self._extract_thumbnails(traverse_obj(
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
}

View File

@@ -1,5 +1,3 @@
import re
from .youtube import YoutubeIE
from .zdf import ZDFBaseIE
from ..utils import (
@@ -7,44 +5,27 @@ from ..utils import (
merge_dicts,
try_get,
unified_timestamp,
urljoin,
)
class PhoenixIE(ZDFBaseIE):
IE_NAME = 'phoenix.de'
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/?#]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
_TESTS = [{
# Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
'md5': '34ec321e7eb34231fd88616c65c92db0',
'url': 'https://www.phoenix.de/sendungen/dokumentationen/spitzbergen-a-893349.html',
'md5': 'a79e86d9774d0b3f2102aff988a0bd32',
'info_dict': {
'id': '210222_phx_nachgehakt_corona_protest',
'id': '221215_phx_spitzbergen',
'ext': 'mp4',
'title': 'Wohin führt der Protest in der Pandemie?',
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
'duration': 1691,
'timestamp': 1613902500,
'upload_date': '20210221',
'title': 'Spitzbergen',
'description': 'Film von Tilmann Bünz',
'duration': 728.0,
'timestamp': 1555600500,
'upload_date': '20190418',
'uploader': 'Phoenix',
'series': 'corona nachgehakt',
'episode': 'Wohin führt der Protest in der Pandemie?',
},
}, {
# Youtube embed
'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
'info_dict': {
'id': 'hMQtqFYjomk',
'ext': 'mp4',
'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
'duration': 3509,
'upload_date': '20201219',
'uploader': 'phoenix',
'uploader_id': 'phoenix',
},
'params': {
'skip_download': True,
'thumbnail': 'https://www.phoenix.de/sixcms/media.php/21/Bergspitzen1.png',
'series': 'Dokumentationen',
'episode': 'Spitzbergen',
},
}, {
'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
@@ -90,8 +71,8 @@ class PhoenixIE(ZDFBaseIE):
content_id = details['tracking']['nielsen']['content']['assetid']
info = self._extract_ptmd(
f'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/{content_id}',
content_id, None, url)
f'https://tmd.phoenix.de/tmd/2/android_native_6/vod/ptmd/phoenix/{content_id}',
content_id)
duration = int_or_none(try_get(
details, lambda x: x['tracking']['nielsen']['content']['length']))
@@ -101,20 +82,8 @@ class PhoenixIE(ZDFBaseIE):
str)
episode = title if details.get('contentType') == 'episode' else None
thumbnails = []
teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
for thumbnail_key, thumbnail_url in teaser_images.items():
thumbnail_url = urljoin(url, thumbnail_url)
if not thumbnail_url:
continue
thumbnail = {
'url': thumbnail_url,
}
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
if m:
thumbnail['width'] = int(m.group(1))
thumbnail['height'] = int(m.group(2))
thumbnails.append(thumbnail)
thumbnails = self._extract_thumbnails(teaser_images)
return merge_dicts(info, {
'id': content_id,

View File

@@ -7,11 +7,13 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
parse_qs,
traverse_obj,
update_url_query,
urlencode_postdata,
)
from ..utils.traversal import unpack
class PlaySuisseIE(InfoExtractor):
@@ -26,12 +28,12 @@ class PlaySuisseIE(InfoExtractor):
{
# episode in a series
'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
'md5': 'e20d1ede6872a03b41905ca1060a1ef2',
'info_dict': {
'id': '763211',
'ext': 'mp4',
'title': 'Knochen',
'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
'description': 'md5:3bdd80e2ce20227c47aab1df2a79a519',
'duration': 3344,
'series': 'Wilder',
'season': 'Season 1',
@@ -42,24 +44,33 @@ class PlaySuisseIE(InfoExtractor):
},
}, {
# film
'url': 'https://www.playsuisse.ch/watch/808675',
'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
'url': 'https://www.playsuisse.ch/detail/2573198',
'md5': '1f115bb0a5191477b1a5771643a4283d',
'info_dict': {
'id': '808675',
'id': '2573198',
'ext': 'mp4',
'title': 'Der Läufer',
'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
'duration': 5280,
'title': 'Azor',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'genres': ['Fiction'],
'creators': ['Andreas Fontana'],
'cast': ['Fabrizio Rongione', 'Stéphanie Cléau', 'Gilles Privat', 'Alexandre Trocki'],
'location': 'France; Argentine',
'release_year': 2021,
'duration': 5981,
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
},
}, {
# series (treated as a playlist)
'url': 'https://www.playsuisse.ch/detail/1115687',
'info_dict': {
'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
'id': '1115687',
'series': 'They all came out to Montreux',
'title': 'They all came out to Montreux',
'description': 'md5:0fefd8c5b4468a0bb35e916887681520',
'genres': ['Documentary'],
'creators': ['Oliver Murray'],
'location': 'Switzerland',
'release_year': 2021,
},
'playlist': [{
'info_dict': {
@@ -120,6 +131,12 @@ class PlaySuisseIE(InfoExtractor):
id
name
description
descriptionLong
year
contentTypes
directors
mainCast
productionCountries
duration
episodeNumber
seasonNumber
@@ -215,9 +232,7 @@ class PlaySuisseIE(InfoExtractor):
if not self._ID_TOKEN:
raise ExtractorError('Login failed')
def _get_media_data(self, media_id):
# NOTE In the web app, the "locale" header is used to switch between languages,
# However this doesn't seem to take effect when passing the header here.
def _get_media_data(self, media_id, locale=None):
response = self._download_json(
'https://www.playsuisse.ch/api/graphql',
media_id, data=json.dumps({
@@ -225,7 +240,7 @@ class PlaySuisseIE(InfoExtractor):
'query': self._GRAPHQL_QUERY,
'variables': {'assetId': media_id},
}).encode(),
headers={'Content-Type': 'application/json', 'locale': 'de'})
headers={'Content-Type': 'application/json', 'locale': locale or 'de'})
return response['data']['assetV2']
@@ -234,7 +249,7 @@ class PlaySuisseIE(InfoExtractor):
self.raise_login_required(method='password')
media_id = self._match_id(url)
media_data = self._get_media_data(media_id)
media_data = self._get_media_data(media_id, traverse_obj(parse_qs(url), ('locale', 0)))
info = self._extract_single(media_data)
if media_data.get('episodes'):
info.update({
@@ -257,15 +272,22 @@ class PlaySuisseIE(InfoExtractor):
self._merge_subtitles(subs, target=subtitles)
return {
'id': media_data['id'],
'title': media_data.get('name'),
'description': media_data.get('description'),
'thumbnails': thumbnails,
'duration': int_or_none(media_data.get('duration')),
'formats': formats,
'subtitles': subtitles,
'series': media_data.get('seriesName'),
'season_number': int_or_none(media_data.get('seasonNumber')),
'episode': media_data.get('name') if media_data.get('episodeNumber') else None,
'episode_number': int_or_none(media_data.get('episodeNumber')),
**traverse_obj(media_data, {
'id': ('id', {str}),
'title': ('name', {str}),
'description': (('descriptionLong', 'description'), {str}, any),
'genres': ('contentTypes', ..., {str}),
'creators': ('directors', ..., {str}),
'cast': ('mainCast', ..., {str}),
'location': ('productionCountries', ..., {str}, all, {unpack(join_nonempty, delim='; ')}, filter),
'release_year': ('year', {str}, {lambda x: x[:4]}, {int_or_none}),
'duration': ('duration', {int_or_none}),
'series': ('seriesName', {str}),
'season_number': ('seasonNumber', {int_or_none}),
'episode': ('name', {str}, {lambda x: x if media_data['episodeNumber'] is not None else None}),
'episode_number': ('episodeNumber', {int_or_none}),
}),
}

View File

@@ -321,6 +321,27 @@ class RaiPlayIE(RaiBaseIE):
'timestamp': 1348495020,
'upload_date': '20120924',
},
}, {
# checking program_info gives false positive for DRM
'url': 'https://www.raiplay.it/video/2022/10/Ad-ogni-costo---Un-giorno-in-Pretura---Puntata-del-15102022-1dfd1295-ea38-4bac-b51e-f87e2881693b.html',
'md5': '572c6f711b7c5f2d670ba419b4ae3b08',
'info_dict': {
'id': '1dfd1295-ea38-4bac-b51e-f87e2881693b',
'ext': 'mp4',
'title': 'Ad ogni costo - Un giorno in Pretura - Puntata del 15/10/2022',
'alt_title': 'St 2022/23 - Un giorno in pretura - Ad ogni costo',
'description': 'md5:4046d97b2687f74f06a8b8270ba5599f',
'uploader': 'Rai 3',
'duration': 3773.0,
'thumbnail': 'https://www.raiplay.it/dl/img/2022/10/12/1665586539957_2048x2048.png',
'creators': ['Rai 3'],
'series': 'Un giorno in pretura',
'season': '2022/23',
'episode': 'Ad ogni costo',
'timestamp': 1665507240,
'upload_date': '20221011',
'release_year': 2025,
},
}, {
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
'only_matching': True,
@@ -340,9 +361,8 @@ class RaiPlayIE(RaiBaseIE):
media = self._download_json(
f'{base}.json', video_id, 'Downloading video JSON')
if not self.get_param('allow_unplayable_formats'):
if traverse_obj(media, (('program_info', None), 'rights_management', 'rights', 'drm')):
self.report_drm(video_id)
if traverse_obj(media, ('rights_management', 'rights', 'drm')):
self.report_drm(video_id)
video = media['video']
relinker_info = self._extract_relinker_info(video['content_url'], video_id)

View File

@@ -388,7 +388,8 @@ class RedditIE(InfoExtractor):
})
if entries:
return self.playlist_result(entries, video_id, **info)
raise ExtractorError('No media found', expected=True)
self.raise_no_formats('No media found', expected=True, video_id=video_id)
return {**info, 'id': video_id}
# Check if media is hosted on reddit:
reddit_video = traverse_obj(data, (

View File

@@ -1,61 +0,0 @@
from .adobepass import AdobePassIE
from ..utils import (
int_or_none,
smuggle_url,
update_url_query,
)
class SproutIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
'info_dict': {
'id': 'bm0foJFaTKqb',
'ext': 'mp4',
'title': 'Robot Bike Race',
'description': 'md5:436b1d97117cc437f54c383f4debc66d',
'timestamp': 1606148940,
'upload_date': '20201123',
'uploader': 'NBCU-MPAT',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
'only_matching': True,
}, {
'url': 'https://www.universalkids.com/watch/robot-bike-race',
'only_matching': True,
}]
_GEO_COUNTRIES = ['US']
def _real_extract(self, url):
display_id = self._match_id(url)
mpx_metadata = self._download_json(
# http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
'https://www.universalkids.com/_api/videos/' + display_id,
display_id)['mpxMetadata']
media_pid = mpx_metadata['mediaPid']
theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
query = {
'mbr': 'true',
'manifest': 'm3u',
}
if mpx_metadata.get('entitlement') == 'auth':
query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
theplatform_url = smuggle_url(
update_url_query(theplatform_url, query), {
'force_smil_url': True,
'geo_countries': self._GEO_COUNTRIES,
})
return {
'_type': 'url_transparent',
'id': media_pid,
'url': theplatform_url,
'series': mpx_metadata.get('seriesName'),
'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
'ie_key': 'ThePlatform',
}

View File

@@ -471,8 +471,7 @@ class SVTPageIE(SVTBaseIE):
webpage = self._download_webpage(url, display_id)
title = self._og_search_title(webpage)
urql_state = self._search_json(
r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
urql_state = self._search_json(r'urqlState\s*[=:]', webpage, 'json data', display_id)
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}

View File

@@ -2,12 +2,13 @@ import json
import re
from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from ..utils import (
determine_ext,
extract_attributes,
js_to_json,
url_or_none,
)
from ..utils.traversal import find_element, traverse_obj
class TV2DKIE(InfoExtractor):
@@ -21,35 +22,46 @@ class TV2DKIE(InfoExtractor):
tv2fyn|
tv2east|
tv2lorry|
tv2nord
tv2nord|
tv2kosmopol
)\.dk/
(:[^/]+/)*
(?:[^/?#]+/)*
(?P<id>[^/?\#&]+)
'''
_TESTS = [{
'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
'info_dict': {
'id': '0_52jmwa0p',
'id': 'sPp5z21q',
'ext': 'mp4',
'title': '19:30 - 28. okt. 2019',
'timestamp': 1572290248,
'description': '',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/sPp5z21q/poster.jpg?width=720',
'timestamp': 1572287400,
'upload_date': '20191028',
'uploader_id': 'tvsyd',
'duration': 1347,
'view_count': int,
},
'add_ie': ['Kaltura'],
}, {
'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
'info_dict': {
'id': '1_7iwll9n0',
'id': 'oD9cyq0m',
'ext': 'mp4',
'upload_date': '20211027',
'title': 'Gadekamp #6 - Højhuse i København',
'uploader_id': 'tv2lorry',
'timestamp': 1635345229,
'description': '',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/oD9cyq0m/poster.jpg?width=720',
'timestamp': 1635348600,
'upload_date': '20211027',
},
'add_ie': ['Kaltura'],
}, {
'url': 'https://www.tvsyd.dk/haderslev/x-factor-brodre-fulde-af-selvtillid-er-igen-hjemme-hos-mor-vores-diagnoser-har-vaeret-en-fordel',
'info_dict': {
'id': 'x-factor-brodre-fulde-af-selvtillid-er-igen-hjemme-hos-mor-vores-diagnoser-har-vaeret-en-fordel',
},
'playlist_count': 2,
}, {
'url': 'https://www.tv2ostjylland.dk/aarhus/dom-kan-fa-alvorlige-konsekvenser',
'info_dict': {
'id': 'dom-kan-fa-alvorlige-konsekvenser',
},
'playlist_count': 3,
}, {
'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
'only_matching': True,
@@ -71,40 +83,22 @@ class TV2DKIE(InfoExtractor):
}, {
'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
'only_matching': True,
}, {
'url': 'https://www.tv2kosmopol.dk/metropolen/chaufforer-beordres-til-at-kore-videre-i-ulovlige-busser-med-rode-advarselslamper',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
search_space = traverse_obj(webpage, {find_element(tag='article')}) or webpage
entries = []
player_ids = traverse_obj(
re.findall(r'x-data="(?:video_player|simple_player)\(({[^"]+})', search_space),
(..., {js_to_json}, {json.loads}, ('jwpMediaId', 'videoId'), {str}))
def add_entry(partner_id, kaltura_id):
entries.append(self.url_result(
f'kaltura:{partner_id}:{kaltura_id}', 'Kaltura',
video_id=kaltura_id))
for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
video = extract_attributes(video_el)
kaltura_id = video.get('data-entryid')
if not kaltura_id:
continue
partner_id = video.get('data-partnerid')
if not partner_id:
continue
add_entry(partner_id, kaltura_id)
if not entries:
kaltura_id = self._search_regex(
(r'entry_id\s*:\s*["\']([0-9a-z_]+)',
r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id')
partner_id = self._search_regex(
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
'partner id')
add_entry(partner_id, kaltura_id)
if len(entries) == 1:
return entries[0]
return self.playlist_result(entries)
return self.playlist_from_matches(
player_ids, video_id, getter=lambda x: f'jwplatform:{x}', ie=JWPlatformIE)
class TV2DKBornholmPlayIE(InfoExtractor):

View File

@@ -14,12 +14,13 @@ from ..utils import (
parse_duration,
qualities,
str_to_int,
traverse_obj,
try_get,
unified_timestamp,
url_or_none,
urlencode_postdata,
urljoin,
)
from ..utils.traversal import traverse_obj
class TwitCastingIE(InfoExtractor):
@@ -138,13 +139,7 @@ class TwitCastingIE(InfoExtractor):
r'data-toggle="true"[^>]+datetime="([^"]+)"',
webpage, 'datetime', None))
stream_server_data = self._download_json(
f'https://twitcasting.tv/streamserver.php?target={uploader_id}&mode=client', video_id,
'Downloading live info', fatal=False)
is_live = any(f'data-{x}' in webpage for x in ['is-onlive="true"', 'live-type="live"', 'status="online"'])
if not traverse_obj(stream_server_data, 'llfmp4') and is_live:
self.raise_login_required(method='cookies')
base_dict = {
'title': title,
@@ -165,28 +160,37 @@ class TwitCastingIE(InfoExtractor):
return [data_movie_url]
m3u8_urls = (try_get(webpage, find_dmu, list)
or traverse_obj(video_js_data, (..., 'source', 'url'))
or ([f'https://twitcasting.tv/{uploader_id}/metastream.m3u8'] if is_live else None))
if not m3u8_urls:
raise ExtractorError('Failed to get m3u8 playlist')
or traverse_obj(video_js_data, (..., 'source', 'url')))
if is_live:
m3u8_url = m3u8_urls[0]
formats = self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', m3u8_id='hls',
live=True, headers=self._M3U8_HEADERS)
stream_data = self._download_json(
'https://twitcasting.tv/streamserver.php',
video_id, 'Downloading live info', query={
'target': uploader_id,
'mode': 'client',
'player': 'pc_web',
})
if traverse_obj(stream_server_data, ('hls', 'source')):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', m3u8_id='source',
live=True, query={'mode': 'source'},
note='Downloading source quality m3u8',
headers=self._M3U8_HEADERS, fatal=False))
formats = []
# low: 640x360, medium: 1280x720, high: 1920x1080
qq = qualities(['low', 'medium', 'high'])
for quality, m3u8_url in traverse_obj(stream_data, (
'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
)):
formats.append({
'url': m3u8_url,
'format_id': f'hls-{quality}',
'ext': 'mp4',
'quality': qq(quality),
'protocol': 'm3u8',
'http_headers': self._M3U8_HEADERS,
})
if websockets:
qq = qualities(['base', 'mobilesource', 'main'])
streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {}
for mode, ws_url in streams.items():
for mode, ws_url in traverse_obj(stream_data, (
'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
)):
formats.append({
'url': ws_url,
'format_id': f'ws-{mode}',
@@ -197,10 +201,15 @@ class TwitCastingIE(InfoExtractor):
'protocol': 'websocket_frag',
})
if not formats:
self.raise_login_required()
infodict = {
'formats': formats,
'_format_sort_fields': ('source', ),
}
elif not m3u8_urls:
raise ExtractorError('Failed to get m3u8 playlist')
elif len(m3u8_urls) == 1:
formats = self._extract_m3u8_formats(
m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS)

View File

@@ -1225,8 +1225,8 @@ class TwitchClipsIE(TwitchBaseIE):
'channel_id': ('broadcaster', 'id', {str}),
'channel_follower_count': ('broadcaster', 'followers', 'totalCount', {int_or_none}),
'channel_is_verified': ('broadcaster', 'isPartner', {bool}),
'uploader': ('broadcaster', 'displayName', {str}),
'uploader_id': ('broadcaster', 'id', {str}),
'uploader': ('curator', 'displayName', {str}),
'uploader_id': ('curator', 'id', {str}),
'categories': ('game', 'displayName', {str}, filter, all, filter),
}),
}

View File

@@ -1221,20 +1221,10 @@ class TwitterIE(TwitterBaseIE):
}]
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
@property
def _GRAPHQL_ENDPOINT(self):
if self.is_logged_in:
return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
_GRAPHQL_ENDPOINT = '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
def _graphql_to_legacy(self, data, twid):
result = traverse_obj(data, (
'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
'tweet_results', 'result', ('tweet', None), {dict},
), default={}, get_all=False) if self.is_logged_in else traverse_obj(
data, ('tweetResult', 'result', {dict}), default={})
result = traverse_obj(data, ('tweetResult', 'result', {dict})) or {}
typename = result.get('__typename')
if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
@@ -1278,37 +1268,6 @@ class TwitterIE(TwitterBaseIE):
def _build_graphql_query(self, media_id):
return {
'variables': {
'focalTweetId': media_id,
'includePromotedContent': True,
'with_rux_injections': False,
'withBirdwatchNotes': True,
'withCommunity': True,
'withDownvotePerspective': False,
'withQuickPromoteEligibilityTweetFields': True,
'withReactionsMetadata': False,
'withReactionsPerspective': False,
'withSuperFollowsTweetFields': True,
'withSuperFollowsUserFields': True,
'withV2Timeline': True,
'withVoice': True,
},
'features': {
'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
'interactive_text_enabled': True,
'responsive_web_edit_tweet_api_enabled': True,
'responsive_web_enhance_cards_enabled': True,
'responsive_web_graphql_timeline_navigation_enabled': False,
'responsive_web_text_conversations_enabled': False,
'responsive_web_uc_gql_enabled': True,
'standardized_nudges_misinfo': True,
'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
'tweetypie_unmention_optimization_enabled': True,
'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
'verified_phone_label_enabled': False,
'vibe_api_enabled': True,
},
} if self.is_logged_in else {
'variables': {
'tweetId': media_id,
'withCommunity': False,
@@ -1717,21 +1676,22 @@ class TwitterSpacesIE(TwitterBaseIE):
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
_TESTS = [{
'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
'url': 'https://twitter.com/i/spaces/1OwxWwQOPlNxQ',
'info_dict': {
'id': '1RDxlgyvNXzJL',
'id': '1OwxWwQOPlNxQ',
'ext': 'm4a',
'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
'uploader': r're:Lucio Di Gaetano.*?',
'uploader_id': 'luciodigaetano',
'title': 'Everybody in: @mtbarra & @elonmusk discuss the future of EV charging',
'description': 'Twitter Space participated by Elon Musk',
'live_status': 'was_live',
'timestamp': 1659877956,
'upload_date': '20220807',
'release_timestamp': 1659904215,
'release_date': '20220807',
'release_date': '20230608',
'release_timestamp': 1686256230,
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
'timestamp': 1686254250,
'upload_date': '20230608',
'uploader': 'Mary Barra',
'uploader_id': 'mtbarra',
},
'skip': 'No longer available',
'params': {'skip_download': 'm3u8'},
}, {
# post_live/TimedOut but downloadable
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
@@ -1743,9 +1703,10 @@ class TwitterSpacesIE(TwitterBaseIE):
'uploader': 'Google Cloud',
'uploader_id': 'googlecloud',
'live_status': 'post_live',
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
'timestamp': 1681409554,
'upload_date': '20230413',
'release_timestamp': 1681839000,
'release_timestamp': 1681839082,
'release_date': '20230418',
'protocol': 'm3u8', # ffmpeg is forced
'container': 'm4a_dash', # audio-only format fixup is applied
@@ -1762,6 +1723,9 @@ class TwitterSpacesIE(TwitterBaseIE):
'uploader': '息根とめる',
'uploader_id': 'tomeru_ikinone',
'live_status': 'was_live',
'release_date': '20230601',
'release_timestamp': 1685617200,
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
'timestamp': 1685617198,
'upload_date': '20230601',
'protocol': 'm3u8', # ffmpeg is forced
@@ -1779,9 +1743,10 @@ class TwitterSpacesIE(TwitterBaseIE):
'uploader': 'Candace Owens',
'uploader_id': 'RealCandaceO',
'live_status': 'was_live',
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
'timestamp': 1723931351,
'upload_date': '20240817',
'release_timestamp': 1723932000,
'release_timestamp': 1723932056,
'release_date': '20240817',
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
},
@@ -1861,18 +1826,21 @@ class TwitterSpacesIE(TwitterBaseIE):
return {
'id': space_id,
'title': metadata.get('title'),
'description': f'Twitter Space participated by {participants}',
'uploader': traverse_obj(
metadata, ('creator_results', 'result', 'legacy', 'name')),
'uploader_id': traverse_obj(
metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
'live_status': live_status,
'release_timestamp': try_call(
lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
'formats': formats,
'http_headers': headers,
'live_status': live_status,
**traverse_obj(metadata, {
'title': ('title', {str}),
# started_at is None when stream is_upcoming so fallback to scheduled_start for --wait-for-video
'release_timestamp': (('started_at', 'scheduled_start'), {int_or_none(scale=1000)}, any),
'timestamp': ('created_at', {int_or_none(scale=1000)}),
}),
**traverse_obj(metadata, ('creator_results', 'result', 'legacy', {
'uploader': ('name', {str}),
'uploader_id': ('screen_name', {str_or_none}),
'thumbnail': ('profile_image_url_https', {lambda x: x.replace('_normal', '_400x400')}, {url_or_none}),
})),
}

View File

@@ -39,6 +39,14 @@ class VimeoBaseInfoExtractor(InfoExtractor):
_NETRC_MACHINE = 'vimeo'
_LOGIN_REQUIRED = False
_LOGIN_URL = 'https://vimeo.com/log_in'
_IOS_CLIENT_AUTH = 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw=='
_IOS_CLIENT_HEADERS = {
'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
'Accept-Language': 'en',
'User-Agent': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0',
}
_IOS_OAUTH_CACHE_KEY = 'oauth-token-ios'
_ios_oauth_token = None
@staticmethod
def _smuggle_referrer(url, referrer_url):
@@ -88,13 +96,16 @@ class VimeoBaseInfoExtractor(InfoExtractor):
expected=True)
return password
def _verify_video_password(self, video_id, password, token):
def _verify_video_password(self, video_id):
video_password = self._get_video_password()
token = self._download_json(
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')['xsrft']
url = f'https://vimeo.com/{video_id}'
try:
return self._download_webpage(
self._request_webpage(
f'{url}/password', video_id,
'Submitting video password', data=json.dumps({
'password': password,
'password': video_password,
'token': token,
}, separators=(',', ':')).encode(), headers={
'Accept': '*/*',
@@ -239,20 +250,39 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
}
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None, **kwargs):
def _fetch_oauth_token(self):
if not self._ios_oauth_token:
self._ios_oauth_token = self.cache.load(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY)
if not self._ios_oauth_token:
self._ios_oauth_token = self._download_json(
'https://api.vimeo.com/oauth/authorize/client', None,
'Fetching OAuth token', 'Failed to fetch OAuth token',
headers={
'Authorization': f'Basic {self._IOS_CLIENT_AUTH}',
**self._IOS_CLIENT_HEADERS,
}, data=urlencode_postdata({
'grant_type': 'client_credentials',
'scope': 'private public create edit delete interact upload purchased stats',
}, quote_via=urllib.parse.quote))['access_token']
self.cache.store(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY, self._ios_oauth_token)
return self._ios_oauth_token
def _call_videos_api(self, video_id, unlisted_hash=None, **kwargs):
return self._download_json(
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
video_id, 'Downloading API JSON', headers={
'Authorization': f'jwt {jwt_token}',
'Accept': 'application/json',
'Authorization': f'Bearer {self._fetch_oauth_token()}',
**self._IOS_CLIENT_HEADERS,
}, query={
'fields': ','.join((
'config_url', 'created_time', 'description', 'download', 'license',
'metadata.connections.comments.total', 'metadata.connections.likes.total',
'release_time', 'stats.plays')),
'config_url', 'embed_player_config_url', 'player_embed_url', 'download', 'play',
'files', 'description', 'license', 'release_time', 'created_time', 'stats.plays',
'metadata.connections.comments.total', 'metadata.connections.likes.total')),
}, **kwargs)
def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None, api_data=None):
def _extract_original_format(self, url, video_id, unlisted_hash=None, api_data=None):
# Original/source formats are only available when logged in
if not self._get_cookies('https://vimeo.com/').get('vimeo'):
return
@@ -283,12 +313,8 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'quality': 1,
}
jwt = jwt or traverse_obj(self._download_json(
'https://vimeo.com/_rv/viewer', video_id, 'Downloading jwt token', fatal=False), ('jwt', {str}))
if not jwt:
return
original_response = api_data or self._call_videos_api(
video_id, jwt, unlisted_hash, fatal=False, expected_status=(403, 404))
video_id, unlisted_hash, fatal=False, expected_status=(403, 404))
for download_data in traverse_obj(original_response, ('download', ..., {dict})):
download_url = download_data.get('link')
if not download_url or download_data.get('quality') != 'source':
@@ -410,6 +436,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 10,
'comment_count': int,
'like_count': int,
'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d',
},
'params': {
@@ -500,15 +527,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'The DMCI',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
'uploader_id': 'dmci',
'timestamp': 1324343742,
'timestamp': 1324361742,
'upload_date': '20111220',
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
'description': 'md5:f37b4ad0f3ded6fa16f38ecde16c3c44',
'duration': 60,
'comment_count': int,
'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d',
'like_count': int,
'tags': 'count:11',
'release_timestamp': 1324361742,
'release_date': '20111220',
},
# 'params': {'format': 'Original'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
@@ -521,15 +549,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
'id': '393756517',
# 'ext': 'mov',
'ext': 'mp4',
'timestamp': 1582642091,
'timestamp': 1582660091,
'uploader_id': 'frameworkla',
'title': 'Straight To Hell - Sabrina: Netflix',
'uploader': 'Framework Studio',
'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
'upload_date': '20200225',
'duration': 176,
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d',
'uploader_url': 'https://vimeo.com/frameworkla',
'comment_count': int,
'like_count': int,
'release_timestamp': 1582660091,
'release_date': '20200225',
},
# 'params': {'format': 'source'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
@@ -630,7 +661,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'description': str, # FIXME: Dynamic SEO spam description
'upload_date': '20150209',
'timestamp': 1423518307,
'thumbnail': 'https://i.vimeocdn.com/video/default',
'thumbnail': r're:https://i\.vimeocdn\.com/video/default',
'duration': 10,
'like_count': int,
'uploader_url': 'https://vimeo.com/user20132939',
@@ -667,6 +698,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': int,
'uploader_url': 'https://vimeo.com/aliniamedia',
'release_date': '20160329',
'view_count': int,
},
'params': {'skip_download': True},
'expected_warnings': ['Failed to parse XML: not well-formed'],
@@ -678,18 +710,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
# 'ext': 'm4v',
'ext': 'mp4',
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
'description': 'md5:5967e090768a831488f6e74b7821b3c1',
'description': 'md5:9441e6829ae94f380cc6417d982f63ac',
'uploader_id': 'fireworkchampions',
'uploader': 'Firework Champions',
'upload_date': '20150910',
'timestamp': 1441901895,
'timestamp': 1441916295,
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d',
'uploader_url': 'https://vimeo.com/fireworkchampions',
'tags': 'count:6',
'duration': 229,
'view_count': int,
'like_count': int,
'comment_count': int,
'release_timestamp': 1441916295,
'release_date': '20150910',
},
'params': {
'skip_download': True,
@@ -820,7 +853,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'Raja Virdi',
'uploader_id': 'rajavirdi',
'uploader_url': 'https://vimeo.com/rajavirdi',
'duration': 309,
'duration': 300,
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d',
},
# 'params': {'format': 'source'},
@@ -860,12 +893,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
return checked
def _extract_from_api(self, video_id, unlisted_hash=None):
viewer = self._download_json(
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
for retry in (False, True):
try:
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
video = self._call_videos_api(video_id, unlisted_hash)
break
except ExtractorError as e:
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
@@ -873,15 +903,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
({json.loads}, 'invalid_parameters', ..., 'field'),
)):
self._verify_video_password(
video_id, self._get_video_password(), viewer['xsrft'])
self._verify_video_password(video_id)
continue
raise
info = self._parse_config(self._download_json(
video['config_url'], video_id), video_id)
source_format = self._extract_original_format(
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, jwt=viewer['jwt'], api_data=video)
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, api_data=video)
if source_format:
info['formats'].append(source_format)
@@ -1122,7 +1151,7 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
'description': 'md5:aeeba3dbd4d04b0fa98a4fdc9c639998',
'upload_date': '20140906',
'timestamp': 1410032453,
'thumbnail': 'https://i.vimeocdn.com/video/488238335-d7bf151c364cff8d467f1b73784668fe60aae28a54573a35d53a1210ae283bd8-d_1280',
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
'comment_count': int,
'license': 'https://creativecommons.org/licenses/by-nc-nd/3.0/',
'duration': 53,
@@ -1132,7 +1161,7 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
'params': {
'format': 'best[protocol=https]',
},
'expected_warnings': ['Unable to download JSON metadata'],
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, {
# requires Referer to be passed along with og:video:url
'url': 'https://vimeo.com/ondemand/36938/126682985',
@@ -1149,13 +1178,14 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
'duration': 121,
'comment_count': int,
'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/517077723-7066ae1d9a79d3eb361334fb5d58ec13c8f04b52f8dd5eadfbd6fb0bcf11f613-d_1280',
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
'like_count': int,
'tags': 'count:5',
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download JSON metadata'],
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, {
'url': 'https://vimeo.com/ondemand/nazmaalik',
'only_matching': True,
@@ -1237,7 +1267,7 @@ class VimeoUserIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
_TESTS = [{
'url': 'https://vimeo.com/nkistudio/videos',
'info_dict': {
'title': 'Nki',
'title': 'AKAMA',
'id': 'nkistudio',
},
'playlist_mincount': 66,
@@ -1370,10 +1400,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'uploader_id': 'user170863801',
'uploader_url': 'https://vimeo.com/user170863801',
'duration': 30,
'thumbnail': 'https://i.vimeocdn.com/video/1912612821-09a43bd2e75c203d503aed89de7534f28fc4474a48f59c51999716931a246af5-d_1280',
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML'],
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, {
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -1423,12 +1453,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash')
data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}'
data = self._download_json(data_url, video_id)
viewer = {}
if data.get('isLocked') is True:
video_password = self._get_video_password()
viewer = self._download_json(
'https://vimeo.com/_rv/viewer', video_id)
self._verify_video_password(video_id, video_password, viewer['xsrft'])
self._verify_video_password(video_id)
data = self._download_json(data_url, video_id)
clip_data = data['clipData']
config_url = clip_data['configUrl']
@@ -1436,7 +1462,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
info_dict = self._parse_config(config, video_id)
source_format = self._extract_original_format(
f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action',
video_id, unlisted_hash=clip_data.get('unlistedHash'), jwt=viewer.get('jwt'))
video_id, unlisted_hash=clip_data.get('unlistedHash'))
if source_format:
info_dict['formats'].append(source_format)
info_dict['description'] = clean_html(clip_data.get('description'))
@@ -1528,20 +1554,22 @@ class VimeoProIE(VimeoBaseInfoExtractor):
'uploader_id': 'openstreetmapus',
'uploader': 'OpenStreetMap US',
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
'description': 'md5:2c362968038d4499f4d79f88458590c1',
'description': 'md5:8cf69a1a435f2d763f4adf601e9c3125',
'duration': 1595,
'upload_date': '20130610',
'timestamp': 1370893156,
'timestamp': 1370907556,
'license': 'by',
'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
'view_count': int,
'comment_count': int,
'like_count': int,
'tags': 'count:1',
'release_timestamp': 1370907556,
'release_date': '20130610',
},
'params': {
'format': 'best[protocol=https]',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, {
# password-protected VimeoPro page with Vimeo player embed
'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion',
@@ -1549,7 +1577,7 @@ class VimeoProIE(VimeoBaseInfoExtractor):
'id': '764543723',
'ext': 'mp4',
'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben',
'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280',
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420',
'uploader': 'CADFEM',
'uploader_id': 'cadfem',
@@ -1561,6 +1589,7 @@ class VimeoProIE(VimeoBaseInfoExtractor):
'videopassword': 'Conference2022',
'skip_download': True,
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}]
def _real_extract(self, url):

View File

@@ -300,6 +300,24 @@ class VKIE(VKBaseIE):
'upload_date': '20250130',
},
},
{
'url': 'https://vkvideo.ru/video-50883936_456244102',
'info_dict': {
'id': '-50883936_456244102',
'ext': 'mp4',
'title': 'Добивание Украины // Техник в коме // МОЯ ЗЛОСТЬ №140',
'description': 'md5:a9bc46181e9ebd0fdd82cef6c0191140',
'uploader': 'Стас Ай, Как Просто!',
'uploader_id': '-50883936',
'comment_count': int,
'like_count': int,
'duration': 4651,
'thumbnail': r're:https?://.+\.jpg',
'chapters': 'count:59',
'timestamp': 1743333869,
'upload_date': '20250330',
},
},
{
# live stream, hls and rtmp links, most likely already finished live
# stream by the time you are reading this comment
@@ -540,7 +558,7 @@ class VKIE(VKBaseIE):
'title': ('md_title', {unescapeHTML}),
'description': ('description', {clean_html}, filter),
'thumbnail': ('jpg', {url_or_none}),
'uploader': ('md_author', {str}),
'uploader': ('md_author', {unescapeHTML}),
'uploader_id': (('author_id', 'authorId'), {str_or_none}, any),
'duration': ('duration', {int_or_none}),
'chapters': ('time_codes', lambda _, v: isinstance(v['time'], int), {

View File

@@ -2,9 +2,11 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
try_get,
unified_strdate,
)
from ..utils.traversal import traverse_obj
class WatIE(InfoExtractor):
@@ -70,8 +72,14 @@ class WatIE(InfoExtractor):
error_desc = video_info.get('error_desc')
if error_desc:
if video_info.get('error_code') == 'GEOBLOCKED':
error_code = video_info.get('error_code')
if error_code == 'GEOBLOCKED':
self.raise_geo_restricted(error_desc, video_info.get('geoList'))
elif error_code == 'DELIVERY_ERROR':
if traverse_obj(video_data, ('delivery', 'code')) == 500:
self.report_drm(video_id)
error_desc = join_nonempty(
error_desc, traverse_obj(video_data, ('delivery', 'error', {str})), delim=': ')
raise ExtractorError(error_desc, expected=True)
title = video_info['title']

View File

@@ -290,12 +290,14 @@ class WeverseIE(WeverseBaseIE):
elif live_status == 'is_live':
video_info = self._call_api(
f'/video/v1.2/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
f'/video/v1.3/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
video_id, note='Downloading live JSON')
playback = self._parse_json(video_info['lipPlayback'], video_id)
m3u8_url = traverse_obj(playback, (
'media', lambda _, v: v['protocol'] == 'HLS', 'path', {url_or_none}), get_all=False)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True)
# Live subtitles are not downloadable, but extract to silence "ignoring subs" warning
formats, _ = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True)
elif live_status == 'post_live':
if availability in ('premium_only', 'subscriber_only'):

View File

@@ -417,6 +417,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_NETRC_MACHINE = 'youtube'
_COOKIE_HOWTO_WIKI_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies'
def ucid_or_none(self, ucid):
return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
@@ -451,17 +453,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return preferred_lang
def _initialize_consent(self):
cookies = self._get_cookies('https://www.youtube.com/')
if cookies.get('__Secure-3PSID'):
if self._has_auth_cookies:
return
socs = cookies.get('SOCS')
socs = self._youtube_cookies.get('SOCS')
if socs and not socs.value.startswith('CAA'): # not consented
return
self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
def _initialize_pref(self):
cookies = self._get_cookies('https://www.youtube.com/')
pref_cookie = cookies.get('PREF')
pref_cookie = self._youtube_cookies.get('PREF')
pref = {}
if pref_cookie:
try:
@@ -472,8 +472,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
def _initialize_cookie_auth(self):
yt_sapisid, yt_1psapisid, yt_3psapisid = self._get_sid_cookies()
if yt_sapisid or yt_1psapisid or yt_3psapisid:
self._passed_auth_cookies = False
if self._has_auth_cookies:
self._passed_auth_cookies = True
self.write_debug('Found YouTube account cookies')
def _real_initialize(self):
@@ -492,8 +493,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
@property
def _youtube_login_hint(self):
return (f'{self._login_hint(method="cookies")}. Also see '
'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies '
return (f'{self._login_hint(method="cookies")}. Also see {self._COOKIE_HOWTO_WIKI_URL} '
'for tips on effectively exporting YouTube cookies')
def _check_login_required(self):
@@ -553,12 +553,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return f'{scheme} {"_".join(parts)}'
@property
def _youtube_cookies(self):
return self._get_cookies('https://www.youtube.com')
def _get_sid_cookies(self):
"""
Get SAPISID, 1PSAPISID, 3PSAPISID cookie values
@returns sapisid, 1psapisid, 3psapisid
"""
yt_cookies = self._get_cookies('https://www.youtube.com')
yt_cookies = self._youtube_cookies
yt_sapisid = try_call(lambda: yt_cookies['SAPISID'].value)
yt_3papisid = try_call(lambda: yt_cookies['__Secure-3PAPISID'].value)
yt_1papisid = try_call(lambda: yt_cookies['__Secure-1PAPISID'].value)
@@ -595,6 +599,31 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return ' '.join(authorizations)
@property
def is_authenticated(self):
return self._has_auth_cookies
@property
def _has_auth_cookies(self):
yt_sapisid, yt_1psapisid, yt_3psapisid = self._get_sid_cookies()
# YouTube doesn't appear to clear 3PSAPISID when rotating cookies (as of 2025-04-26)
# But LOGIN_INFO is cleared and should exist if logged in
has_login_info = 'LOGIN_INFO' in self._youtube_cookies
return bool(has_login_info and (yt_sapisid or yt_1psapisid or yt_3psapisid))
def _request_webpage(self, *args, **kwargs):
response = super()._request_webpage(*args, **kwargs)
# Check that we are still logged-in and cookies have not rotated after every request
if getattr(self, '_passed_auth_cookies', None) and not self._has_auth_cookies:
self.report_warning(
'The provided YouTube account cookies are no longer valid. '
'They have likely been rotated in the browser as a security measure. '
f'For tips on how to effectively export YouTube cookies, refer to {self._COOKIE_HOWTO_WIKI_URL} .',
only_once=False)
return response
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
note='Downloading API JSON', errnote='Unable to download API page',
context=None, api_key=None, api_hostname=None, default_client='web'):
@@ -695,10 +724,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
expected_type=str)
@functools.cached_property
def is_authenticated(self):
return bool(self._get_sid_authorization_header())
def extract_ytcfg(self, video_id, webpage):
if not webpage:
return {}

View File

@@ -37,6 +37,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
'chapters': 'count:20',
'comment_count': int,
'heatmap': 'count:100',
'media_type': 'clip',
},
}]
@@ -59,6 +60,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
'url': f'https://www.youtube.com/watch?v={video_id}',
'ie_key': YoutubeIE.ie_key(),
'id': clip_id,
'media_type': 'clip',
'section_start': int(clip_data['startTimeMs']) / 1000,
'section_end': int(clip_data['endTimeMs']) / 1000,
'_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility

View File

@@ -35,6 +35,7 @@ class YoutubeYtBeIE(YoutubeBaseInfoExtractor):
'duration': 59,
'comment_count': int,
'channel_follower_count': int,
'media_type': 'short',
},
'params': {
'noplaylist': True,

View File

@@ -376,6 +376,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Afrojack',
'uploader_url': 'https://www.youtube.com/@Afrojack',
'uploader_id': '@Afrojack',
'media_type': 'video',
},
'params': {
'youtube_include_dash_manifest': True,
@@ -413,10 +414,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1401991663,
'media_type': 'video',
},
},
{
'note': 'Age-gate video with embed allowed in public site',
'note': 'Formerly an age-gate video with embed allowed in public site',
'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
'info_dict': {
'id': 'HsUATh_Nc2U',
@@ -424,8 +426,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Godzilla 2 (Official Video)',
'description': 'md5:bf77e03fcae5529475e500129b05668a',
'upload_date': '20200408',
'age_limit': 18,
'availability': 'needs_auth',
'age_limit': 0,
'availability': 'public',
'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
'channel': 'FlyingKitty',
'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
@@ -443,8 +445,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@FlyingKitty900',
'comment_count': int,
'channel_is_verified': True,
'media_type': 'video',
},
'skip': 'Age-restricted; requires authentication',
},
{
'note': 'Age-gate video embedable only with clientScreen=EMBED',
@@ -507,6 +509,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Herr Lurik',
'uploader_url': 'https://www.youtube.com/@HerrLurik',
'uploader_id': '@HerrLurik',
'media_type': 'video',
},
},
{
@@ -546,6 +549,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'deadmau5',
'uploader_url': 'https://www.youtube.com/@deadmau5',
'uploader_id': '@deadmau5',
'media_type': 'video',
},
'expected_warnings': [
'DASH manifest missing',
@@ -581,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@Olympics',
'channel_is_verified': True,
'timestamp': 1440707674,
'media_type': 'livestream',
},
'params': {
'skip_download': 'requires avconv',
@@ -615,6 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@AllenMeow',
'uploader_id': '@AllenMeow',
'timestamp': 1299776999,
'media_type': 'video',
},
},
# url_encoded_fmt_stream_map is empty string
@@ -809,6 +815,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'like_count': int,
'age_limit': 0,
'channel_follower_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -868,6 +875,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@BKCHarvard',
'uploader_url': 'https://www.youtube.com/@BKCHarvard',
'timestamp': 1422422076,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -904,6 +912,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1447987198,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -968,6 +977,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'timestamp': 1484761047,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1070,6 +1080,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tags': 'count:11',
'live_status': 'not_live',
'channel_follower_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1124,6 +1135,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
'uploader_id': '@ElevageOrVert',
'timestamp': 1497343210,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1163,6 +1175,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1377976349,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1207,6 +1220,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_follower_count': int,
'uploader': 'The Cinematic Orchestra',
'comment_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1275,6 +1289,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
'uploader_id': '@walkaroundjapan7124',
'timestamp': 1605884416,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1371,6 +1386,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1395685455,
'media_type': 'video',
}, 'params': {'format': 'mhtml', 'skip_download': True},
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@@ -1401,6 +1417,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
'timestamp': 1641170939,
'media_type': 'video',
},
}, {
# date text is premiered video, ensure upload date in UTC (published 1641172509)
@@ -1434,6 +1451,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1641172509,
'media_type': 'video',
},
},
{ # continuous livestream.
@@ -1495,6 +1513,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Lesmiscore',
'uploader_url': 'https://www.youtube.com/@lesmiscore',
'timestamp': 1648005313,
'media_type': 'short',
},
}, {
# Prefer primary title+description language metadata by default
@@ -1523,6 +1542,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
'timestamp': 1662677394,
'media_type': 'video',
},
'params': {'skip_download': True},
}, {
@@ -1551,6 +1571,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'cole-dlp-test-acc',
'timestamp': 1659073275,
'like_count': int,
'media_type': 'video',
},
'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
'expected_warnings': [r'Preferring "fr" translated fields'],
@@ -1587,6 +1608,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
'media_type': 'video',
},
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}, {
@@ -1687,6 +1709,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
'media_type': 'video',
},
'params': {
'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
@@ -1719,6 +1742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_follower_count': int,
'categories': ['People & Blogs'],
'tags': [],
'media_type': 'short',
},
},
]
@@ -1754,6 +1778,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@ChristopherSykesDocumentaries',
'heatmap': 'count:100',
'timestamp': 1211825920,
'media_type': 'video',
},
'params': {
'skip_download': True,
@@ -1819,6 +1844,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
continue
# Formats from ended premieres will be missing a manifest_url
# See https://github.com/yt-dlp/yt-dlp/issues/8543
if not f.get('manifest_url'):
break
return f['manifest_url'], f['manifest_stream_number'], is_live
return None
@@ -1982,7 +2013,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _player_js_cache_key(self, player_url):
player_id = self._extract_player_info(player_url)
player_path = remove_start(urllib.parse.urlparse(player_url).path, f'/s/player/{player_id}/')
variant = self._INVERSE_PLAYER_JS_VARIANT_MAP.get(player_path)
variant = self._INVERSE_PLAYER_JS_VARIANT_MAP.get(player_path) or next((
v for k, v in self._INVERSE_PLAYER_JS_VARIANT_MAP.items()
if re.fullmatch(re.escape(k).replace('en_US', r'[a-zA-Z0-9_]+'), player_path)), None)
if not variant:
self.write_debug(
f'Unable to determine player JS variant\n'
@@ -2120,23 +2153,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return ret
return inner
def _load_nsig_code_from_cache(self, player_url):
cache_id = ('youtube-nsig', self._player_js_cache_key(player_url))
def _load_player_data_from_cache(self, name, player_url):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
if func_code := self._player_cache.get(cache_id):
return func_code
if data := self._player_cache.get(cache_id):
return data
func_code = self.cache.load(*cache_id, min_ver='2025.03.31')
if func_code:
self._player_cache[cache_id] = func_code
data = self.cache.load(*cache_id, min_ver='2025.03.31')
if data:
self._player_cache[cache_id] = data
return func_code
return data
def _store_nsig_code_to_cache(self, player_url, func_code):
cache_id = ('youtube-nsig', self._player_js_cache_key(player_url))
def _store_player_data_to_cache(self, name, player_url, data):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
if cache_id not in self._player_cache:
self.cache.store(*cache_id, func_code)
self._player_cache[cache_id] = func_code
self.cache.store(*cache_id, data)
self._player_cache[cache_id] = data
def _decrypt_signature(self, s, video_id, player_url):
"""Turn the encrypted s field into a working signature"""
@@ -2179,7 +2212,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.write_debug(f'Decrypted nsig {s} => {ret}')
# Only cache nsig func JS code to disk if successful, and only once
self._store_nsig_code_to_cache(player_url, func_code)
self._store_player_data_to_cache('nsig', player_url, func_code)
return ret
def _extract_n_function_name(self, jscode, player_url=None):
@@ -2298,7 +2331,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
func_code = self._load_nsig_code_from_cache(player_url)
func_code = self._load_player_data_from_cache('nsig', player_url)
jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode)
@@ -2334,23 +2367,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
Extract signatureTimestamp (sts)
Required to tell API what sig/player version is in use.
"""
sts = None
if isinstance(ytcfg, dict):
sts = int_or_none(ytcfg.get('STS'))
if sts := traverse_obj(ytcfg, ('STS', {int_or_none})):
return sts
if not player_url:
error_msg = 'Cannot extract signature timestamp without player url'
if fatal:
raise ExtractorError(error_msg)
self.report_warning(error_msg)
return None
sts = self._load_player_data_from_cache('sts', player_url)
if sts:
return sts
if code := self._load_player(video_id, player_url, fatal=fatal):
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
if sts:
self._store_player_data_to_cache('sts', player_url, sts)
if not sts:
# Attempt to extract from player
if player_url is None:
error_msg = 'Cannot extract signature timestamp without player_url.'
if fatal:
raise ExtractorError(error_msg)
self.report_warning(error_msg)
return
code = self._load_player(video_id, player_url, fatal=fatal)
if code:
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
return sts
def _mark_watched(self, video_id, player_responses):
@@ -3103,9 +3140,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
prs.append(pr)
# web_embedded can work around age-gate and age-verification for some embeddable videos
if self._is_agegated(pr) and variant != 'web_embedded':
append_client(f'web_embedded.{base_client}')
# Unauthenticated users will only get web_embedded client formats if age-gated
if self._is_agegated(pr) and not self.is_authenticated:
self.to_screen(
f'{video_id}: This video is age-restricted; some formats may be missing '
f'without authentication. {self._youtube_login_hint}', only_once=True)
# EU countries require age-verification for accounts to access age-restricted videos
# If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
if self.is_authenticated and self._is_agegated(pr):
embedding_is_disabled = variant == 'web_embedded' and self._is_unplayable(pr)
if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
self.to_screen(
f'{video_id}: This video is age-restricted and YouTube is requiring '
'account age-verification; some formats may be missing', only_once=True)
@@ -3232,12 +3279,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
if not all((sc, fmt_url, player_url, encrypted_sig)):
self.report_warning(
f'Some {client_name} client https formats have been skipped as they are missing a url. '
f'{"Your account" if self.is_authenticated else "The current session"} may have '
f'the SSAP (server-side ads) experiment which interferes with yt-dlp. '
f'Please see https://github.com/yt-dlp/yt-dlp/issues/12482 for more details.',
video_id, only_once=True)
msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
if client_name == 'web':
msg += 'YouTube is forcing SABR streaming for this client. '
else:
msg += (
f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
f'{"your account" if self.is_authenticated else "the current session"}. '
)
msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
self.report_warning(msg, video_id, only_once=True)
continue
try:
fmt_url += '&{}={}'.format(
@@ -3324,8 +3375,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'width': int_or_none(fmt.get('width')),
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
# Strictly de-prioritize broken, damaged and 3gp formats
'preference': -20 if require_po_token else -10 if is_damaged else -2 if itag == '17' else None,
# Strictly de-prioritize damaged and 3gp formats
'preference': -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
@@ -3648,6 +3699,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
reason += '. YouTube is requiring a captcha challenge before playback'
elif "This content isn't available, try again later" in reason:
reason = (
f'{remove_end(reason.strip(), ".")}. {"Your account" if self.is_authenticated else "The current session"} '
f'has been rate-limited by YouTube for up to an hour. It is recommended to use `-t sleep` to add a delay '
f'between video requests to avoid exceeding the rate limit. For more information, refer to '
f'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#this-content-isnt-available-try-again-later'
)
self.raise_no_formats(reason, expected=True)
keywords = get_first(video_details, 'keywords', expected_type=list) or []
@@ -3754,7 +3812,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tags': keywords,
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
'live_status': live_status,
'media_type': 'livestream' if get_first(video_details, 'isLiveContent') else None,
'media_type': (
'livestream' if get_first(video_details, 'isLiveContent')
else 'short' if get_first(microformats, 'isShortsEligible')
else 'video'),
'release_timestamp': live_start_time,
'_format_sort_fields': ( # source_preference is lower for potentially damaged formats
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang', 'proto'),

File diff suppressed because it is too large Load Diff