1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-18 03:08:31 +00:00

Merge remote-tracking branch 'upstream/master' into feat/youtube/sabr

# Conflicts:
#	yt_dlp/extractor/youtube/_video.py
This commit is contained in:
coletdjnz 2025-06-27 17:03:52 +12:00
commit 1fdd744253
No known key found for this signature in database
GPG Key ID: 91984263BB39894A
18 changed files with 400 additions and 131 deletions

View File

@ -779,3 +779,5 @@ brian6932
iednod55
maxbin123
nullpos
anlar
eason1478

View File

@ -4,6 +4,25 @@ # Changelog
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
### 2025.06.25
#### Extractor changes
- [Add `_search_nuxt_json` helper](https://github.com/yt-dlp/yt-dlp/commit/51887484e46ab6015c041cb1ab626a55f25a03bd) ([#13386](https://github.com/yt-dlp/yt-dlp/issues/13386)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- **brightcove**: new: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e6bd4a3da295b760ab20b39c18ce8934d312c2bf) ([#13461](https://github.com/yt-dlp/yt-dlp/issues/13461)) by [doe1080](https://github.com/doe1080)
- **huya**: live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2600849badb0d08c55b58dcc77a13af6ba423da6) ([#13520](https://github.com/yt-dlp/yt-dlp/issues/13520)) by [doe1080](https://github.com/doe1080)
- **hypergryph**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1722c55400ff30bb5aee5dd7a262f0b7e9ce2f0e) ([#13415](https://github.com/yt-dlp/yt-dlp/issues/13415)) by [doe1080](https://github.com/doe1080), [eason1478](https://github.com/eason1478)
- **lsm**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c57412d1f9cf0124adc972a47858ac42b740c61d) ([#13126](https://github.com/yt-dlp/yt-dlp/issues/13126)) by [Caesim404](https://github.com/Caesim404)
- **mave**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1838a1ce5d4ade80770ba9162eaffc9a1607dc70) ([#13380](https://github.com/yt-dlp/yt-dlp/issues/13380)) by [anlar](https://github.com/anlar)
- **sportdeutschland**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ce4327c9836691d3b6b00e44a90b6741601ed8) ([#13519](https://github.com/yt-dlp/yt-dlp/issues/13519)) by [DTrombett](https://github.com/DTrombett)
- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5b559d0072b7164daf06bacdc41c6f11283452c8) ([#13544](https://github.com/yt-dlp/yt-dlp/issues/13544)) by [bashonly](https://github.com/bashonly)
- **tv8.it**: [Support slugless URLs](https://github.com/yt-dlp/yt-dlp/commit/3bd30291601c47fa4a257983473884103ecab0c7) ([#13478](https://github.com/yt-dlp/yt-dlp/issues/13478)) by [DTrombett](https://github.com/DTrombett)
- **youtube**
- [Check any `ios` m3u8 formats prior to download](https://github.com/yt-dlp/yt-dlp/commit/8f94b76cbf7bbd9dfd8762c63cdea04f90f1297f) ([#13524](https://github.com/yt-dlp/yt-dlp/issues/13524)) by [bashonly](https://github.com/bashonly)
- [Improve player context payloads](https://github.com/yt-dlp/yt-dlp/commit/ff6f94041aeee19c5559e1c1cd693960a1c1dd14) ([#13539](https://github.com/yt-dlp/yt-dlp/issues/13539)) by [bashonly](https://github.com/bashonly)
#### Misc. changes
- **test**: `traversal`: [Fix morsel tests for Python 3.14](https://github.com/yt-dlp/yt-dlp/commit/73bf10211668e4a59ccafd790e06ee82d9fea9ea) ([#13471](https://github.com/yt-dlp/yt-dlp/issues/13471)) by [Grub4K](https://github.com/Grub4K)
### 2025.06.09
#### Extractor changes

View File

@ -590,7 +590,7 @@ # Supported sites
- **Hungama**
- **HungamaAlbumPlaylist**
- **HungamaSong**
- **huya:live**: huya.com
- **huya:live**: 虎牙直播
- **huya:video**: 虎牙视频
- **Hypem**
- **Hytale**
@ -776,6 +776,7 @@ # Supported sites
- **massengeschmack.tv**
- **Masters**
- **MatchTV**
- **Mave**
- **MBN**: mbn.co.kr (매일방송)
- **MDR**: MDR.DE
- **MedalTV**
@ -832,7 +833,7 @@ # Supported sites
- **Mojevideo**: mojevideo.sk
- **Mojvideo**
- **Monstercat**
- **MonsterSirenHypergryphMusic**
- **monstersiren**: 塞壬唱片
- **Motherless**
- **MotherlessGallery**
- **MotherlessGroup**

View File

@ -2219,6 +2219,7 @@ def _check_formats(self, formats):
self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
f['__working'] = success
if success:
f.pop('__needs_testing', None)
yield f
else:
self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
@ -3965,6 +3966,7 @@ def simplified_codec(f, field):
self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
(self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
self._format_out('Untested', self.Styles.WARNING) if f.get('__needs_testing') else None,
format_field(f, 'format_note'),
format_field(f, 'container', ignore=(None, f.get('ext'))),
delim=', '), delim=' '),

View File

@ -5,47 +5,46 @@
from .common import FileDownloader
from .external import FFmpegFD
from ..networking import Request
from ..utils import DownloadError, str_or_none, try_get
from ..networking.websocket import WebSocketResponse
from ..utils import DownloadError, str_or_none, truncate_string
from ..utils.traversal import traverse_obj
class NiconicoLiveFD(FileDownloader):
""" Downloads niconico live without being stopped """
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
ws_url = info_dict['url']
ws_extractor = info_dict['ws']
ws_origin_host = info_dict['origin']
live_quality = info_dict.get('live_quality', 'high')
live_latency = info_dict.get('live_latency', 'high')
video_id = info_dict['id']
opts = info_dict['downloader_options']
quality, ws_extractor, ws_url = opts['max_quality'], opts['ws'], opts['ws_url']
dl = FFmpegFD(self.ydl, self.params or {})
new_info_dict = info_dict.copy()
new_info_dict.update({
'protocol': 'm3u8',
})
new_info_dict['protocol'] = 'm3u8'
def communicate_ws(reconnect):
if reconnect:
ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
# Support --load-info-json as if it is a reconnect attempt
if reconnect or not isinstance(ws_extractor, WebSocketResponse):
ws = self.ydl.urlopen(Request(
ws_url, headers={'Origin': 'https://live.nicovideo.jp'}))
if self.ydl.params.get('verbose', False):
self.to_screen('[debug] Sending startWatching request')
self.write_debug('Sending startWatching request')
ws.send(json.dumps({
'type': 'startWatching',
'data': {
'reconnect': True,
'room': {
'commentable': True,
'protocol': 'webSocket',
},
'stream': {
'quality': live_quality,
'protocol': 'hls+fmp4',
'latency': live_latency,
'accessRightMethod': 'single_cookie',
'chasePlay': False,
'latency': 'high',
'protocol': 'hls',
'quality': quality,
},
'room': {
'protocol': 'webSocket',
'commentable': True,
},
'reconnect': True,
},
'type': 'startWatching',
}))
else:
ws = ws_extractor
@ -58,7 +57,6 @@ def communicate_ws(reconnect):
if not data or not isinstance(data, dict):
continue
if data.get('type') == 'ping':
# pong back
ws.send(r'{"type":"pong"}')
ws.send(r'{"type":"keepSeat"}')
elif data.get('type') == 'disconnect':
@ -66,12 +64,10 @@ def communicate_ws(reconnect):
return True
elif data.get('type') == 'error':
self.write_debug(data)
message = try_get(data, lambda x: x['body']['code'], str) or recv
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
return DownloadError(message)
elif self.ydl.params.get('verbose', False):
if len(recv) > 100:
recv = recv[:100] + '...'
self.to_screen(f'[debug] Server said: {recv}')
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
def ws_main():
reconnect = False
@ -81,7 +77,8 @@ def ws_main():
if ret is True:
return
except BaseException as e:
self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
self.to_screen(
f'[niconico:live] {video_id}: Connection error occured, reconnecting after 10 seconds: {e}')
time.sleep(10)
continue
finally:

View File

@ -1107,6 +1107,7 @@
from .massengeschmacktv import MassengeschmackTVIE
from .masters import MastersIE
from .matchtv import MatchTVIE
from .mave import MaveIE
from .mbn import MBNIE
from .mdr import MDRIE
from .medaltv import MedalTVIE

View File

@ -1226,6 +1226,26 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
'id': '313580179',
},
'playlist_mincount': 92,
}, {
# Hidden-mode collection
'url': 'https://space.bilibili.com/3669403/video',
'info_dict': {
'id': '3669403',
},
'playlist': [{
'info_dict': {
'_type': 'playlist',
'id': '3669403_3958082',
'title': '合集·直播回放',
'description': '',
'uploader': '月路Yuel',
'uploader_id': '3669403',
'timestamp': int,
'upload_date': str,
'thumbnail': str,
},
}],
'params': {'playlist_items': '7'},
}]
def _real_extract(self, url):
@ -1282,8 +1302,14 @@ def get_metadata(page_data):
}
def get_entries(page_data):
for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
for entry in traverse_obj(page_data, ('list', 'vlist', ..., {dict})):
if traverse_obj(entry, ('meta', 'attribute')) == 156:
# hidden-mode collection doesn't show its videos in uploads; extract as playlist instead
yield self.url_result(
f'https://space.bilibili.com/{entry["mid"]}/lists/{entry["meta"]["id"]}?type=season',
BilibiliCollectionListIE, f'{entry["mid"]}_{entry["meta"]["id"]}')
else:
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
return self.playlist_result(paged_list, playlist_id)

View File

@ -11,7 +11,7 @@
class CloudyCDNIE(InfoExtractor):
_VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
_VALID_URL = r'(?:https?:)?//embed\.(?P<domain>cloudycdn\.services|backscreen\.com)/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?',
@ -23,7 +23,7 @@ class CloudyCDNIE(InfoExtractor):
'duration': 1442,
'upload_date': '20231121',
'title': 'D23-6000-105_cetstud',
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
},
}, {
'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1',
@ -33,7 +33,7 @@ class CloudyCDNIE(InfoExtractor):
'ext': 'mp4',
'title': 'LV-8-5-1',
'timestamp': 1669767167,
'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
'duration': 1205,
'upload_date': '20221130',
},
@ -48,9 +48,21 @@ class CloudyCDNIE(InfoExtractor):
'duration': 1673,
'title': 'D24-6000-074-cetstud',
'timestamp': 1718902233,
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
},
'params': {'format': 'bv'},
}, {
'url': 'https://embed.backscreen.com/ltv/media/32j_z25-0600-127?',
'md5': '9b6fa09ac1a4de53d4f42b94affc3b42',
'info_dict': {
'id': '32j_z25-0600-127',
'ext': 'mp4',
'title': 'Z25-0600-127-DZ',
'duration': 1906,
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/977427/placeholder1746633646.jpg',
'timestamp': 1746632402,
'upload_date': '20250507',
},
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
@ -60,17 +72,17 @@ class CloudyCDNIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20230223',
'duration': 629,
'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
'timestamp': 1677181513,
'title': 'LIB-2',
},
}]
def _real_extract(self, url):
site_id, video_id = self._match_valid_url(url).group('site_id', 'id')
domain, site_id, video_id = self._match_valid_url(url).group('domain', 'site_id', 'id')
data = self._download_json(
f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/',
f'https://player.{domain}/player/{site_id}/media/{video_id}/',
video_id, data=urlencode_postdata({
'version': '6.4.0',
'referer': url,

View File

@ -263,6 +263,9 @@ class InfoExtractor:
* http_chunk_size Chunk size for HTTP downloads
* ffmpeg_args Extra arguments for ffmpeg downloader (input)
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
* ws (NiconicoLiveFD only) WebSocketResponse
* ws_url (NiconicoLiveFD only) Websockets URL
* max_quality (NiconicoLiveFD only) Max stream quality string
* is_dash_periods Whether the format is a result of merging
multiple DASH periods.
RTMP formats can also have the additional fields: page_url,

View File

@ -7,12 +7,13 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
parse_duration,
str_or_none,
try_get,
unescapeHTML,
unified_strdate,
update_url,
update_url_query,
url_or_none,
)
@ -22,8 +23,8 @@
class HuyaLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
IE_NAME = 'huya:live'
IE_DESC = 'huya.com'
TESTS = [{
IE_DESC = '虎牙直播'
_TESTS = [{
'url': 'https://www.huya.com/572329',
'info_dict': {
'id': '572329',
@ -149,63 +150,94 @@ class HuyaVideoIE(InfoExtractor):
'id': '1002412640',
'ext': 'mp4',
'title': '8月3日',
'thumbnail': r're:https?://.*\.jpg',
'duration': 14,
'categories': ['主机游戏'],
'duration': 14.0,
'uploader': '虎牙-ATS欧卡车队青木',
'uploader_id': '1564376151',
'upload_date': '20240803',
'view_count': int,
'comment_count': int,
'like_count': int,
'thumbnail': r're:https?://.+\.jpg',
'timestamp': 1722675433,
},
},
{
}, {
'url': 'https://www.huya.com/video/play/556054543.html',
'info_dict': {
'id': '556054543',
'ext': 'mp4',
'title': '我不挑事 也不怕事',
'thumbnail': r're:https?://.*\.jpg',
'duration': 1864,
'categories': ['英雄联盟'],
'description': 'md5:58184869687d18ce62dc7b4b2ad21201',
'duration': 1864.0,
'uploader': '卡尔',
'uploader_id': '367138632',
'upload_date': '20210811',
'view_count': int,
'comment_count': int,
'like_count': int,
'tags': 'count:4',
'thumbnail': r're:https?://.+\.jpg',
'timestamp': 1628675950,
},
}, {
# Only m3u8 available
'url': 'https://www.huya.com/video/play/1063345618.html',
'info_dict': {
'id': '1063345618',
'ext': 'mp4',
'title': '峡谷第一中黑铁上钻石顶级教学对抗elo',
'categories': ['英雄联盟'],
'comment_count': int,
'duration': 21603.0,
'like_count': int,
'thumbnail': r're:https?://.+\.jpg',
'timestamp': 1749668803,
'upload_date': '20250611',
'uploader': '北枫CC',
'uploader_id': '2183525275',
'view_count': int,
},
}]
def _real_extract(self, url: str):
video_id = self._match_id(url)
video_data = self._download_json(
'https://liveapi.huya.com/moment/getMomentContent', video_id,
query={'videoId': video_id})['data']['moment']['videoInfo']
moment = self._download_json(
'https://liveapi.huya.com/moment/getMomentContent',
video_id, query={'videoId': video_id})['data']['moment']
formats = []
for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))):
formats.append({
'url': definition['url'],
**traverse_obj(definition, {
'format_id': ('defName', {str}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
for definition in traverse_obj(moment, (
'videoInfo', 'definitions', lambda _, v: url_or_none(v['m3u8']),
)):
fmts = self._extract_m3u8_formats(definition['m3u8'], video_id, 'mp4', fatal=False)
for fmt in fmts:
fmt.update(**traverse_obj(definition, {
'filesize': ('size', {int_or_none}),
}),
})
'format_id': ('defName', {str}),
'height': ('height', {int_or_none}),
'quality': ('definition', {int_or_none}),
'width': ('width', {int_or_none}),
}))
formats.extend(fmts)
return {
'id': video_id,
'formats': formats,
**traverse_obj(video_data, {
**traverse_obj(moment, {
'comment_count': ('commentCount', {int_or_none}),
'description': ('content', {clean_html}, filter),
'like_count': ('favorCount', {int_or_none}),
'timestamp': ('cTime', {int_or_none}),
}),
**traverse_obj(moment, ('videoInfo', {
'title': ('videoTitle', {str}),
'thumbnail': ('videoCover', {url_or_none}),
'categories': ('category', {str}, filter, all, filter),
'duration': ('videoDuration', {parse_duration}),
'tags': ('tags', ..., {str}, filter, all, filter),
'thumbnail': (('videoBigCover', 'videoCover'), {url_or_none}, {update_url(query=None)}, any),
'uploader': ('nickName', {str}),
'uploader_id': ('uid', {str_or_none}),
'upload_date': ('videoUploadTime', {unified_strdate}),
'view_count': ('videoPlayNum', {int_or_none}),
'comment_count': ('videoCommentNum', {int_or_none}),
'like_count': ('favorCount', {int_or_none}),
}),
})),
}

View File

@ -167,11 +167,11 @@ class LSMLTVEmbedIE(InfoExtractor):
'duration': 1442,
'upload_date': '20231121',
'title': 'D23-6000-105_cetstud',
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
},
}, {
'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
'md5': 'a1711e190fe680fdb68fd8413b378e87',
'md5': 'f236cef2fd5953612754e4e66be51e7a',
'info_dict': {
'id': 'wUnFArIPDSY',
'ext': 'mp4',
@ -198,6 +198,8 @@ class LSMLTVEmbedIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/@LTV16plus',
'like_count': int,
'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
'media_type': 'livestream',
'timestamp': 1652550741,
},
}]
@ -208,7 +210,7 @@ def _real_extract(self, url):
r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
embed_type = traverse_obj(data, ('source', 'name', {str}))
if embed_type == 'telia':
if embed_type in ('backscreen', 'telia'): # 'telia' only for backwards compat
ie_key = 'CloudyCDN'
embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none}))
elif embed_type == 'youtube':
@ -226,9 +228,9 @@ def _real_extract(self, url):
class LSMReplayIE(InfoExtractor):
_VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
_VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:skaties/|klausies/)?(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
'url': 'https://replay.lsm.lv/lv/skaties/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
'md5': '64f72a360ca530d5ed89c77646c9eee5',
'info_dict': {
'id': '46k_d23-6000-105',
@ -241,20 +243,23 @@ class LSMReplayIE(InfoExtractor):
'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
},
}, {
'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
'md5': '719b33875cd1429846eeeaeec6df2830',
'url': 'https://replay.lsm.lv/lv/klausies/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
'md5': '84feb80fd7e6ec07744726a9f01cda4d',
'info_dict': {
'id': 'a342781',
'ext': 'mp3',
'id': '183522',
'ext': 'm4a',
'duration': 1823,
'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
'upload_date': '20231102',
'timestamp': 1698921060,
'timestamp': 1698913860,
'description': 'md5:7bac3b2dd41e44325032943251c357b1',
},
}, {
'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
'url': 'https://replay.lsm.lv/ru/skaties/statja/ltv/355067/v-kengaragse-nacalas-ukladka-relsov',
'only_matching': True,
}, {
'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
'only_matching': True,
}]
@ -267,12 +272,24 @@ def _real_extract(self, url):
data = self._search_nuxt_data(
self._fix_nuxt_data(webpage), video_id, context_name='__REPLAY__')
playback_type = data['playback']['type']
if playback_type == 'playable_audio_lr':
playback_data = {
'formats': self._extract_m3u8_formats(data['playback']['service']['hls_url'], video_id),
}
elif playback_type == 'embed':
playback_data = {
'_type': 'url_transparent',
'url': data['playback']['service']['url'],
}
else:
raise ExtractorError(f'Unsupported playback type "{playback_type}"')
return {
'_type': 'url_transparent',
'id': video_id,
**playback_data,
**traverse_obj(data, {
'url': ('playback', 'service', 'url', {url_or_none}),
'title': ('mediaItem', 'title'),
'description': ('mediaItem', ('lead', 'body')),
'duration': ('mediaItem', 'duration', {int_or_none}),

107
yt_dlp/extractor/mave.py Normal file
View File

@ -0,0 +1,107 @@
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
urljoin,
)
from ..utils.traversal import require, traverse_obj
class MaveIE(InfoExtractor):
_VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
_TESTS = [{
'url': 'https://ochenlichnoe.mave.digital/ep-25',
'md5': 'aa3e513ef588b4366df1520657cbc10c',
'info_dict': {
'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
'ext': 'mp3',
'display_id': 'ochenlichnoe-ep-25',
'title': 'Между мной и миром: психология самооценки',
'description': 'md5:4b7463baaccb6982f326bce5c700382a',
'uploader': 'Самарский университет',
'channel': 'Очень личное',
'channel_id': 'ochenlichnoe',
'channel_url': 'https://ochenlichnoe.mave.digital/',
'view_count': int,
'like_count': int,
'dislike_count': int,
'duration': 3744,
'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg',
'series': 'Очень личное',
'series_id': '2e0c3749-6df2-4946-82f4-50691419c065',
'season': 'Season 3',
'season_number': 3,
'episode': 'Episode 3',
'episode_number': 3,
'timestamp': 1747817300,
'upload_date': '20250521',
},
}, {
'url': 'https://budem.mave.digital/ep-12',
'md5': 'e1ce2780fcdb6f17821aa3ca3e8c919f',
'info_dict': {
'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
'ext': 'mp3',
'display_id': 'budem-ep-12',
'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
'uploader': 'Полина Цветкова+Евгения Акопова',
'channel': 'Все там будем',
'channel_id': 'budem',
'channel_url': 'https://budem.mave.digital/',
'view_count': int,
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'duration': 3664,
'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg',
'series': 'Все там будем',
'series_id': 'fe9347bf-c009-4ebd-87e8-b06f2f324746',
'season': 'Season 2',
'season_number': 2,
'episode': 'Episode 5',
'episode_number': 5,
'timestamp': 1735538400,
'upload_date': '20241230',
},
}]
_API_BASE_URL = 'https://api.mave.digital/'
def _real_extract(self, url):
channel_id, slug = self._match_valid_url(url).group('channel', 'id')
display_id = f'{channel_id}-{slug}'
webpage = self._download_webpage(url, display_id)
data = traverse_obj(
self._search_nuxt_json(webpage, display_id),
('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))
return {
'display_id': display_id,
'channel_id': channel_id,
'channel_url': f'https://{channel_id}.mave.digital/',
'vcodec': 'none',
'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None,
**traverse_obj(data, ('activeEpisodeData', {
'url': ('audio', {urljoin(self._API_BASE_URL)}),
'id': ('id', {str}),
'title': ('title', {str}),
'description': ('description', {clean_html}),
'duration': ('duration', {int_or_none}),
'season_number': ('season', {int_or_none}),
'episode_number': ('number', {int_or_none}),
'view_count': ('listenings', {int_or_none}),
'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
'timestamp': ('publish_date', {parse_iso8601}),
})),
**traverse_obj(data, ('podcast', 'podcast', {
'series_id': ('id', {str}),
'series': ('title', {str}),
'channel': ('title', {str}),
'uploader': ('author', {str}),
})),
}

View File

@ -4,16 +4,15 @@
import json
import re
import time
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
OnDemandPagedList,
clean_html,
determine_ext,
extract_attributes,
float_or_none,
int_or_none,
parse_bitrate,
@ -22,9 +21,8 @@
parse_qs,
parse_resolution,
qualities,
remove_start,
str_or_none,
unescapeHTML,
truncate_string,
unified_timestamp,
update_url_query,
url_basename,
@ -32,7 +30,11 @@
urlencode_postdata,
urljoin,
)
from ..utils.traversal import find_element, require, traverse_obj
from ..utils.traversal import (
find_element,
require,
traverse_obj,
)
class NiconicoBaseIE(InfoExtractor):
@ -806,41 +808,39 @@ class NiconicoLiveIE(NiconicoBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
webpage = self._download_webpage(url, video_id, expected_status=404)
if err_msg := traverse_obj(webpage, ({find_element(cls='message')}, {clean_html})):
raise ExtractorError(err_msg, expected=True)
embedded_data = self._parse_json(unescapeHTML(self._search_regex(
r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)
ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
if not ws_url:
raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True)
ws_url = update_url_query(ws_url, {
'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
})
hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
embedded_data = traverse_obj(webpage, (
{find_element(tag='script', id='embedded-data', html=True)},
{extract_attributes}, 'data-props', {json.loads}))
frontend_id = traverse_obj(embedded_data, ('site', 'frontendId', {str_or_none}), default='9')
ws_url = traverse_obj(embedded_data, (
'site', 'relive', 'webSocketUrl', {url_or_none}, {require('websocket URL')}))
ws_url = update_url_query(ws_url, {'frontend_id': frontend_id})
ws = self._request_webpage(
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
video_id=video_id, note='Connecting to WebSocket server')
ws_url, video_id, 'Connecting to WebSocket server',
headers={'Origin': 'https://live.nicovideo.jp'})
self.write_debug('Sending HLS server request')
ws.send(json.dumps({
'type': 'startWatching',
'data': {
'reconnect': False,
'room': {
'commentable': True,
'protocol': 'webSocket',
},
'stream': {
'quality': 'abr',
'protocol': 'hls',
'latency': 'high',
'accessRightMethod': 'single_cookie',
'chasePlay': False,
'latency': 'high',
'protocol': 'hls',
'quality': 'abr',
},
'room': {
'protocol': 'webSocket',
'commentable': True,
},
'reconnect': False,
},
'type': 'startWatching',
}))
while True:
@ -860,17 +860,15 @@ def _real_extract(self, url):
raise ExtractorError('Disconnected at middle of extraction')
elif data.get('type') == 'error':
self.write_debug(recv)
message = traverse_obj(data, ('body', 'code')) or recv
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
raise ExtractorError(message)
elif self.get_param('verbose', False):
if len(recv) > 100:
recv = recv[:100] + '...'
self.write_debug(f'Server said: {recv}')
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {}
raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail', {dict})) or {}
thumbnails = []
for name, value in raw_thumbs.items():
if not isinstance(value, dict):
@ -897,31 +895,30 @@ def _real_extract(self, url):
cookie['domain'], cookie['name'], cookie['value'],
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])
fmt_common = {
'live_latency': 'high',
'origin': hostname,
'protocol': 'niconico_live',
'video_id': video_id,
'ws': ws,
}
q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr'
a_map = {96: 'audio_low', 192: 'audio_high'}
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
for fmt in formats:
fmt['protocol'] = 'niconico_live'
if fmt.get('acodec') == 'none':
fmt['format_id'] = next(q_iter, fmt['format_id'])
elif fmt.get('vcodec') == 'none':
abr = parse_bitrate(fmt['url'].lower())
fmt.update({
'abr': abr,
'acodec': 'mp4a.40.2',
'format_id': a_map.get(abr, fmt['format_id']),
})
fmt.update(fmt_common)
return {
'id': video_id,
'title': title,
'downloader_options': {
'max_quality': traverse_obj(embedded_data, ('program', 'stream', 'maxQuality', {str})) or 'normal',
'ws': ws,
'ws_url': ws_url,
},
**traverse_obj(embedded_data, {
'view_count': ('program', 'statistics', 'watchCount'),
'comment_count': ('program', 'statistics', 'commentCount'),

View File

@ -213,7 +213,7 @@ class CieloTVItIE(SkyItIE): # XXX: Do not subclass from concrete IE
class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'tv8.it'
_VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/[0-9a-z-]+-(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/(?:[0-9a-z-]+-)?(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.tv8.it/video/ogni-mattina-ucciso-asino-di-andrea-lo-cicero-630529',
'md5': '9ab906a3f75ea342ed928442f9dabd21',
@ -227,6 +227,19 @@ class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE
'thumbnail': 'https://videoplatform.sky.it/still/2020/11/18/1605717753954_ogni-mattina-ucciso-asino-di-andrea-lo-cicero_videostill_1.jpg',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.tv8.it/video/964361',
'md5': '1e58e807154658a16edc29e45be38107',
'info_dict': {
'id': '964361',
'ext': 'mp4',
'title': 'GialappaShow - S.4 Ep.2',
'description': 'md5:60bb4ff5af18bbeeaedabc1de5f9e1e2',
'duration': 8030,
'thumbnail': 'https://videoplatform.sky.it/captures/494/2024/11/06/964361/964361_1730888412914_thumb_494.jpg',
'timestamp': 1730821499,
'upload_date': '20241105',
},
}]
_DOMAIN = 'mtv8'

View File

@ -25,6 +25,7 @@ class SportDeutschlandIE(InfoExtractor):
'upload_date': '20230114',
'timestamp': 1673733618,
},
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
'info_dict': {
@ -41,6 +42,7 @@ class SportDeutschlandIE(InfoExtractor):
'upload_date': '20220309',
'timestamp': 1646860727.0,
},
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023',
'info_dict': {
@ -68,6 +70,7 @@ class SportDeutschlandIE(InfoExtractor):
'live_status': 'was_live',
},
}],
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
'info_dict': {
@ -82,13 +85,30 @@ class SportDeutschlandIE(InfoExtractor):
'live_status': 'is_live',
},
'skip': 'live',
}, {
'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
'md5': '35c11a19395c938cdd076b93bda54cde',
'info_dict': {
'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a',
'ext': 'mp4',
'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates',
'display_id': 'rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
'channel': 'Rostock Griffins',
'channel_url': 'https://sportdeutschland.tv/rostock-griffins',
'live_status': 'was_live',
'description': 'md5:60cb00067e55dafa27b0933a43d72862',
'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
'timestamp': 1749913117,
'upload_date': '20250614',
},
}]
def _process_video(self, asset_id, video):
is_live = video['type'] == 'mux_live'
token = self._download_json(
f'https://api.sportdeutschland.tv/api/frontend/asset-token/{asset_id}',
video['id'], query={'type': video['type'], 'playback_id': video['src']})['token']
f'https://api.sportdeutschland.tv/api/web/personal/asset-token/{asset_id}',
video['id'], query={'type': video['type'], 'playback_id': video['src']},
headers={'Referer': 'https://sportdeutschland.tv/'})['token']
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
f'https://stream.mux.com/{video["src"]}.m3u8?token={token}', video['id'], live=is_live)

View File

@ -41,6 +41,7 @@ class SproutVideoIE(InfoExtractor):
'duration': 703,
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
},
'skip': 'Account Disabled',
}, {
# http formats 'sd' and 'hd' are available
'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90',
@ -97,11 +98,21 @@ def _extract_embed_urls(cls, url, webpage):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
webpage = self._download_webpage(
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
webpage = self._download_webpage(url, video_id, headers={
**traverse_obj(smuggled_data, {'Referer': 'referer'}),
# yt-dlp's default Chrome user-agents are too old
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:140.0) Gecko/20100101 Firefox/140.0',
})
data = self._search_json(
r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+',
end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode())
r'var\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id,
contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
transform_source=lambda x: base64.b64decode(x).decode())
# SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e]
# e.g. if the user-agent we used with the webpage request is too old
video_uid = data['videoUid']
if video_id != video_uid:
raise ExtractorError(f'{self.IE_NAME} sent the wrong video data ({video_uid})')
formats, subtitles = [], {}
headers = {

View File

@ -2818,7 +2818,10 @@ def _get_checkok_params():
def _generate_player_context(cls, sts=None, reload_playback_token=None):
content_playback_context = {
'html5Preference': 'HTML5_PREF_WANTS',
'isInlinePlaybackNoAd': True,
'adPlaybackContext': {
'pyv': True,
'adType': 'AD_TYPE_INSTREAM',
},
}
if sts is not None:
content_playback_context['signatureTimestamp'] = sts
@ -3681,6 +3684,11 @@ def process_manifest_format(f, proto, client_name, itag, po_token):
f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
f['source_preference'] -= 20
# XXX: Check if IOS HLS formats are affected by player PO token enforcement; temporary
# See https://github.com/yt-dlp/yt-dlp/issues/13511
if proto == 'hls' and client_name == 'ios':
f['__needs_testing'] = True
itags[itag].add(key)
if itag and all_formats:
@ -4409,6 +4417,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
# Newly uploaded videos' HLS formats are potentially problematic and need to be checked
# XXX: This is redundant for as long as we are already checking all IOS HLS formats
upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
if upload_datetime >= datetime_from_str('today-2days'):
for fmt in info['formats']:

View File

@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
__version__ = '2025.06.09'
__version__ = '2025.06.25'
RELEASE_GIT_HEAD = '339614a173c74b42d63e858c446a9cae262a13af'
RELEASE_GIT_HEAD = '1838a1ce5d4ade80770ba9162eaffc9a1607dc70'
VARIANT = None
@ -12,4 +12,4 @@
ORIGIN = 'yt-dlp/yt-dlp'
_pkg_version = '2025.06.09'
_pkg_version = '2025.06.25'