[^>]+>[^>]+>([^<]+)',
- webpage, 'description', fatal=False),
- 'uploader': self._html_search_regex(
- r'[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False),
- 'thumbnail': media.get('staticImage'),
+ 'subtitles': subtitles,
+ **traverse_obj(video_details, {
+ 'title': (('parlViewTitle', 'title'), {str}, any),
+ 'description': ('parlViewDescription', {str}),
+ 'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}),
+ 'timestamp': ('recordingFrom', {parse_iso8601}),
+ 'thumbnail': ('thumbUrl', {url_or_none}),
+ }),
}
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index 2c1436cac1..9038b4a7ff 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -19,7 +19,7 @@
url_or_none,
urljoin,
)
-from ..utils.traversal import traverse_obj, value
+from ..utils.traversal import require, traverse_obj, value
class PatreonBaseIE(InfoExtractor):
@@ -462,7 +462,7 @@ class PatreonCampaignIE(PatreonBaseIE):
_VALID_URL = r'''(?x)
https?://(?:www\.)?patreon\.com/(?:
(?:m|api/campaigns)/(?P<campaign_id>\d+)|
- (?:c/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
+ (?:cw?/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
)(?:/posts)?/?(?:$|[?#])'''
_TESTS = [{
'url': 'https://www.patreon.com/dissonancepod/',
@@ -531,6 +531,28 @@ class PatreonCampaignIE(PatreonBaseIE):
'age_limit': 0,
},
'playlist_mincount': 331,
+ 'skip': 'Channel removed',
+ }, {
+ # next.js v13 data, see https://github.com/yt-dlp/yt-dlp/issues/13622
+ 'url': 'https://www.patreon.com/c/anythingelse/posts',
+ 'info_dict': {
+ 'id': '9631148',
+ 'title': 'Anything Else?',
+ 'description': 'md5:2ee1db4aed2f9460c2b295825a24aa08',
+ 'uploader': 'dan ',
+ 'uploader_id': '13852412',
+ 'uploader_url': 'https://www.patreon.com/anythingelse',
+ 'channel': 'Anything Else?',
+ 'channel_id': '9631148',
+ 'channel_url': 'https://www.patreon.com/anythingelse',
+ 'channel_follower_count': int,
+ 'age_limit': 0,
+ 'thumbnail': r're:https?://.+/.+',
+ },
+ 'playlist_mincount': 151,
+ }, {
+ 'url': 'https://www.patreon.com/cw/anythingelse',
+ 'only_matching': True,
}, {
'url': 'https://www.patreon.com/c/OgSog/posts',
'only_matching': True,
@@ -572,8 +594,11 @@ def _real_extract(self, url):
campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
if campaign_id is None:
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
- campaign_id = self._search_nextjs_data(
- webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']
+ campaign_id = traverse_obj(self._search_nextjs_data(webpage, vanity, default=None), (
+ 'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str}))
+ if not campaign_id:
+ campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), (
+ lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')}))
params = {
'json-api-use-default-includes': 'false',
diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py
index 2b69c7e6cf..4c8205f8ee 100644
--- a/yt_dlp/extractor/peertube.py
+++ b/yt_dlp/extractor/peertube.py
@@ -1331,7 +1331,7 @@ class PeerTubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'What is PeerTube?',
'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
- 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ 'thumbnail': r're:https?://framatube\.org/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1538391166,
'upload_date': '20181001',
'uploader': 'Framasoft',
@@ -1346,19 +1346,34 @@ class PeerTubeIE(InfoExtractor):
'view_count': int,
'like_count': int,
'dislike_count': int,
- 'tags': ['framasoft', 'peertube'],
+ 'tags': 'count:2',
'categories': ['Science & Technology'],
},
+ 'expected_warnings': ['HTTP Error 400: Bad Request'],
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
'info_dict': {
'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
'ext': 'mp4',
'title': 'E2E tests',
- 'uploader_id': '37855',
+ 'categories': ['Unknown'],
+ 'channel': 'Main chocobozzz channel',
+ 'channel_id': '5187',
+ 'channel_url': 'https://peertube2.cpy.re/video-channels/chocobozzz_channel',
+ 'description': 'md5:67daf92c833c41c95db874e18fcb2786',
+ 'dislike_count': int,
+ 'duration': 52,
+ 'license': 'Unknown',
+ 'like_count': int,
+ 'tags': [],
+ 'thumbnail': r're:https?://peertube2\.cpy\.re/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1589276219,
'upload_date': '20200512',
'uploader': 'chocobozzz',
+ 'uploader_id': '37855',
+ 'uploader_url': 'https://peertube2.cpy.re/accounts/chocobozzz',
+ 'view_count': int,
},
}, {
'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
@@ -1366,10 +1381,23 @@ class PeerTubeIE(InfoExtractor):
'id': '3fbif9S3WmtTP8gGsC5HBd',
'ext': 'mp4',
'title': 'E2E tests',
- 'uploader_id': '37855',
+ 'categories': ['Unknown'],
+ 'channel': 'Main chocobozzz channel',
+ 'channel_id': '5187',
+ 'channel_url': 'https://peertube2.cpy.re/video-channels/chocobozzz_channel',
+ 'description': 'md5:67daf92c833c41c95db874e18fcb2786',
+ 'dislike_count': int,
+ 'duration': 52,
+ 'license': 'Unknown',
+ 'like_count': int,
+ 'tags': [],
+ 'thumbnail': r're:https?://peertube2\.cpy\.re/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1589276219,
'upload_date': '20200512',
'uploader': 'chocobozzz',
+ 'uploader_id': '37855',
+ 'uploader_url': 'https://peertube2.cpy.re/accounts/chocobozzz',
+ 'view_count': int,
},
}, {
'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
@@ -1377,13 +1405,26 @@ class PeerTubeIE(InfoExtractor):
'id': '3fbif9S3WmtTP8gGsC5HBd',
'ext': 'mp4',
'title': 'E2E tests',
- 'uploader_id': '37855',
+ 'categories': ['Unknown'],
+ 'channel': 'Main chocobozzz channel',
+ 'channel_id': '5187',
+ 'channel_url': 'https://peertube2.cpy.re/video-channels/chocobozzz_channel',
+ 'description': 'md5:67daf92c833c41c95db874e18fcb2786',
+ 'dislike_count': int,
+ 'duration': 52,
+ 'license': 'Unknown',
+ 'like_count': int,
+ 'tags': [],
+ 'thumbnail': r're:https?://peertube2\.cpy\.re/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1589276219,
'upload_date': '20200512',
'uploader': 'chocobozzz',
+ 'uploader_id': '37855',
+ 'uploader_url': 'https://peertube2.cpy.re/accounts/chocobozzz',
+ 'view_count': int,
},
}, {
- # Issue #26002
+ # https://github.com/ytdl-org/youtube-dl/issues/26002
'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
'info_dict': {
'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
@@ -1394,6 +1435,7 @@ class PeerTubeIE(InfoExtractor):
'upload_date': '20200420',
'uploader': 'Drew DeVault',
},
+ 'skip': 'Invalid URL',
}, {
'url': 'https://peertube.debian.social/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
'only_matching': True,
@@ -1411,6 +1453,33 @@ class PeerTubeIE(InfoExtractor):
'url': 'peertube:framatube.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
'only_matching': True,
}]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://video.macver.org/w/6gvhZpUGQVd4SQ6oYDc9pC',
+ 'info_dict': {
+ 'id': '6gvhZpUGQVd4SQ6oYDc9pC',
+ 'ext': 'mp4',
+ 'title': 'Minecraft, but if you say a block, it gets deleted',
+ 'categories': ['Gaming'],
+ 'channel': 'Waffle Irons Gaming',
+ 'channel_id': '4',
+ 'channel_url': 'https://video.macver.org/video-channels/waffle_irons',
+ 'description': 'md5:eda8daf64b0dadd00cc248f28eef213c',
+ 'dislike_count': int,
+ 'duration': 1643,
+ 'license': 'Attribution - Non Commercial',
+ 'like_count': int,
+ 'tags': 'count:1',
+ 'thumbnail': r're:https?://video\.macver\.org/lazy-static/thumbnails/.+\.jpg',
+ 'timestamp': 1751142352,
+ 'upload_date': '20250628',
+ 'uploader': 'Bog',
+ 'uploader_id': '3',
+ 'uploader_url': 'https://video.macver.org/accounts/bog',
+ 'view_count': int,
+ },
+ 'expected_warnings': ['HTTP Error 400: Bad Request', 'Ignoring subtitle tracks found in the HLS manifest'],
+ 'params': {'skip_download': 'm3u8'},
+ }]
@staticmethod
def _extract_peertube_url(webpage, source_url):
@@ -1580,31 +1649,47 @@ class PeerTubePlaylistIE(InfoExtractor):
'id': 'hFdJoTuyhNJVa1cDWd1d12',
'description': 'Diversas palestras do Richard Stallman no Brasil.',
'title': 'Richard Stallman no Brasil',
+ 'channel': 'debianbrazilteam',
+ 'channel_id': 1522,
+ 'thumbnail': r're:https?://peertube\.debian\.social/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1599676222,
+ 'upload_date': '20200909',
},
'playlist_mincount': 9,
}, {
'url': 'https://peertube2.cpy.re/a/chocobozzz/videos',
'info_dict': {
'id': 'chocobozzz',
- 'timestamp': 1553874564,
'title': 'chocobozzz',
+ 'channel': 'chocobozzz',
+ 'channel_id': 37855,
+ 'thumbnail': '',
+ 'timestamp': 1553874564,
+ 'upload_date': '20190329',
},
'playlist_mincount': 2,
}, {
'url': 'https://framatube.org/c/bf54d359-cfad-4935-9d45-9d6be93f63e8/videos',
'info_dict': {
'id': 'bf54d359-cfad-4935-9d45-9d6be93f63e8',
- 'timestamp': 1519917377,
'title': 'Les vidéos de Framasoft',
+ 'channel': 'framasoft',
+ 'channel_id': 3,
+ 'thumbnail': '',
+ 'timestamp': 1519917377,
+ 'upload_date': '20180301',
},
'playlist_mincount': 345,
}, {
'url': 'https://peertube2.cpy.re/c/blender_open_movies@video.blender.org/videos',
'info_dict': {
'id': 'blender_open_movies@video.blender.org',
- 'timestamp': 1542287810,
'title': 'Official Blender Open Movies',
+ 'channel': 'blender',
+ 'channel_id': 1926,
+ 'thumbnail': '',
+ 'timestamp': 1540472902,
+ 'upload_date': '20181025',
},
'playlist_mincount': 11,
}]
diff --git a/yt_dlp/extractor/playerfm.py b/yt_dlp/extractor/playerfm.py
new file mode 100644
index 0000000000..d59d651a32
--- /dev/null
+++ b/yt_dlp/extractor/playerfm.py
@@ -0,0 +1,70 @@
+from .common import InfoExtractor
+from ..utils import clean_html, clean_podcast_url, int_or_none, str_or_none, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class PlayerFmIE(InfoExtractor):  # extractor for single player.fm episode pages
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?player\.fm/(?:series/)?[\w-]+/(?P<id>[\w-]+))'
+    _TESTS = [{
+        'url': 'https://player.fm/series/chapo-trap-house/movie-mindset-33-casino-feat-felix',
+        'info_dict': {
+            'ext': 'mp3',
+            'id': '478606546',
+            'display_id': 'movie-mindset-33-casino-feat-felix',
+            'thumbnail': r're:^https://.*\.(jpg|png)',
+            'title': 'Movie Mindset 33 - Casino feat. Felix',
+            'creators': ['Chapo Trap House'],
+            'description': r're:The first episode of this season of Movie Mindset is free .+ we feel about it\.',
+            'duration': 6830,
+            'timestamp': 1745406000,
+            'upload_date': '20250423',
+        },
+    }, {
+        'url': 'https://player.fm/series/nbc-nightly-news-with-tom-llamas/thursday-april-17-2025',
+        'info_dict': {
+            'ext': 'mp3',
+            'id': '477635490',
+            'display_id': 'thursday-april-17-2025',
+            'title': 'Thursday, April 17, 2025',
+            'thumbnail': r're:^https://.*\.(jpg|png)',
+            'duration': 1143,
+            'description': 'md5:4890b8cf9a55a787561cd5d59dfcda82',
+            'creators': ['NBC News'],
+            'timestamp': 1744941374,
+            'upload_date': '20250418',
+        },
+    }, {
+        'url': 'https://player.fm/series/soccer-101/ep-109-its-kicking-off-how-have-the-rules-for-kickoff-changed-what-are-the-best-approaches-to-getting-the-game-underway-and-how-could-we-improve-on-the-present-system-ack3NzL3yibvs4pf',
+        'info_dict': {
+            'ext': 'mp3',
+            'id': '481418710',
+            'thumbnail': r're:^https://.*\.(jpg|png)',
+            'title': r're:#109 It\'s kicking off! How have the rules for kickoff changed, .+ the present system\?',
+            'creators': ['TSS'],
+            'duration': 1510,
+            'display_id': 'md5:b52ecacaefab891b59db69721bfd9b13',
+            'description': 'md5:52a39e36d08d8919527454f152ad3c25',
+            'timestamp': 1659102055,
+            'upload_date': '20220729',
+        },
+    }]
+
+    def _real_extract(self, url):  # returns the info dict for one episode
+        display_id, url = self._match_valid_url(url).group('id', 'url')  # slug and matched episode URL
+        data = self._download_json(f'{url}.json', display_id)  # metadata is requested from '<url>.json'
+
+        return {
+            'display_id': display_id,
+            'vcodec': 'none',  # no video codec is reported for these results
+            **traverse_obj(data, {
+                'id': ('id', {int}, {str_or_none}),
+                'url': ('url', {clean_podcast_url}),
+                'title': ('title', {str}),
+                'description': ('description', {clean_html}),
+                'duration': ('duration', {int_or_none}),
+                'thumbnail': (('image', ('series', 'image')), 'url', {url_or_none}, any),  # episode image first, then series image
+                'filesize': ('size', {int_or_none}),
+                'timestamp': ('publishedAt', {int_or_none}),
+                'creators': ('series', 'author', {str}, filter, all, filter),
+            }),
+        }
diff --git a/yt_dlp/extractor/playwire.py b/yt_dlp/extractor/playwire.py
index 8539a4b56c..2323bd0cf9 100644
--- a/yt_dlp/extractor/playwire.py
+++ b/yt_dlp/extractor/playwire.py
@@ -19,6 +19,7 @@ class PlaywireIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.png$',
'duration': 145.94,
},
+ 'skip': 'Invalid URL',
}, {
# m3u8 in f4m
'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
@@ -27,10 +28,7 @@ class PlaywireIE(InfoExtractor):
'ext': 'mp4',
'title': 'ITV EL SHOW FULL',
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
+ 'skip': 'Invalid URL',
}, {
# Multiple resolutions while bitrates missing
'url': 'http://cdn.playwire.com/11625/embed/85228.html',
@@ -42,6 +40,15 @@ class PlaywireIE(InfoExtractor):
'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
'only_matching': True,
}]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+ 'info_dict': {
+ 'id': '3519514',
+ 'ext': 'mp4',
+ 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+ },
+ 'skip': 'Site no longer embeds Playwire',
+ }]
def _real_extract(self, url):
mobj = self._match_valid_url(url)
diff --git a/yt_dlp/extractor/plyr.py b/yt_dlp/extractor/plyr.py
new file mode 100644
index 0000000000..c5f27cfd95
--- /dev/null
+++ b/yt_dlp/extractor/plyr.py
@@ -0,0 +1,104 @@
+import re
+
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+
+
+class PlyrEmbedIE(InfoExtractor):  # finds Plyr player embeds (Vimeo/YouTube) in arbitrary webpages
+    _VALID_URL = False  # presumably disables direct URL matching (embed-only extractor) - confirm
+    _WEBPAGE_TESTS = [{
+        # data-plyr-embed-id="https://player.vimeo.com/video/522319456/90e5c96063?dnt=1"
+        'url': 'https://www.dhm.de/zeughauskino/filmreihen/online-filmreihen/filme-des-marshall-plans/200000000-mouths/',
+        'info_dict': {
+            'id': '522319456',
+            'ext': 'mp4',
+            'title': '200.000.000 Mouths (1950–51)',
+            'uploader': 'Zeughauskino',
+            'uploader_url': '',
+            'comment_count': int,
+            'like_count': int,
+            'duration': 963,
+            'thumbnail': 'https://i.vimeocdn.com/video/1081797161-9f09ddb4b7faa86e834e006b8e4b9c2cbaa0baa7da493211bf0796ae133a5ab8-d',
+            'timestamp': 1615467405,
+            'upload_date': '20210311',
+            'release_timestamp': 1615467405,
+            'release_date': '20210311',
+        },
+        'params': {'skip_download': 'm3u8'},
+        'expected_warnings': ['Failed to parse XML: not well-formed'],
+    }, {
+        # data-plyr-provider="vimeo" data-plyr-embed-id="803435276"
+        'url': 'https://www.inarcassa.it/',
+        'info_dict': {
+            'id': '803435276',
+            'ext': 'mp4',
+            'title': 'HOME_Moto_Perpetuo',
+            'uploader': 'Inarcassa',
+            'uploader_url': '',
+            'duration': 38,
+            'thumbnail': 'https://i.vimeocdn.com/video/1663734769-945ad7ffabb16dbca009c023fd1d7b36bdb426a3dbae8345ed758136fe28f89a-d',
+        },
+        'params': {'skip_download': 'm3u8'},
+        'expected_warnings': ['Failed to parse XML: not well-formed'],
+    }, {
+        # data-plyr-embed-id="https://youtu.be/GF-BjYKoAqI"
+        'url': 'https://www.profile.nl',
+        'info_dict': {
+            'id': 'GF-BjYKoAqI',
+            'ext': 'mp4',
+            'title': 'PROFILE: Recruitment Profile',
+            'description': '',
+            'media_type': 'video',
+            'uploader': 'Profile Nederland',
+            'uploader_id': '@profilenederland',
+            'uploader_url': 'https://www.youtube.com/@profilenederland',
+            'channel': 'Profile Nederland',
+            'channel_id': 'UC9AUkB0Tv39-TBYjs05n3vg',
+            'channel_url': 'https://www.youtube.com/channel/UC9AUkB0Tv39-TBYjs05n3vg',
+            'channel_follower_count': int,
+            'view_count': int,
+            'like_count': int,
+            'age_limit': 0,
+            'duration': 39,
+            'thumbnail': 'https://i.ytimg.com/vi/GF-BjYKoAqI/maxresdefault.jpg',
+            'categories': ['Autos & Vehicles'],
+            'tags': [],
+            'timestamp': 1675692990,
+            'upload_date': '20230206',
+            'playable_in_embed': True,
+            'availability': 'public',
+            'live_status': 'not_live',
+        },
+    }, {
+        # data-plyr-embed-id="B1TZV8rNZoc" data-plyr-provider="youtube"
+        'url': 'https://www.vnis.edu.vn',
+        'info_dict': {
+            'id': 'vnis.edu',
+            'title': 'VNIS Education - Master Agent các Trường hàng đầu Bắc Mỹ',
+            'description': 'md5:4dafcf7335bb018780e4426da8ab8e4e',
+            'age_limit': 0,
+            'thumbnail': 'https://vnis.edu.vn/wp-content/uploads/2021/05/ve-welcome-en.png',
+            'timestamp': 1753233356,
+            'upload_date': '20250723',
+        },
+        'playlist_count': 3,
+    }]
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):  # NOTE(review): the '<div' prefix of the pattern below is a reconstruction - confirm
+        plyr_embeds = re.finditer(r'''(?x)
+            <div[^>]+(?:
+                data-plyr-embed-id="(?P<id1>[^"]+)"[^>]+data-plyr-provider="(?P<provider1>[^"]+)"|
+                data-plyr-provider="(?P<provider2>[^"]+)"[^>]+data-plyr-embed-id="(?P<id2>[^"]+)"
+            )[^>]*>''', webpage)
+        for mobj in plyr_embeds:  # the two alternatives cover either attribute order
+            embed_id = mobj.group('id1') or mobj.group('id2')  # only the matched alternative's groups are set
+            provider = mobj.group('provider1') or mobj.group('provider2')
+            if provider == 'vimeo':
+                if not re.match(r'https?://', embed_id):  # bare video ID rather than a full URL
+                    embed_id = f'https://player.vimeo.com/video/{embed_id}'
+                yield VimeoIE._smuggle_referrer(embed_id, url)
+            elif provider == 'youtube':  # any other provider yields nothing
+                if not re.match(r'https?://', embed_id):  # bare video ID rather than a full URL
+                    embed_id = f'https://youtube.com/watch?v={embed_id}'
+                yield embed_id
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index c489dc7312..d1a4d4c37f 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -81,7 +81,7 @@ def fix_cdata(s):
# geo flag is a bit unreliable and not properly set all the time
geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y'
- ext = determine_ext(media_url)
+ ext = determine_ext(media_url).lower()
formats = []
if ext == 'mp3':
@@ -108,7 +108,7 @@ def fix_cdata(s):
'format_id': join_nonempty('https', bitrate, delim='-'),
})
else:
- raise ExtractorError('Unrecognized media file found')
+ raise ExtractorError(f'Unrecognized media extension "{ext}"')
if (not formats and geoprotection is True) or '/video_no_available.mp4' in media_url:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
@@ -503,6 +503,28 @@ class RaiPlaySoundIE(RaiBaseIE):
'upload_date': '20211201',
},
'params': {'skip_download': True},
+ }, {
+ # case-sensitivity test for uppercase extension
+ 'url': 'https://www.raiplaysound.it/audio/2020/05/Storia--Lunita-dItalia-e-lunificazione-della-Germania-b4c16390-7f3f-4282-b353-d94897dacb7c.html',
+ 'md5': 'c69ebd69282f0effd7ef67b7e2f6c7d8',
+ 'info_dict': {
+ 'id': 'b4c16390-7f3f-4282-b353-d94897dacb7c',
+ 'ext': 'mp3',
+ 'title': "Storia | 01 L'unità d'Italia e l'unificazione della Germania",
+ 'alt_title': 'md5:ed4ed82585c52057b71b43994a59b705',
+ 'description': 'md5:92818b6f31b2c150567d56b75db2ea7f',
+ 'uploader': 'rai radio 3',
+ 'duration': 2439.0,
+ 'thumbnail': 'https://www.raiplaysound.it/dl/img/2023/09/07/1694084898279_Maturadio-LOGO-2048x1152.jpg',
+ 'creators': ['rai radio 3'],
+ 'series': 'Maturadio',
+ 'season': 'Season 9',
+ 'season_number': 9,
+ 'episode': "01. L'unità d'Italia e l'unificazione della Germania",
+ 'episode_number': 1,
+ 'timestamp': 1590400740,
+ 'upload_date': '20200525',
+ },
}]
def _real_extract(self, url):
@@ -765,7 +787,7 @@ class RaiCulturaIE(RaiNewsIE): # XXX: Do not subclass from concrete IE
class RaiSudtirolIE(RaiBaseIE):
- _VALID_URL = r'https?://raisudtirol\.rai\.it/.+media=(?P<id>\w+)'
+ _VALID_URL = r'https?://rai(?:bz|sudtirol)\.rai\.it/.+media=(?P<id>\w+)'
_TESTS = [{
# mp4 file
'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460',
@@ -791,6 +813,9 @@ class RaiSudtirolIE(RaiBaseIE):
'formats': 'count:6',
},
'params': {'skip_download': True},
+ }, {
+ 'url': 'https://raibz.rai.it/de/index.php?media=Ptv1751660400',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/roya.py b/yt_dlp/extractor/roya.py
index e9fe304eeb..9094808a15 100644
--- a/yt_dlp/extractor/roya.py
+++ b/yt_dlp/extractor/roya.py
@@ -3,9 +3,9 @@
class RoyaLiveIE(InfoExtractor):
- _VALID_URL = r'https?://roya\.tv/live-stream/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:en\.)?roya\.tv/live-stream/(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://roya.tv/live-stream/1',
+ 'url': 'https://en.roya.tv/live-stream/1',
'info_dict': {
'id': '1',
'title': r're:Roya TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py
index 2812d93059..c2ccf73ddc 100644
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -6,9 +6,11 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
+ InAdvancePagedList,
clean_html,
determine_ext,
float_or_none,
+ int_or_none,
make_archive_id,
parse_iso8601,
qualities,
@@ -371,3 +373,62 @@ def _real_extract(self, url):
raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())
+
+
+class RTVEProgramIE(RTVEBaseIE):  # playlist extractor for rtve.es/play/videos/<program> pages
+    IE_NAME = 'rtve.es:program'
+    IE_DESC = 'RTVE.es programs'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/(?P<id>[\w-]+)/?(?:[?#]|$)'
+    _TESTS = [{
+        'url': 'https://www.rtve.es/play/videos/saber-vivir/',
+        'info_dict': {
+            'id': '111570',
+            'title': 'Saber vivir - Programa de ciencia y futuro en RTVE Play',
+        },
+        'playlist_mincount': 400,
+    }]
+    _PAGE_SIZE = 60  # items per API page; also the page size passed to InAdvancePagedList
+
+    def _fetch_page(self, program_id, page_num):  # downloads one 1-based page of the program's video list
+        return self._download_json(
+            f'https://www.rtve.es/api/programas/{program_id}/videos',
+            program_id, note=f'Downloading page {page_num}',
+            query={
+                'type': 39816,
+                'page': page_num,
+                'size': self._PAGE_SIZE,  # keep the request size in sync with the paging below
+            })
+
+    def _entries(self, page_data):  # yields url_transparent results for items that carry a valid htmlUrl
+        for video in traverse_obj(page_data, ('page', 'items', lambda _, v: url_or_none(v['htmlUrl']))):
+            yield self.url_result(
+                video['htmlUrl'], RTVEALaCartaIE, url_transparent=True,
+                **traverse_obj(video, {
+                    'id': ('id', {str}),
+                    'title': ('longTitle', {str}),
+                    'description': ('shortDescription', {str}),
+                    'duration': ('duration', {float_or_none(scale=1000)}),
+                    'series': (('programInfo', 'title'), {str}, any),  # NOTE(review): branches over both keys; was programInfo -> title intended? confirm
+                    'season_number': ('temporadaOrden', {int_or_none}),
+                    'season_id': ('temporadaId', {str}),
+                    'season': ('temporada', {str}),
+                    'episode_number': ('episode', {int_or_none}),
+                    'episode': ('title', {str}),
+                    'thumbnail': ('thumbnail', {url_or_none}),
+                }),
+            )
+
+    def _real_extract(self, url):  # resolves the program ID from the page, then pages through the API
+        program_slug = self._match_id(url)
+        program_page = self._download_webpage(url, program_slug)
+
+        program_id = self._html_search_meta('DC.identifier', program_page, 'Program ID', fatal=True)
+
+        first_page = self._fetch_page(program_id, 1)  # fetched eagerly to learn the page count
+        page_count = traverse_obj(first_page, ('page', 'totalPages', {int})) or 1  # 1 when totalPages is missing
+
+        entries = InAdvancePagedList(  # index 0 reuses first_page; later indices fetch page idx + 1
+            lambda idx: self._entries(self._fetch_page(program_id, idx + 1) if idx else first_page),
+            page_count, self._PAGE_SIZE)
+
+        return self.playlist_result(entries, program_id, self._html_extract_title(program_page))
diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py
index 20365ac5d1..1e0e983529 100644
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@@ -115,7 +115,6 @@ class RutubeIE(RutubeBaseIE):
_TESTS = [{
'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
- 'md5': '3d73fdfe5bb81b9aef139e22ef3de26a',
'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e',
'ext': 'mp4',
@@ -128,10 +127,11 @@ class RutubeIE(RutubeBaseIE):
'upload_date': '20131016',
'age_limit': 0,
'view_count': int,
- 'thumbnail': 'https://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
+ 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'categories': ['Новости и СМИ'],
'chapters': [],
},
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True,
@@ -146,7 +146,6 @@ class RutubeIE(RutubeBaseIE):
'only_matching': True,
}, {
'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg',
- 'md5': '4fce7b4fcc7b1bcaa3f45eb1e1ad0dd7',
'info_dict': {
'id': '884fb55f07a97ab673c7d654553e0f48',
'ext': 'mp4',
@@ -163,6 +162,7 @@ class RutubeIE(RutubeBaseIE):
'categories': ['Видеоигры'],
'chapters': [],
},
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/',
'info_dict': {
@@ -171,7 +171,7 @@ class RutubeIE(RutubeBaseIE):
'chapters': 'count:4',
'categories': ['Бизнес и предпринимательство'],
'description': 'md5:252feac1305257d8c1bab215cedde75d',
- 'thumbnail': 'https://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png',
+ 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'duration': 782,
'age_limit': 0,
'uploader_id': '23491359',
@@ -181,6 +181,7 @@ class RutubeIE(RutubeBaseIE):
'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании #1',
'uploader': 'Стас Быков',
},
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
'info_dict': {
@@ -188,16 +189,17 @@ class RutubeIE(RutubeBaseIE):
'ext': 'mp4',
'categories': ['Телепередачи'],
'description': '',
- 'thumbnail': 'https://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
+ 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'live_status': 'is_live',
'age_limit': 0,
'uploader_id': '23460655',
'timestamp': 1652972968,
'view_count': int,
'upload_date': '20220519',
- 'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'title': str,
'uploader': 'Первый канал',
},
+ 'skip': 'Invalid URL',
}, {
'url': 'https://rutube.ru/play/embed/03a9cb54bac3376af4c5cb0f18444e01/',
'info_dict': {
@@ -211,11 +213,12 @@ class RutubeIE(RutubeBaseIE):
'duration': 293,
'uploader': 'MOEX - Московская биржа',
'timestamp': 1724946628,
- 'thumbnail': 'https://pic.rutubelist.ru/video/2e/24/2e241fddb459baf0fa54acfca44874f4.jpg',
+ 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'view_count': int,
'uploader_id': '38420507',
'categories': ['Интервью'],
},
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
'only_matching': True,
@@ -223,6 +226,26 @@ class RutubeIE(RutubeBaseIE):
'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/',
'only_matching': True,
}]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://novate.ru/blogs/170625/73644/',
+ 'info_dict': {
+ 'id': 'b0c96c75a4e5b274721bbced6ed8fb64',
+ 'ext': 'mp4',
+ 'title': 'Где в России находится единственная в своем роде скальная торпедная батарея',
+ 'age_limit': 0,
+ 'categories': ['Наука'],
+ 'chapters': [],
+ 'description': 'md5:2ed82e6b81958a43da6fb4d56f949e1f',
+ 'duration': 182,
+ 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
+ 'timestamp': 1749950158,
+ 'upload_date': '20250615',
+ 'uploader': 'Novate',
+ 'uploader_id': '24044809',
+ 'view_count': int,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -256,12 +279,10 @@ class RutubeEmbedIE(RutubeBaseIE):
'chapters': [],
'description': 'md5:a5acea57bbc3ccdc3cacd1f11a014b5b',
'view_count': int,
- 'thumbnail': 'https://pic.rutubelist.ru/video/d3/03/d3031f4670a6e6170d88fb3607948418.jpg',
+ 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'categories': ['Сериалы'],
},
- 'params': {
- 'skip_download': True,
- },
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://rutube.ru/play/embed/8083783',
'only_matching': True,
diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py
index 9bd26ad03e..11270a1f2c 100644
--- a/yt_dlp/extractor/rutv.py
+++ b/yt_dlp/extractor/rutv.py
@@ -16,96 +16,88 @@ class RUTVIE(InfoExtractor):
)
(?P\d+)
'''
- _EMBED_URLS = [
+ _EMBED_REGEX = [
r' |