1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-27 17:08:32 +00:00

Merge remote-tracking branch 'upstream/master' into wait-retries

This commit is contained in:
Paul Storkman 2025-05-14 04:11:29 +02:00
commit 03624e625b
13 changed files with 433 additions and 344 deletions

View File

@ -338,7 +338,6 @@
from .canalplus import CanalplusIE
from .canalsurmas import CanalsurmasIE
from .caracoltv import CaracolTvPlayIE
from .cartoonnetwork import CartoonNetworkIE
from .cbc import (
CBCIE,
CBCGemIE,
@ -929,7 +928,10 @@
)
from .jiosaavn import (
JioSaavnAlbumIE,
JioSaavnArtistIE,
JioSaavnPlaylistIE,
JioSaavnShowIE,
JioSaavnShowPlaylistIE,
JioSaavnSongIE,
)
from .joj import JojIE
@ -1964,7 +1966,6 @@
SpreakerShowIE,
)
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .sproutvideo import (
SproutVideoIE,
VidsIoIE,

View File

@ -1,32 +1,24 @@
import re
from .theplatform import ThePlatformIE
from ..utils import (
int_or_none,
parse_age_limit,
try_get,
update_url_query,
)
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils.traversal import traverse_obj
class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
class AMCNetworksIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/?#]+)+)/[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
'url': 'https://www.amc.com/shows/dark-winds/videos/dark-winds-a-look-at-season-3--1072027',
'info_dict': {
'id': '4Lq1dzOnZGt0',
'id': '6369261343112',
'ext': 'mp4',
'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
'upload_date': '20201120',
'timestamp': 1605904350,
'uploader': 'AMCN',
'title': 'Dark Winds: A Look at Season 3',
'uploader_id': '6240731308001',
'duration': 176.427,
'thumbnail': r're:https://[^/]+\.boltdns\.net/.+/image\.jpg',
'tags': [],
'timestamp': 1740414792,
'upload_date': '20250224',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': '404 Not Found',
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
'only_matching': True,
@ -52,96 +44,18 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
'only_matching': True,
}]
_REQUESTOR_ID_MAP = {
'amc': 'AMC',
'bbcamerica': 'BBCA',
'ifc': 'IFC',
'sundancetv': 'SUNDANCE',
'wetv': 'WETV',
}
def _real_extract(self, url):
site, display_id = self._match_valid_url(url).groups()
requestor_id = self._REQUESTOR_ID_MAP[site]
page_data = self._download_json(
f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
display_id)['data']
properties = page_data.get('properties') or {}
query = {
'mbr': 'true',
'manifest': 'm3u',
}
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
initial_data = self._search_json(
r'window\.initialData\s*=\s*JSON\.parse\(String\.raw`', webpage, 'initial data', display_id)
video_id = traverse_obj(initial_data, ('initialData', 'properties', 'videoId', {str}))
if not video_id: # All locked videos are now DRM-protected
self.report_drm(display_id)
account_id = initial_data['config']['brightcove']['accountId']
player_id = initial_data['config']['brightcove']['playerId']
video_player_count = 0
try:
for v in page_data['children']:
if v.get('type') == 'video-player':
release_pid = v['properties']['currentVideo']['meta']['releasePid']
tp_path = 'M_UwQC/' + release_pid
media_url = 'https://link.theplatform.com/s/' + tp_path
video_player_count += 1
except KeyError:
pass
if video_player_count > 1:
self.report_warning(
f'The JSON data has {video_player_count} video players. Only one will be extracted')
# Fall back to videoPid if releasePid not found.
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
if not video_player_count:
tp_path = 'M_UwQC/media/' + properties['videoPid']
media_url = 'https://link.theplatform.com/s/' + tp_path
theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
video_id = theplatform_metadata['pid']
title = theplatform_metadata['title']
rating = try_get(
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
video_category = properties.get('videoCategory')
if video_category and video_category.endswith('-Auth'):
resource = self._get_mvpd_resource(
requestor_id, title, video_id, rating)
query['auth'] = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
media_url = update_url_query(media_url, query)
formats, subtitles = self._extract_theplatform_smil(
media_url, video_id)
thumbnails = []
thumbnail_urls = [properties.get('imageDesktop')]
if 'thumbnail' in info:
thumbnail_urls.append(info.pop('thumbnail'))
for thumbnail_url in thumbnail_urls:
if not thumbnail_url:
continue
mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
thumbnails.append({
'url': thumbnail_url,
'width': int(mobj.group(1)) if mobj else None,
'height': int(mobj.group(2)) if mobj else None,
})
info.update({
'age_limit': parse_age_limit(rating),
'formats': formats,
'id': video_id,
'subtitles': subtitles,
'thumbnails': thumbnails,
})
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
if ns_keys:
ns = next(iter(ns_keys))
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
episode_number = int_or_none(
theplatform_metadata.get(ns + '$episode'))
season_number = int_or_none(
theplatform_metadata.get(ns + '$season'))
series = theplatform_metadata.get(ns + '$show') or None
info.update({
'episode': episode,
'episode_number': episode_number,
'season_number': season_number,
'series': series,
})
return info
return self.url_result(
f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
BrightcoveNewIE, video_id)

View File

@ -1,59 +0,0 @@
from .turner import TurnerBaseIE
from ..utils import int_or_none
class CartoonNetworkIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
_TEST = {
'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
'info_dict': {
'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
'ext': 'mp4',
'title': 'How to Draw Upgrade',
'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
metadata_re = ''
if content_re:
metadata_re = r'|video_metadata\.content_' + content_re
return self._search_regex(
rf'(?:_cnglobal\.currentVideo\.{global_re}{metadata_re})\s*=\s*"({value_re})";',
webpage, name, fatal=fatal)
media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
info = self._extract_ngtv_info(
media_id, {'networkId': 'cartoonnetwork'}, {
'url': url,
'site_name': 'CartoonNetwork',
'auth_required': find_field('authType', 'auth type') != 'unauth',
})
series = find_field(
'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
info.update({
'id': media_id,
'display_id': display_id,
'title': title,
'description': self._html_search_meta('description', webpage),
'series': series,
'episode': title,
})
for field in ('season', 'episode'):
field_name = field + 'Number'
info[field + '_number'] = int_or_none(find_field(
field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
return info

View File

@ -1,23 +1,33 @@
import functools
import itertools
import math
import re
from .common import InfoExtractor
from ..utils import (
InAdvancePagedList,
ISO639Utils,
OnDemandPagedList,
clean_html,
int_or_none,
js_to_json,
make_archive_id,
orderedSet,
smuggle_url,
unified_strdate,
unified_timestamp,
unsmuggle_url,
url_basename,
url_or_none,
urlencode_postdata,
urljoin,
variadic,
)
from ..utils.traversal import traverse_obj
class JioSaavnBaseIE(InfoExtractor):
_URL_BASE_RE = r'https?://(?:www\.)?(?:jio)?saavn\.com'
_API_URL = 'https://www.jiosaavn.com/api.php'
_VALID_BITRATES = {'16', '32', '64', '128', '320'}
@ -30,16 +40,20 @@ def requested_bitrates(self):
f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}')
return requested_bitrates
def _extract_formats(self, song_data):
def _extract_formats(self, item_data):
# Show/episode JSON data has a slightly different structure than song JSON data
if media_url := traverse_obj(item_data, ('more_info', 'encrypted_media_url', {str})):
item_data.setdefault('encrypted_media_url', media_url)
for bitrate in self.requested_bitrates:
media_data = self._download_json(
self._API_URL, song_data['id'],
self._API_URL, item_data['id'],
f'Downloading format info for {bitrate}',
fatal=False, data=urlencode_postdata({
'__call': 'song.generateAuthToken',
'_format': 'json',
'bitrate': bitrate,
'url': song_data['encrypted_media_url'],
'url': item_data['encrypted_media_url'],
}))
if not traverse_obj(media_data, ('auth_url', {url_or_none})):
self.report_warning(f'Unable to extract format info for {bitrate}')
@ -53,24 +67,6 @@ def _extract_formats(self, song_data):
'vcodec': 'none',
}
def _extract_song(self, song_data, url=None):
info = traverse_obj(song_data, {
'id': ('id', {str}),
'title': ('song', {clean_html}),
'album': ('album', {clean_html}),
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
'duration': ('duration', {int_or_none}),
'view_count': ('play_count', {int_or_none}),
'release_year': ('year', {int_or_none}),
'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
'webpage_url': ('perma_url', {url_or_none}),
})
if webpage_url := info.get('webpage_url') or url:
info['display_id'] = url_basename(webpage_url)
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
return info
def _call_api(self, type_, token, note='API', params={}):
return self._download_json(
self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
@ -84,19 +80,89 @@ def _call_api(self, type_, token, note='API', params={}):
**params,
})
def _yield_songs(self, playlist_data):
for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
song_info = self._extract_song(song_data)
url = smuggle_url(song_info['webpage_url'], {
'id': song_data['id'],
'encrypted_media_url': song_data['encrypted_media_url'],
})
yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
@staticmethod
def _extract_song(song_data, url=None):
info = traverse_obj(song_data, {
'id': ('id', {str}),
'title': (('song', 'title'), {clean_html}, any),
'album': ((None, 'more_info'), 'album', {clean_html}, any),
'duration': ((None, 'more_info'), 'duration', {int_or_none}, any),
'channel': ((None, 'more_info'), 'label', {str}, any),
'channel_id': ((None, 'more_info'), 'label_id', {str}, any),
'channel_url': ((None, 'more_info'), 'label_url', {urljoin('https://www.jiosaavn.com/')}, any),
'release_date': ((None, 'more_info'), 'release_date', {unified_strdate}, any),
'release_year': ('year', {int_or_none}),
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
'view_count': ('play_count', {int_or_none}),
'language': ('language', {lambda x: ISO639Utils.short2long(x.casefold()) or 'und'}),
'webpage_url': ('perma_url', {url_or_none}),
'artists': ('more_info', 'artistMap', 'primary_artists', ..., 'name', {str}, filter, all),
})
if webpage_url := info.get('webpage_url') or url:
info['display_id'] = url_basename(webpage_url)
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
if primary_artists := traverse_obj(song_data, ('primary_artists', {lambda x: x.split(', ') if x else None})):
info['artists'].extend(primary_artists)
if featured_artists := traverse_obj(song_data, ('featured_artists', {str}, filter)):
info['artists'].extend(featured_artists.split(', '))
info['artists'] = orderedSet(info['artists']) or None
return info
@staticmethod
def _extract_episode(episode_data, url=None):
info = JioSaavnBaseIE._extract_song(episode_data, url)
info.pop('_old_archive_ids', None)
info.update(traverse_obj(episode_data, {
'description': ('more_info', 'description', {str}),
'timestamp': ('more_info', 'release_time', {unified_timestamp}),
'series': ('more_info', 'show_title', {str}),
'series_id': ('more_info', 'show_id', {str}),
'season': ('more_info', 'season_title', {str}),
'season_number': ('more_info', 'season_no', {int_or_none}),
'season_id': ('more_info', 'season_id', {str}),
'episode_number': ('more_info', 'episode_number', {int_or_none}),
'cast': ('starring', {lambda x: x.split(', ') if x else None}),
}))
return info
def _extract_jiosaavn_result(self, url, endpoint, response_key, parse_func):
url, smuggled_data = unsmuggle_url(url)
data = traverse_obj(smuggled_data, ({
'id': ('id', {str}),
'encrypted_media_url': ('encrypted_media_url', {str}),
}))
if 'id' in data and 'encrypted_media_url' in data:
result = {'id': data['id']}
else:
# only extract metadata if this is not a url_transparent result
data = self._call_api(endpoint, self._match_id(url))[response_key][0]
result = parse_func(data, url)
result['formats'] = list(self._extract_formats(data))
return result
def _yield_items(self, playlist_data, keys=None, parse_func=None):
"""Subclasses using this method must set _ENTRY_IE"""
if parse_func is None:
parse_func = self._extract_song
for item_data in traverse_obj(playlist_data, (
*variadic(keys, (str, bytes, dict, set)), lambda _, v: v['id'] and v['perma_url'],
)):
info = parse_func(item_data)
url = smuggle_url(info['webpage_url'], traverse_obj(item_data, {
'id': ('id', {str}),
'encrypted_media_url': ((None, 'more_info'), 'encrypted_media_url', {str}, any),
}))
yield self.url_result(url, self._ENTRY_IE, url_transparent=True, **info)
class JioSaavnSongIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:song'
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'(?:/song/[^/?#]+/|/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
'md5': '3b84396d15ed9e083c3106f1fa589c04',
@ -106,12 +172,38 @@ class JioSaavnSongIE(JioSaavnBaseIE):
'ext': 'm4a',
'title': 'Leja Re',
'album': 'Leja Re',
'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
'thumbnail': r're:https?://.+/.+\.jpg',
'duration': 205,
'view_count': int,
'release_year': 2018,
'artists': ['Sandesh Shandilya', 'Dhvani Bhanushali', 'Tanishk Bagchi'],
'_old_archive_ids': ['jiosaavnsong OQsEfQFVUXk'],
'channel': 'T-Series',
'language': 'hin',
'channel_id': '34297',
'channel_url': 'https://www.jiosaavn.com/label/t-series-albums/6DLuXO3VoTo_',
'release_date': '20181124',
},
}, {
'url': 'https://www.jiosaavn.com/song/chuttamalle/P1FfWjZkQ0Q',
'md5': '96296c58d6ce488a417ef0728fd2d680',
'info_dict': {
'id': 'O94kBTtw',
'display_id': 'P1FfWjZkQ0Q',
'ext': 'm4a',
'title': 'Chuttamalle',
'album': 'Devara Part 1 - Telugu',
'thumbnail': r're:https?://.+/.+\.jpg',
'duration': 222,
'view_count': int,
'release_year': 2024,
'artists': 'count:3',
'_old_archive_ids': ['jiosaavnsong P1FfWjZkQ0Q'],
'channel': 'T-Series',
'language': 'tel',
'channel_id': '34297',
'channel_url': 'https://www.jiosaavn.com/label/t-series-albums/6DLuXO3VoTo_',
'release_date': '20240926',
},
}, {
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
@ -119,26 +211,51 @@ class JioSaavnSongIE(JioSaavnBaseIE):
}]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url)
song_data = traverse_obj(smuggled_data, ({
'id': ('id', {str}),
'encrypted_media_url': ('encrypted_media_url', {str}),
}))
return self._extract_jiosaavn_result(url, 'song', 'songs', self._extract_song)
if 'id' in song_data and 'encrypted_media_url' in song_data:
result = {'id': song_data['id']}
else:
# only extract metadata if this is not a url_transparent result
song_data = self._call_api('song', self._match_id(url))['songs'][0]
result = self._extract_song(song_data, url)
result['formats'] = list(self._extract_formats(song_data))
return result
class JioSaavnShowIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:show'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/shows/[^/?#]+/(?P<id>[^/?#]{11,})/?(?:$|[?#])'
_TESTS = [{
'url': 'https://www.jiosaavn.com/shows/non-food-ways-to-boost-your-energy/XFMcKICOCgc_',
'md5': '0733cd254cfe74ef88bea1eaedcf1f4f',
'info_dict': {
'id': 'qqzh3RKZ',
'display_id': 'XFMcKICOCgc_',
'ext': 'mp3',
'title': 'Non-Food Ways To Boost Your Energy',
'description': 'md5:26e7129644b5c6aada32b8851c3997c8',
'episode': 'Episode 1',
'timestamp': 1640563200,
'series': 'Holistic Lifestyle With Neha Ranglani',
'series_id': '52397',
'season': 'Holistic Lifestyle With Neha Ranglani',
'season_number': 1,
'season_id': '61273',
'thumbnail': r're:https?://.+/.+\.jpg',
'duration': 311,
'view_count': int,
'release_year': 2021,
'language': 'eng',
'channel': 'Saavn OG',
'channel_id': '1953876',
'episode_number': 1,
'upload_date': '20211227',
'release_date': '20211227',
},
}, {
'url': 'https://www.jiosaavn.com/shows/himesh-reshammiya/Kr8fmfSN4vo_',
'only_matching': True,
}]
def _real_extract(self, url):
return self._extract_jiosaavn_result(url, 'episode', 'episodes', self._extract_episode)
class JioSaavnAlbumIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:album'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P<id>[^/?#]+)'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/album/[^/?#]+/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
'info_dict': {
@ -147,18 +264,19 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
},
'playlist_count': 10,
}]
_ENTRY_IE = JioSaavnSongIE
def _real_extract(self, url):
display_id = self._match_id(url)
album_data = self._call_api('album', display_id)
return self.playlist_result(
self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
self._yield_items(album_data, 'songs'), display_id, traverse_obj(album_data, ('title', {str})))
class JioSaavnPlaylistIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:playlist'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
'info_dict': {
@ -172,15 +290,16 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
'id': 'DVR,pFUOwyXqIp77B1JF,A__',
'title': 'Mood Hindi',
},
'playlist_mincount': 801,
'playlist_mincount': 750,
}, {
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
'info_dict': {
'id': 'Me5RridRfDk_',
'title': 'Taaza Tunes',
},
'playlist_mincount': 301,
'playlist_mincount': 50,
}]
_ENTRY_IE = JioSaavnSongIE
_PAGE_SIZE = 50
def _fetch_page(self, token, page):
@ -189,7 +308,7 @@ def _fetch_page(self, token, page):
def _entries(self, token, first_page_data, page):
page_data = first_page_data if not page else self._fetch_page(token, page + 1)
yield from self._yield_songs(page_data)
yield from self._yield_items(page_data, 'songs')
def _real_extract(self, url):
display_id = self._match_id(url)
@ -199,3 +318,95 @@ def _real_extract(self, url):
return self.playlist_result(InAdvancePagedList(
functools.partial(self._entries, display_id, playlist_data),
total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))
class JioSaavnShowPlaylistIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:show:playlist'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/shows/(?P<show>[^#/?]+)/(?P<season>\d+)/[^/?#]+'
_TESTS = [{
'url': 'https://www.jiosaavn.com/shows/talking-music/1/PjReFP-Sguk_',
'info_dict': {
'id': 'talking-music-1',
'title': 'Talking Music',
},
'playlist_mincount': 11,
}]
_ENTRY_IE = JioSaavnShowIE
_PAGE_SIZE = 10
def _fetch_page(self, show_id, season_id, page):
return self._call_api('show', show_id, f'show page {page}', {
'p': page,
'__call': 'show.getAllEpisodes',
'show_id': show_id,
'season_number': season_id,
'api_version': '4',
'sort_order': 'desc',
})
def _entries(self, show_id, season_id, page):
page_data = self._fetch_page(show_id, season_id, page + 1)
yield from self._yield_items(page_data, keys=None, parse_func=self._extract_episode)
def _real_extract(self, url):
show_slug, season_id = self._match_valid_url(url).group('show', 'season')
playlist_id = f'{show_slug}-{season_id}'
webpage = self._download_webpage(url, playlist_id)
show_info = self._search_json(
r'window\.__INITIAL_DATA__\s*=', webpage, 'initial data',
playlist_id, transform_source=js_to_json)['showView']
show_id = show_info['current_id']
entries = OnDemandPagedList(functools.partial(self._entries, show_id, season_id), self._PAGE_SIZE)
return self.playlist_result(
entries, playlist_id, traverse_obj(show_info, ('show', 'title', 'text', {str})))
class JioSaavnArtistIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:artist'
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/artist/[^/?#]+/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/artist/krsna-songs/rYLBEve2z3U_',
'info_dict': {
'id': 'rYLBEve2z3U_',
'title': 'KR$NA',
},
'playlist_mincount': 38,
}, {
'url': 'https://www.jiosaavn.com/artist/sanam-puri-songs/SkNEv3qRhDE_',
'info_dict': {
'id': 'SkNEv3qRhDE_',
'title': 'Sanam Puri',
},
'playlist_mincount': 51,
}]
_ENTRY_IE = JioSaavnSongIE
_PAGE_SIZE = 50
def _fetch_page(self, artist_id, page):
return self._call_api('artist', artist_id, f'artist page {page + 1}', {
'p': page,
'n_song': self._PAGE_SIZE,
'n_album': self._PAGE_SIZE,
'sub_type': '',
'includeMetaTags': '',
'api_version': '4',
'category': 'alphabetical',
'sort_order': 'asc',
})
def _entries(self, artist_id, first_page):
for page in itertools.count():
playlist_data = first_page if not page else self._fetch_page(artist_id, page)
if not traverse_obj(playlist_data, ('topSongs', ..., {dict})):
break
yield from self._yield_items(playlist_data, 'topSongs')
def _real_extract(self, url):
artist_id = self._match_id(url)
first_page = self._fetch_page(artist_id, 0)
return self.playlist_result(
self._entries(artist_id, first_page), artist_id,
traverse_obj(first_page, ('name', {str})))

View File

@ -16,6 +16,7 @@
determine_ext,
float_or_none,
int_or_none,
parse_bitrate,
parse_duration,
parse_iso8601,
parse_qs,
@ -23,7 +24,6 @@
qualities,
remove_start,
str_or_none,
try_get,
unescapeHTML,
unified_timestamp,
update_url_query,
@ -785,8 +785,6 @@ class NiconicoLiveIE(NiconicoBaseIE):
'only_matching': True,
}]
_KNOWN_LATENCY = ('high', 'low')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
@ -802,22 +800,19 @@ def _real_extract(self, url):
})
hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
if latency not in self._KNOWN_LATENCY:
latency = 'high'
ws = self._request_webpage(
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
video_id=video_id, note='Connecting to WebSocket server')
self.write_debug('[debug] Sending HLS server request')
self.write_debug('Sending HLS server request')
ws.send(json.dumps({
'type': 'startWatching',
'data': {
'stream': {
'quality': 'abr',
'protocol': 'hls+fmp4',
'latency': latency,
'protocol': 'hls',
'latency': 'high',
'accessRightMethod': 'single_cookie',
'chasePlay': False,
},
@ -881,18 +876,29 @@ def _real_extract(self, url):
for cookie in cookies:
self._set_cookie(
cookie['domain'], cookie['name'], cookie['value'],
expire_time=unified_timestamp(cookie['expires']), path=cookie['path'], secure=cookie['secure'])
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])
fmt_common = {
'live_latency': 'high',
'origin': hostname,
'protocol': 'niconico_live',
'video_id': video_id,
'ws': ws,
}
q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr'
a_map = {96: 'audio_low', 192: 'audio_high'}
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
for fmt, q in zip(formats, reversed(qualities[1:])):
fmt.update({
'format_id': q,
'protocol': 'niconico_live',
'ws': ws,
'video_id': video_id,
'live_latency': latency,
'origin': hostname,
})
for fmt in formats:
if fmt.get('acodec') == 'none':
fmt['format_id'] = next(q_iter, fmt['format_id'])
elif fmt.get('vcodec') == 'none':
abr = parse_bitrate(fmt['url'].lower())
fmt.update({
'abr': abr,
'format_id': a_map.get(abr, fmt['format_id']),
})
fmt.update(fmt_common)
return {
'id': video_id,

View File

@ -181,6 +181,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 119.0,
},
'skip': 'HTTP Error 500: Internal Server Error',
}, {
# article with audio and no video
'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
@ -190,13 +191,14 @@ class NYTimesArticleIE(NYTimesBaseIE):
'ext': 'mp3',
'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?',
'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e',
'timestamp': 1695960700,
'timestamp': 1696008129,
'upload_date': '20230929',
'creator': 'Stephanie Nolen, Natalija Gormalova',
'creators': ['Stephanie Nolen', 'Natalija Gormalova'],
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 1322,
},
}, {
# lede_media_block already has sourceId
'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
'md5': '3eb5ddb1d6f86254fe4f233826778737',
'info_dict': {
@ -207,7 +209,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
'timestamp': 1701290997,
'upload_date': '20231129',
'uploader': 'By The New York Times',
'creator': 'Katie Rogers',
'creators': ['Katie Rogers'],
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 97.631,
},
@ -222,10 +224,22 @@ class NYTimesArticleIE(NYTimesBaseIE):
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
'upload_date': '20231202',
'creator': 'Emily Steel, Sydney Ember',
'creators': ['Emily Steel', 'Sydney Ember'],
'timestamp': 1701511264,
},
'playlist_count': 3,
}, {
# lede_media_block does not have sourceId
'url': 'https://www.nytimes.com/2025/04/30/well/move/hip-mobility-routine.html',
'info_dict': {
'id': 'hip-mobility-routine',
'title': 'Tight Hips? These Moves Can Help.',
'description': 'Sitting all day is hard on your hips. Try this simple routine for better mobility.',
'creators': ['Alyssa Ages', 'Theodore Tae'],
'timestamp': 1746003629,
'upload_date': '20250430',
},
'playlist_count': 7,
}, {
'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html',
'only_matching': True,
@ -256,14 +270,18 @@ def _extract_content_from_block(self, block):
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
webpage = self._download_webpage(url, page_id, impersonate=True)
art_json = self._search_json(
r'window\.__preloadedData\s*=', webpage, 'media details', page_id,
transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article']
content = art_json['sprinkledBody']['content']
blocks = traverse_obj(art_json, (
'sprinkledBody', 'content', ..., ('ledeMedia', None),
lambda _, v: v['__typename'] in ('Video', 'Audio')))
blocks = []
block_filter = lambda k, v: k == 'media' and v['__typename'] in ('Video', 'Audio')
if lede_media_block := traverse_obj(content, (..., 'ledeMedia', block_filter, any)):
lede_media_block.setdefault('sourceId', art_json.get('sourceId'))
blocks.append(lede_media_block)
blocks.extend(traverse_obj(content, (..., block_filter)))
if not blocks:
raise ExtractorError('Unable to extract any media blocks from webpage')
@ -273,8 +291,7 @@ def _real_extract(self, url):
'sprinkledBody', 'content', ..., 'summary', 'content', ..., 'text', {str}),
get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
'creator': ', '.join(
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))), # TODO: change to 'creators' (list)
'creators': traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName', {str})),
'thumbnails': self._extract_thumbnails(traverse_obj(
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
}

View File

@ -7,11 +7,13 @@
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
parse_qs,
traverse_obj,
update_url_query,
urlencode_postdata,
)
from ..utils.traversal import unpack
class PlaySuisseIE(InfoExtractor):
@ -26,12 +28,12 @@ class PlaySuisseIE(InfoExtractor):
{
# episode in a series
'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
'md5': 'e20d1ede6872a03b41905ca1060a1ef2',
'info_dict': {
'id': '763211',
'ext': 'mp4',
'title': 'Knochen',
'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
'description': 'md5:3bdd80e2ce20227c47aab1df2a79a519',
'duration': 3344,
'series': 'Wilder',
'season': 'Season 1',
@ -42,24 +44,33 @@ class PlaySuisseIE(InfoExtractor):
},
}, {
# film
'url': 'https://www.playsuisse.ch/watch/808675',
'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
'url': 'https://www.playsuisse.ch/detail/2573198',
'md5': '1f115bb0a5191477b1a5771643a4283d',
'info_dict': {
'id': '808675',
'id': '2573198',
'ext': 'mp4',
'title': 'Der Läufer',
'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
'duration': 5280,
'title': 'Azor',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'genres': ['Fiction'],
'creators': ['Andreas Fontana'],
'cast': ['Fabrizio Rongione', 'Stéphanie Cléau', 'Gilles Privat', 'Alexandre Trocki'],
'location': 'France; Argentine',
'release_year': 2021,
'duration': 5981,
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
},
}, {
# series (treated as a playlist)
'url': 'https://www.playsuisse.ch/detail/1115687',
'info_dict': {
'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
'id': '1115687',
'series': 'They all came out to Montreux',
'title': 'They all came out to Montreux',
'description': 'md5:0fefd8c5b4468a0bb35e916887681520',
'genres': ['Documentary'],
'creators': ['Oliver Murray'],
'location': 'Switzerland',
'release_year': 2021,
},
'playlist': [{
'info_dict': {
@ -120,6 +131,12 @@ class PlaySuisseIE(InfoExtractor):
id
name
description
descriptionLong
year
contentTypes
directors
mainCast
productionCountries
duration
episodeNumber
seasonNumber
@ -215,9 +232,7 @@ def _perform_login(self, username, password):
if not self._ID_TOKEN:
raise ExtractorError('Login failed')
def _get_media_data(self, media_id):
# NOTE In the web app, the "locale" header is used to switch between languages,
# However this doesn't seem to take effect when passing the header here.
def _get_media_data(self, media_id, locale=None):
response = self._download_json(
'https://www.playsuisse.ch/api/graphql',
media_id, data=json.dumps({
@ -225,7 +240,7 @@ def _get_media_data(self, media_id):
'query': self._GRAPHQL_QUERY,
'variables': {'assetId': media_id},
}).encode(),
headers={'Content-Type': 'application/json', 'locale': 'de'})
headers={'Content-Type': 'application/json', 'locale': locale or 'de'})
return response['data']['assetV2']
@ -234,7 +249,7 @@ def _real_extract(self, url):
self.raise_login_required(method='password')
media_id = self._match_id(url)
media_data = self._get_media_data(media_id)
media_data = self._get_media_data(media_id, traverse_obj(parse_qs(url), ('locale', 0)))
info = self._extract_single(media_data)
if media_data.get('episodes'):
info.update({
@ -257,15 +272,22 @@ def _extract_single(self, media_data):
self._merge_subtitles(subs, target=subtitles)
return {
'id': media_data['id'],
'title': media_data.get('name'),
'description': media_data.get('description'),
'thumbnails': thumbnails,
'duration': int_or_none(media_data.get('duration')),
'formats': formats,
'subtitles': subtitles,
'series': media_data.get('seriesName'),
'season_number': int_or_none(media_data.get('seasonNumber')),
'episode': media_data.get('name') if media_data.get('episodeNumber') else None,
'episode_number': int_or_none(media_data.get('episodeNumber')),
**traverse_obj(media_data, {
'id': ('id', {str}),
'title': ('name', {str}),
'description': (('descriptionLong', 'description'), {str}, any),
'genres': ('contentTypes', ..., {str}),
'creators': ('directors', ..., {str}),
'cast': ('mainCast', ..., {str}),
'location': ('productionCountries', ..., {str}, all, {unpack(join_nonempty, delim='; ')}, filter),
'release_year': ('year', {str}, {lambda x: x[:4]}, {int_or_none}),
'duration': ('duration', {int_or_none}),
'series': ('seriesName', {str}),
'season_number': ('seasonNumber', {int_or_none}),
'episode': ('name', {str}, {lambda x: x if media_data['episodeNumber'] is not None else None}),
'episode_number': ('episodeNumber', {int_or_none}),
}),
}

View File

@ -1,61 +0,0 @@
from .adobepass import AdobePassIE
from ..utils import (
int_or_none,
smuggle_url,
update_url_query,
)
class SproutIE(AdobePassIE):
    """Extractor for Sprout Online / Universal Kids videos.

    Resolves the site's internal video API to a ThePlatform media PID and
    delegates the actual format extraction to the ThePlatform extractor via
    a ``url_transparent`` result, attaching whatever series/episode metadata
    the site API already exposes.
    """
    _VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
        'info_dict': {
            'id': 'bm0foJFaTKqb',
            'ext': 'mp4',
            'title': 'Robot Bike Race',
            'description': 'md5:436b1d97117cc437f54c383f4debc66d',
            'timestamp': 1606148940,
            'upload_date': '20201123',
            'uploader': 'NBCU-MPAT',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
        'only_matching': True,
    }, {
        'url': 'https://www.universalkids.com/watch/robot-bike-race',
        'only_matching': True,
    }]
    _GEO_COUNTRIES = ['US']

    def _real_extract(self, url):
        display_id = self._match_id(url)
        # The site API wraps ThePlatform metadata; only the mpxMetadata
        # object is needed here.
        # (Upstream feed: http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/)
        mpx_metadata = self._download_json(
            f'https://www.universalkids.com/_api/videos/{display_id}',
            display_id)['mpxMetadata']
        media_pid = mpx_metadata['mediaPid']
        query = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        # Entitled ("auth") content additionally requires an Adobe Pass
        # (MVPD) token in the ThePlatform link query.
        if mpx_metadata.get('entitlement') == 'auth':
            query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
        theplatform_url = smuggle_url(
            update_url_query(f'https://link.theplatform.com/s/HNK2IC/{media_pid}', query), {
                'force_smil_url': True,
                'geo_countries': self._GEO_COUNTRIES,
            })
        return {
            '_type': 'url_transparent',
            'id': media_pid,
            'url': theplatform_url,
            'series': mpx_metadata.get('seriesName'),
            'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
            'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
            'ie_key': 'ThePlatform',
        }

View File

@ -471,8 +471,7 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, display_id)
title = self._og_search_title(webpage)
urql_state = self._search_json(
r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
urql_state = self._search_json(r'urqlState\s*[=:]', webpage, 'json data', display_id)
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}

View File

@ -2,9 +2,11 @@
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
try_get,
unified_strdate,
)
from ..utils.traversal import traverse_obj
class WatIE(InfoExtractor):
@ -70,8 +72,14 @@ def _real_extract(self, url):
error_desc = video_info.get('error_desc')
if error_desc:
if video_info.get('error_code') == 'GEOBLOCKED':
error_code = video_info.get('error_code')
if error_code == 'GEOBLOCKED':
self.raise_geo_restricted(error_desc, video_info.get('geoList'))
elif error_code == 'DELIVERY_ERROR':
if traverse_obj(video_data, ('delivery', 'code')) == 500:
self.report_drm(video_id)
error_desc = join_nonempty(
error_desc, traverse_obj(video_data, ('delivery', 'error', {str})), delim=': ')
raise ExtractorError(error_desc, expected=True)
title = video_info['title']

View File

@ -37,6 +37,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
'chapters': 'count:20',
'comment_count': int,
'heatmap': 'count:100',
'media_type': 'clip',
},
}]
@ -59,6 +60,7 @@ def _real_extract(self, url):
'url': f'https://www.youtube.com/watch?v={video_id}',
'ie_key': YoutubeIE.ie_key(),
'id': clip_id,
'media_type': 'clip',
'section_start': int(clip_data['startTimeMs']) / 1000,
'section_end': int(clip_data['endTimeMs']) / 1000,
'_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility

View File

@ -35,6 +35,7 @@ class YoutubeYtBeIE(YoutubeBaseInfoExtractor):
'duration': 59,
'comment_count': int,
'channel_follower_count': int,
'media_type': 'short',
},
'params': {
'noplaylist': True,

View File

@ -376,6 +376,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Afrojack',
'uploader_url': 'https://www.youtube.com/@Afrojack',
'uploader_id': '@Afrojack',
'media_type': 'video',
},
'params': {
'youtube_include_dash_manifest': True,
@ -413,10 +414,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1401991663,
'media_type': 'video',
},
},
{
'note': 'Age-gate video with embed allowed in public site',
'note': 'Formerly an age-gate video with embed allowed in public site',
'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
'info_dict': {
'id': 'HsUATh_Nc2U',
@ -424,8 +426,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Godzilla 2 (Official Video)',
'description': 'md5:bf77e03fcae5529475e500129b05668a',
'upload_date': '20200408',
'age_limit': 18,
'availability': 'needs_auth',
'age_limit': 0,
'availability': 'public',
'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
'channel': 'FlyingKitty',
'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
@ -443,8 +445,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@FlyingKitty900',
'comment_count': int,
'channel_is_verified': True,
'media_type': 'video',
},
'skip': 'Age-restricted; requires authentication',
},
{
'note': 'Age-gate video embedable only with clientScreen=EMBED',
@ -507,6 +509,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Herr Lurik',
'uploader_url': 'https://www.youtube.com/@HerrLurik',
'uploader_id': '@HerrLurik',
'media_type': 'video',
},
},
{
@ -546,6 +549,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'deadmau5',
'uploader_url': 'https://www.youtube.com/@deadmau5',
'uploader_id': '@deadmau5',
'media_type': 'video',
},
'expected_warnings': [
'DASH manifest missing',
@ -581,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@Olympics',
'channel_is_verified': True,
'timestamp': 1440707674,
'media_type': 'livestream',
},
'params': {
'skip_download': 'requires avconv',
@ -615,6 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@AllenMeow',
'uploader_id': '@AllenMeow',
'timestamp': 1299776999,
'media_type': 'video',
},
},
# url_encoded_fmt_stream_map is empty string
@ -809,6 +815,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'like_count': int,
'age_limit': 0,
'channel_follower_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,
@ -868,6 +875,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@BKCHarvard',
'uploader_url': 'https://www.youtube.com/@BKCHarvard',
'timestamp': 1422422076,
'media_type': 'video',
},
'params': {
'skip_download': True,
@ -904,6 +912,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1447987198,
'media_type': 'video',
},
'params': {
'skip_download': True,
@ -968,6 +977,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'timestamp': 1484761047,
'media_type': 'video',
},
'params': {
'skip_download': True,
@ -1070,6 +1080,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tags': 'count:11',
'live_status': 'not_live',
'channel_follower_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,
@ -1124,6 +1135,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
'uploader_id': '@ElevageOrVert',
'timestamp': 1497343210,
'media_type': 'video',
},
'params': {
'skip_download': True,
@ -1163,6 +1175,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1377976349,
'media_type': 'video',
},
'params': {
'skip_download': True,
@ -1207,6 +1220,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_follower_count': int,
'uploader': 'The Cinematic Orchestra',
'comment_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,
@ -1275,6 +1289,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
'uploader_id': '@walkaroundjapan7124',
'timestamp': 1605884416,
'media_type': 'video',
},
'params': {
'skip_download': True,
@ -1371,6 +1386,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1395685455,
'media_type': 'video',
}, 'params': {'format': 'mhtml', 'skip_download': True},
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@ -1401,6 +1417,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
'timestamp': 1641170939,
'media_type': 'video',
},
}, {
# date text is premiered video, ensure upload date in UTC (published 1641172509)
@ -1434,6 +1451,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1641172509,
'media_type': 'video',
},
},
{ # continuous livestream.
@ -1495,6 +1513,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Lesmiscore',
'uploader_url': 'https://www.youtube.com/@lesmiscore',
'timestamp': 1648005313,
'media_type': 'short',
},
}, {
# Prefer primary title+description language metadata by default
@ -1523,6 +1542,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
'timestamp': 1662677394,
'media_type': 'video',
},
'params': {'skip_download': True},
}, {
@ -1551,6 +1571,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'cole-dlp-test-acc',
'timestamp': 1659073275,
'like_count': int,
'media_type': 'video',
},
'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
'expected_warnings': [r'Preferring "fr" translated fields'],
@ -1587,6 +1608,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
'media_type': 'video',
},
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}, {
@ -1687,6 +1709,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
'media_type': 'video',
},
'params': {
'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
@ -1719,6 +1742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_follower_count': int,
'categories': ['People & Blogs'],
'tags': [],
'media_type': 'short',
},
},
]
@ -1754,6 +1778,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@ChristopherSykesDocumentaries',
'heatmap': 'count:100',
'timestamp': 1211825920,
'media_type': 'video',
},
'params': {
'skip_download': True,
@ -3787,7 +3812,10 @@ def is_bad_format(fmt):
'tags': keywords,
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
'live_status': live_status,
'media_type': 'livestream' if get_first(video_details, 'isLiveContent') else None,
'media_type': (
'livestream' if get_first(video_details, 'isLiveContent')
else 'short' if get_first(microformats, 'isShortsEligible')
else 'video'),
'release_timestamp': live_start_time,
'_format_sort_fields': ( # source_preference is lower for potentially damaged formats
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang', 'proto'),