[cleanup] Misc (#8968)

Authored by: pukkandan, bashonly, seproDev
2026-03-09 15:50:55 +00:00 · 2024-03-10 20:48:44 +05:30
parent ed3bb2b0a1
commit 615a84447e
19 changed files with 80 additions and 94 deletions
--- a/yt_dlp/extractor/altcensored.py
+++ b/yt_dlp/extractor/altcensored.py
@@ -4,6 +4,7 @@ from .archiveorg import ArchiveOrgIE
 from .common import InfoExtractor
 from ..utils import (
    InAdvancePagedList,
+    clean_html,
    int_or_none,
    orderedSet,
    str_to_int,
@@ -32,13 +33,15 @@ class AltCensoredIE(InfoExtractor):
            'duration': 926.09,
            'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
            'view_count': int,
-            'categories': ['News & Politics'],  # FIXME
+            'categories': ['News & Politics'],
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
+        category = clean_html(self._html_search_regex(
+            r'<a href="/category/\d+">([^<]+)</a>', webpage, 'category', default=None))

        return {
            '_type': 'url_transparent',
@@ -46,9 +49,7 @@ class AltCensoredIE(InfoExtractor):
            'ie_key': ArchiveOrgIE.ie_key(),
            'view_count': str_to_int(self._html_search_regex(
                r'YouTube Views:(?:\s|&nbsp;)*([\d,]+)', webpage, 'view count', default=None)),
-            'categories': self._html_search_regex(
-                r'<a href="/category/\d+">\s*\n?\s*([^<]+)</a>',
-                webpage, 'category', default='').split() or None,
+            'categories': [category] if category else None,
        }


--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@@ -142,10 +142,10 @@ class ArteTVIE(ArteTVBaseIE):
    def _fix_accessible_subs_locale(subs):
        updated_subs = {}
        for lang, sub_formats in subs.items():
-            for format in sub_formats:
-                if format.get('url', '').endswith('-MAL.m3u8'):
+            for fmt in sub_formats:
+                if fmt.get('url', '').endswith('-MAL.m3u8'):
                    lang += '-acc'
-                updated_subs.setdefault(lang, []).append(format)
+                updated_subs.setdefault(lang, []).append(fmt)
        return updated_subs

    def _real_extract(self, url):
--- a/yt_dlp/extractor/getcourseru.py
+++ b/yt_dlp/extractor/getcourseru.py
@@ -160,9 +160,8 @@ class GetCourseRuIE(InfoExtractor):
            self._login(hostname, username, password)

        display_id = self._match_id(url)
-        # NB: 404 is returned due to yt-dlp not properly following redirects #9020
-        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404)
-        if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404:
+        webpage, urlh = self._download_webpage_handle(url, display_id)
+        if self._LOGIN_URL_PATH in urlh.url:
            raise ExtractorError(
                f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
                expected=True)
--- a/yt_dlp/extractor/medaltv.py
+++ b/yt_dlp/extractor/medaltv.py
@@ -9,7 +9,6 @@ from ..utils import (
    int_or_none,
    str_or_none,
    traverse_obj,
-    update_url_query,
 )


@@ -82,7 +81,7 @@ class MedalTVIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)

-        webpage = self._download_webpage(update_url_query(url, {'mobilebypass': 'true'}), video_id)
+        webpage = self._download_webpage(url, video_id, query={'mobilebypass': 'true'})

        hydration_data = self._search_json(
            r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
--- a/yt_dlp/extractor/radiko.py
+++ b/yt_dlp/extractor/radiko.py
@@ -162,10 +162,8 @@ class RadikoBaseIE(InfoExtractor):
        return formats

    def _extract_performers(self, prog):
-        performers = traverse_obj(prog, (
-            'pfm/text()', ..., {lambda x: re.split(r'[/／、　,，]', x)}, ..., {str.strip}))
-        # TODO: change 'artist' fields to 'artists' and return traversal list instead of str
-        return ', '.join(performers) or None
+        return traverse_obj(prog, (
+            'pfm/text()', ..., {lambda x: re.split(r'[/／、　,，]', x)}, ..., {str.strip})) or None


 class RadikoIE(RadikoBaseIE):
@@ -194,7 +192,7 @@ class RadikoIE(RadikoBaseIE):
        return {
            'id': video_id,
            'title': try_call(lambda: prog.find('title').text),
-            'artist': self._extract_performers(prog),
+            'cast': self._extract_performers(prog),
            'description': clean_html(try_call(lambda: prog.find('info').text)),
            'uploader': try_call(lambda: station_program.find('.//name').text),
            'uploader_id': station,
@@ -253,7 +251,7 @@ class RadikoRadioIE(RadikoBaseIE):
        return {
            'id': station,
            'title': title,
-            'artist': self._extract_performers(prog),
+            'cast': self._extract_performers(prog),
            'description': description,
            'uploader': station_name,
            'uploader_id': station,
--- a/yt_dlp/extractor/slideslive.py
+++ b/yt_dlp/extractor/slideslive.py
@@ -25,8 +25,8 @@ class SlidesLiveIE(InfoExtractor):
            'id': '38902413',
            'ext': 'mp4',
            'title': 'GCC IA16 backend',
-            'timestamp': 1648189972,
-            'upload_date': '20220325',
+            'timestamp': 1697793372,
+            'upload_date': '20231020',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:42',
            'chapters': 'count:41',
@@ -42,8 +42,8 @@ class SlidesLiveIE(InfoExtractor):
            'id': '38935785',
            'ext': 'mp4',
            'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
-            'upload_date': '20211115',
-            'timestamp': 1636996003,
+            'upload_date': '20231020',
+            'timestamp': 1697807002,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:640',
            'chapters': 'count:639',
@@ -59,9 +59,9 @@ class SlidesLiveIE(InfoExtractor):
            'id': '38973182',
            'ext': 'mp4',
            'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
-            'upload_date': '20220201',
+            'upload_date': '20231020',
            'thumbnail': r're:^https?://.*\.jpg',
-            'timestamp': 1643728135,
+            'timestamp': 1697822521,
            'thumbnails': 'count:3',
            'chapters': 'count:2',
            'duration': 5889,
@@ -70,37 +70,22 @@ class SlidesLiveIE(InfoExtractor):
            'skip_download': 'm3u8',
        },
    }, {
-        # service_name = youtube, only XML slides info
+        # formerly youtube, converted to native
        'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
        'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
        'info_dict': {
-            'id': 'jmg02wCJD5M',
-            'display_id': '38897546',
+            'id': '38897546',
            'ext': 'mp4',
            'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
-            'description': 'Watch full version of this video at https://slideslive.com/38897546.',
-            'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
-            'channel': 'SlidesLive Videos - G1',
-            'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
-            'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
-            'uploader': 'SlidesLive Videos - G1',
-            'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
-            'live_status': 'not_live',
-            'upload_date': '20160710',
-            'timestamp': 1618786715,
-            'duration': 6827,
-            'like_count': int,
-            'view_count': int,
-            'comment_count': int,
-            'channel_follower_count': int,
-            'age_limit': 0,
-            'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'upload_date': '20231029',
+            'timestamp': 1698588144,
            'thumbnails': 'count:169',
-            'playable_in_embed': True,
-            'availability': 'unlisted',
-            'tags': [],
-            'categories': ['People & Blogs'],
            'chapters': 'count:168',
+            'duration': 6827,
+        },
+        'params': {
+            'skip_download': 'm3u8',
        },
    }, {
        # embed-only presentation, only XML slides info
@@ -111,8 +96,8 @@ class SlidesLiveIE(InfoExtractor):
            'title': 'Towards a Deep Network Architecture for Structured Smoothness',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:8',
-            'timestamp': 1629671508,
-            'upload_date': '20210822',
+            'timestamp': 1697803109,
+            'upload_date': '20231020',
            'chapters': 'count:7',
            'duration': 326,
        },
@@ -128,8 +113,8 @@ class SlidesLiveIE(InfoExtractor):
            'title': 'MoReL: Multi-omics Relational Learning',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:7',
-            'timestamp': 1654714970,
-            'upload_date': '20220608',
+            'timestamp': 1697824939,
+            'upload_date': '20231020',
            'chapters': 'count:6',
            'duration': 171,
        },
@@ -145,8 +130,8 @@ class SlidesLiveIE(InfoExtractor):
            'title': 'Decentralized Attribution of Generative Models',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:16',
-            'timestamp': 1622806321,
-            'upload_date': '20210604',
+            'timestamp': 1697814901,
+            'upload_date': '20231020',
            'chapters': 'count:15',
            'duration': 306,
        },
@@ -162,8 +147,8 @@ class SlidesLiveIE(InfoExtractor):
            'title': 'Efficient Active Search for Combinatorial Optimization Problems',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:9',
-            'timestamp': 1654714896,
-            'upload_date': '20220608',
+            'timestamp': 1697824757,
+            'upload_date': '20231020',
            'chapters': 'count:8',
            'duration': 295,
        },
@@ -177,10 +162,10 @@ class SlidesLiveIE(InfoExtractor):
            'id': '38979880',
            'ext': 'mp4',
            'title': 'The Representation Power of Neural Networks',
-            'timestamp': 1654714962,
+            'timestamp': 1697824919,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:22',
-            'upload_date': '20220608',
+            'upload_date': '20231020',
            'chapters': 'count:21',
            'duration': 294,
        },
@@ -200,10 +185,10 @@ class SlidesLiveIE(InfoExtractor):
                'id': '38979682',
                'ext': 'mp4',
                'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
-                'timestamp': 1654714920,
+                'timestamp': 1697824815,
                'thumbnail': r're:^https?://.*\.(?:jpg|png)',
                'thumbnails': 'count:30',
-                'upload_date': '20220608',
+                'upload_date': '20231020',
                'chapters': 'count:31',
                'duration': 272,
            },
@@ -213,8 +198,8 @@ class SlidesLiveIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
                'duration': 3,
-                'timestamp': 1654714920,
-                'upload_date': '20220608',
+                'timestamp': 1697824815,
+                'upload_date': '20231020',
            },
        }, {
            'info_dict': {
@@ -222,8 +207,8 @@ class SlidesLiveIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
                'duration': 4,
-                'timestamp': 1654714920,
-                'upload_date': '20220608',
+                'timestamp': 1697824815,
+                'upload_date': '20231020',
            },
        }],
        'params': {
@@ -242,10 +227,10 @@ class SlidesLiveIE(InfoExtractor):
                'id': '38979481',
                'ext': 'mp4',
                'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
-                'timestamp': 1654714877,
+                'timestamp': 1697824716,
                'thumbnail': r're:^https?://.*\.(?:jpg|png)',
                'thumbnails': 'count:43',
-                'upload_date': '20220608',
+                'upload_date': '20231020',
                'chapters': 'count:43',
                'duration': 315,
            },
@@ -255,8 +240,8 @@ class SlidesLiveIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
                'duration': 3,
-                'timestamp': 1654714877,
-                'upload_date': '20220608',
+                'timestamp': 1697824716,
+                'upload_date': '20231020',
            },
        }],
        'params': {
@@ -275,10 +260,10 @@ class SlidesLiveIE(InfoExtractor):
            'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
            'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
            'uploader': 'SlidesLive Videos - A',
-            'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
-            'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
+            'uploader_id': '@slideslivevideos-a6075',
+            'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
            'upload_date': '20200903',
-            'timestamp': 1602599092,
+            'timestamp': 1697805922,
            'duration': 942,
            'age_limit': 0,
            'live_status': 'not_live',
@@ -303,8 +288,8 @@ class SlidesLiveIE(InfoExtractor):
            'id': '38983994',
            'ext': 'mp4',
            'title': 'Zero-Shot AutoML with Pretrained Models',
-            'timestamp': 1662384834,
-            'upload_date': '20220905',
+            'timestamp': 1697826708,
+            'upload_date': '20231020',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:23',
            'chapters': 'count:22',
@@ -336,8 +321,8 @@ class SlidesLiveIE(InfoExtractor):
            'title': 'Towards a Deep Network Architecture for Structured Smoothness',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:8',
-            'timestamp': 1629671508,
-            'upload_date': '20210822',
+            'timestamp': 1697803109,
+            'upload_date': '20231020',
            'chapters': 'count:7',
            'duration': 326,
        },
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -190,10 +190,9 @@ class TwitchBaseIE(InfoExtractor):
            'url': thumbnail,
        }] if thumbnail else None

-    def _extract_twitch_m3u8_formats(self, video_id, token, signature):
-        """Subclasses must define _M3U8_PATH"""
+    def _extract_twitch_m3u8_formats(self, path, video_id, token, signature):
        return self._extract_m3u8_formats(
-            f'{self._USHER_BASE}/{self._M3U8_PATH}/{video_id}.m3u8', video_id, 'mp4', query={
+            f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
                'allow_source': 'true',
                'allow_audio_only': 'true',
                'allow_spectre': 'true',
@@ -216,7 +215,6 @@ class TwitchVodIE(TwitchBaseIE):
                        )
                        (?P<id>\d+)
                    '''
-    _M3U8_PATH = 'vod'

    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
@@ -547,7 +545,7 @@ class TwitchVodIE(TwitchBaseIE):
        access_token = self._download_access_token(vod_id, 'video', 'id')

        formats = self._extract_twitch_m3u8_formats(
-            vod_id, access_token['value'], access_token['signature'])
+            'vod', vod_id, access_token['value'], access_token['signature'])
        formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration')))

        self._prefer_source(formats)
@@ -926,7 +924,6 @@ class TwitchStreamIE(TwitchBaseIE):
                        )
                        (?P<id>[^/#?]+)
                    '''
-    _M3U8_PATH = 'api/channel/hls'

    _TESTS = [{
        'url': 'http://www.twitch.tv/shroomztv',
@@ -1032,7 +1029,7 @@ class TwitchStreamIE(TwitchBaseIE):

        stream_id = stream.get('id') or channel_name
        formats = self._extract_twitch_m3u8_formats(
-            channel_name, access_token['value'], access_token['signature'])
+            'api/channel/hls', channel_name, access_token['value'], access_token['signature'])
        self._prefer_source(formats)

        view_count = stream.get('viewers')
--- a/yt_dlp/extractor/vbox7.py
+++ b/yt_dlp/extractor/vbox7.py
@@ -43,7 +43,7 @@ class Vbox7IE(InfoExtractor):
            'uploader': 'svideteliat_ot_varshava',
            'view_count': int,
            'timestamp': 1360215023,
-            'thumbnail': 'https://i49.vbox7.com/design/iconci/png/noimg6.png',
+            'thumbnail': 'https://i49.vbox7.com/o/249/249bb972c20.jpg',
            'description': 'Смях! Чудо - чист за секунди - Скрита камера',
            'upload_date': '20130207',
            'duration': 83,