[ie/mave] Add extractor for Mave Digital (download original full-size thumbnail)

2025-06-27 17:08:32 +00:00 · 2025-06-15 16:45:51 +02:00 · 2025-06-15 16:45:51 +02:00 · 2d5d25a4e5
commit 2d5d25a4e5
parent 4e4325e343
1 changed files with 14 additions and 3 deletions
--- a/yt_dlp/extractor/mave.py
+++ b/yt_dlp/extractor/mave.py
@@ -1,3 +1,5 @@
+import re
+
 from .common import InfoExtractor
 from ..utils import clean_html, int_or_none, parse_iso8601, urljoin
 from ..utils.traversal import require, traverse_obj
@@ -22,7 +24,7 @@ class MaveIE(InfoExtractor):
            'like_count': int,
            'dislike_count': int,
            'duration': 3744,
-            'thumbnail': 'https://api.mave.digital/storage/podcasts/2e0c3749-6df2-4946-82f4-50691419c065/images/f37be842-b1d8-425c-818c-21ebddf16032.jpg',
+            'thumbnail': r're:https://.*/storage/podcasts/2e0c3749-6df2-4946-82f4-50691419c065/images/f37be842-b1d8-425c-818c-21ebddf16032.jpg',
            'series': 'Очень личное',
            'series_id': '2e0c3749-6df2-4946-82f4-50691419c065',
            'season': 'Season 3',
@@ -50,7 +52,7 @@ class MaveIE(InfoExtractor):
            'dislike_count': int,
            'age_limit': 18,
            'duration': 3664,
-            'thumbnail': 'https://api.mave.digital/storage/podcasts/fe9347bf-c009-4ebd-87e8-b06f2f324746/images/985679d7-ccd7-4232-8fe4-5eafca1be190.jpg',
+            'thumbnail': r're:https://.*/storage/podcasts/fe9347bf-c009-4ebd-87e8-b06f2f324746/images/985679d7-ccd7-4232-8fe4-5eafca1be190.jpg',
            'series': 'Все там будем',
            'series_id': 'fe9347bf-c009-4ebd-87e8-b06f2f324746',
            'season': 'Season 2',
@@ -77,12 +79,12 @@ def _real_extract(self, url):
            'channel_id': channel_id,
            'channel_url': f'https://{channel_id}.mave.digital/',
            'vcodec': 'none',
+            'thumbnail': self._extract_thumbnail(webpage),
            **traverse_obj(data, ('activeEpisodeData', {
                'url': ('audio', {urljoin(self._API_BASE_URL)}),
                'id': ('id', {str}),
                'title': ('title', {str}),
                'description': ('description', {clean_html}),
-                'thumbnail': ('image', {urljoin(self._API_BASE_URL)}),
                'duration': ('duration', {int_or_none}),
                'season_number': ('season', {int_or_none}),
                'episode_number': ('number', {int_or_none}),
@@ -99,3 +101,12 @@ def _real_extract(self, url):
                'uploader': ('author', {str}),
            })),
        }
+
+    def _extract_thumbnail(self, webpage):
+        """Return the full-size thumbnail URL from the og:image tag, or None if absent."""
+        # _API_BASE_URL only serves the mp3 file itself; the thumbnail lives on a specific
+        # storage server (e.g. https://ru-msk-dr3-1.store.cloud.mts.ru/) exposed via og tags.
+        trimmed_thumbnail = self._og_search_thumbnail(webpage, default=None)
+        # Drop the resolution postfix to get the base image: 123456789_600.jpg -> 123456789.jpg
+        # re.sub() raises TypeError on a non-string, so skip it when no og:image was found.
+        return re.sub(r'_\d+(?=\.)', '', trimmed_thumbnail) if trimmed_thumbnail else None