1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-27 17:08:32 +00:00

[ie/mave] Add extractor for Mave Digital (code review fixes from #13380)

This commit is contained in:
Anton Larionov 2025-06-15 15:39:32 +02:00
parent 883dfe2994
commit 4e4325e343

View File

@ -1,84 +1,101 @@
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
get_element_by_id,
)
from ..utils import clean_html, int_or_none, parse_iso8601, urljoin
from ..utils.traversal import require, traverse_obj
# Extractor for episode pages served from `<channel>.mave.digital/ep-<number>`.
# NOTE(review): this listing appears to be a diff rendered without +/- markers
# (see the `@ -1,84 +1,101 @` hunk header), so two revisions of the class
# attributes are interleaved below: `_VALID_URL` and `_TESTS` are each assigned
# twice and the brackets do not balance as shown. Confirm against the real file.
class MaveIE(InfoExtractor):
# First pattern: subdomain (lowercase letters only) is captured as `channel_id`,
# the digits after `ep-` as `short_id`.
_VALID_URL = r'https?://(?P<channel_id>[a-z]+)\.mave\.digital/ep-(?P<short_id>[0-9]+)'
# First test list: ids have the form `<channel_id>-<short_id>`.
_TESTS = [
{
'url': 'https://ochenlichnoe.mave.digital/ep-25',
'md5': 'aa3e513ef588b4366df1520657cbc10c',
'info_dict': {
'id': 'ochenlichnoe-25',
'title': 'Между мной и миром: психология самооценки',
'description': 'md5:83183d7002dc32fbebc3ccecd4a1ac03',
'thumbnail': r're:https?://.*\.jpg$',
'ext': 'mp3',
'channel': 'Очень личное',
'channel_id': 'ochenlichnoe',
'channel_url': 'https://ochenlichnoe.mave.digital/',
},
# Second pattern: subdomain (word characters and hyphens) is captured as
# `channel`, and the whole `ep-<digits>` slug as `id`.
_VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
# Second test list: ids are UUID strings and the `<channel>-<slug>` value moves
# to `display_id`; series/season/episode, counts and timestamps are also checked.
_TESTS = [{
'url': 'https://ochenlichnoe.mave.digital/ep-25',
'md5': 'aa3e513ef588b4366df1520657cbc10c',
'info_dict': {
'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
'ext': 'mp3',
'display_id': 'ochenlichnoe-ep-25',
'title': 'Между мной и миром: психология самооценки',
'description': 'md5:4b7463baaccb6982f326bce5c700382a',
'uploader': 'Самарский университет',
'channel': 'Очень личное',
'channel_id': 'ochenlichnoe',
'channel_url': 'https://ochenlichnoe.mave.digital/',
'view_count': int,
'like_count': int,
'dislike_count': int,
'duration': 3744,
'thumbnail': 'https://api.mave.digital/storage/podcasts/2e0c3749-6df2-4946-82f4-50691419c065/images/f37be842-b1d8-425c-818c-21ebddf16032.jpg',
'series': 'Очень личное',
'series_id': '2e0c3749-6df2-4946-82f4-50691419c065',
'season': 'Season 3',
'season_number': 3,
'episode': 'Episode 3',
'episode_number': 3,
'timestamp': 1747817300,
'upload_date': '20250521',
},
{
'url': 'https://budem.mave.digital/ep-12',
'md5': 'e1ce2780fcdb6f17821aa3ca3e8c919f',
'info_dict': {
'id': 'budem-12',
'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
'description': 'md5:d9ce1fc1fb5fc7b7a4e7a0b84a7861c3',
'thumbnail': r're:https?://.*\.jpg$',
'ext': 'mp3',
'channel': 'Все там будем',
'channel_id': 'budem',
'channel_url': 'https://budem.mave.digital/',
},
}, {
'url': 'https://budem.mave.digital/ep-12',
'md5': 'e1ce2780fcdb6f17821aa3ca3e8c919f',
'info_dict': {
'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
'ext': 'mp3',
'display_id': 'budem-ep-12',
'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
'uploader': 'Полина Цветкова+Евгения Акопова',
'channel': 'Все там будем',
'channel_id': 'budem',
'channel_url': 'https://budem.mave.digital/',
'view_count': int,
'like_count': int,
'dislike_count': int,
'age_limit': 18,
'duration': 3664,
'thumbnail': 'https://api.mave.digital/storage/podcasts/fe9347bf-c009-4ebd-87e8-b06f2f324746/images/985679d7-ccd7-4232-8fe4-5eafca1be190.jpg',
'series': 'Все там будем',
'series_id': 'fe9347bf-c009-4ebd-87e8-b06f2f324746',
'season': 'Season 2',
'season_number': 2,
'episode': 'Episode 5',
'episode_number': 5,
'timestamp': 1735538400,
'upload_date': '20241230',
},
]
}]
# Base URL that `_real_extract` joins with the relative `audio` and `image`
# values via `urljoin`.
_API_BASE_URL = 'https://api.mave.digital/'
# Build the info dict for a single episode page.
# NOTE(review): this listing appears to be a diff rendered without +/- markers,
# so the body below interleaves two revisions of the method: an HTML-scraping
# variant (title regex, og: tags, `_mave_link`) and a variant that reads the
# page's Nuxt JSON payload. Confirm against the real file before relying on it.
def _real_extract(self, url):
# Uses the `channel_id`/`short_id` groups of the first `_VALID_URL` pattern.
channel_id, short_id = self._match_valid_url(url).group('channel_id', 'short_id')
channel_url = f'https://{channel_id}.mave.digital/'
video_id = f'{channel_id}-{short_id}'
webpage = self._download_webpage(url, video_id)
# Page <title> format: "TITLE — Подкаст «CHANNEL»" ("Подкаст" is Russian for "Podcast")
page_title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
# NOTE(review): re.search returns None when the title does not follow that
# format, in which case the .group() calls below raise AttributeError.
match = re.search(r'^(.+?)\s*—\s*(.+?)«(.+?)»', page_title)
title = match.group(1).strip()
channel = match.group(3).strip()
# Uses the `channel`/`id` groups of the second `_VALID_URL` pattern; `slug` is
# the whole `ep-<digits>` path segment.
channel_id, slug = self._match_valid_url(url).group('channel', 'id')
display_id = f'{channel_id}-{slug}'
webpage = self._download_webpage(url, display_id)
# Take the first entry under 'data' of the Nuxt payload whose
# 'activeEpisodeData' is truthy; `require('podcast data')` presumably raises
# when nothing matches — TODO confirm against utils.traversal.
data = traverse_obj(
self._search_nuxt_json(webpage, display_id),
('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))
return {
'id': video_id,
'title': title,
'description': self._og_search_description(webpage),
'channel': channel,
'display_id': display_id,
'channel_id': channel_id,
# NOTE(review): 'channel_url' appears twice in this literal as shown; in a
# Python dict display the later entry wins.
'channel_url': channel_url,
'url': self._mave_link(webpage, video_id),
'thumbnail': self._og_search_thumbnail(webpage),
'channel_url': f'https://{channel_id}.mave.digital/',
# Presumably marks the download as audio-only — confirm against yt-dlp docs.
'vcodec': 'none',
# Episode-level fields; keys unpacked here override same-named keys above.
**traverse_obj(data, ('activeEpisodeData', {
# `audio` and `image` are joined onto `_API_BASE_URL`.
'url': ('audio', {urljoin(self._API_BASE_URL)}),
'id': ('id', {str}),
'title': ('title', {str}),
'description': ('description', {clean_html}),
'thumbnail': ('image', {urljoin(self._API_BASE_URL)}),
'duration': ('duration', {int_or_none}),
'season_number': ('season', {int_or_none}),
'episode_number': ('number', {int_or_none}),
'view_count': ('listenings', {int_or_none}),
# First `count` among `reactions` entries whose `type` is 'like' / 'dislike'.
'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
# 18 when `is_explicit` is true, otherwise None.
'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
'timestamp': ('publish_date', {parse_iso8601}),
})),
# Podcast-level fields; the podcast title feeds both `series` and `channel`.
**traverse_obj(data, ('podcast', 'podcast', {
'series_id': ('id', {str}),
'series': ('title', {str}),
'channel': ('title', {str}),
'uploader': ('author', {str}),
})),
}
def _mave_link(self, webpage, video_id):
    """Return the absolute URL of the episode's mp3 file.

    The JSON stored in the page element with id ``__NUXT_DATA__`` is scanned
    for the first top-level string ending in ``.mp3``; that value is appended
    to the ``https://api.mave.digital/`` host.

    Args:
        webpage: HTML of the episode page.
        video_id: Identifier attached to the error raised on failure.

    Raises:
        ExtractorError: if the element is missing/empty or holds no string
            ending in ``.mp3``.
    """
    nuxt_data = get_element_by_id('__NUXT_DATA__', webpage)
    # A missing element gives a falsy result; treat it as "no entries" so the
    # ExtractorError below is raised instead of a TypeError from json.loads.
    entries = json.loads(nuxt_data) if nuxt_data else ()
    # Default to None so the "not found" check works; the previous loop left
    # `link_id` unbound in that case and failed with UnboundLocalError.
    link_id = next(
        (value for value in entries
         if isinstance(value, str) and value.endswith('.mp3')),
        None)
    if link_id is None:
        raise ExtractorError('Unable to find mp3 file link', video_id=video_id)
    return 'https://api.mave.digital/' + link_id