mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 17:08:32 +00:00
[ie/mave] Add extractor for Mave Digital (code review fixes from #13380)
This commit is contained in:
parent
883dfe2994
commit
4e4325e343
@ -1,84 +1,101 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_id,
|
||||
)
|
||||
from ..utils import clean_html, int_or_none, parse_iso8601, urljoin
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class MaveIE(InfoExtractor):
    """Extractor for single podcast episodes hosted on ``*.mave.digital``.

    Episode pages live at ``https://<channel>.mave.digital/ep-<number>``.
    All metadata, including the audio location, is read from the Nuxt JSON
    payload embedded in the episode page; no separate API request is made.
    """

    # ``channel`` is the podcast's subdomain, ``id`` is the ``ep-<number>`` slug.
    _VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
    _TESTS = [{
        'url': 'https://ochenlichnoe.mave.digital/ep-25',
        'md5': 'aa3e513ef588b4366df1520657cbc10c',
        'info_dict': {
            'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
            'ext': 'mp3',
            'display_id': 'ochenlichnoe-ep-25',
            'title': 'Между мной и миром: психология самооценки',
            'description': 'md5:4b7463baaccb6982f326bce5c700382a',
            'uploader': 'Самарский университет',
            'channel': 'Очень личное',
            'channel_id': 'ochenlichnoe',
            'channel_url': 'https://ochenlichnoe.mave.digital/',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'duration': 3744,
            'thumbnail': 'https://api.mave.digital/storage/podcasts/2e0c3749-6df2-4946-82f4-50691419c065/images/f37be842-b1d8-425c-818c-21ebddf16032.jpg',
            'series': 'Очень личное',
            'series_id': '2e0c3749-6df2-4946-82f4-50691419c065',
            'season': 'Season 3',
            'season_number': 3,
            'episode': 'Episode 3',
            'episode_number': 3,
            'timestamp': 1747817300,
            'upload_date': '20250521',
        },
    }, {
        'url': 'https://budem.mave.digital/ep-12',
        'md5': 'e1ce2780fcdb6f17821aa3ca3e8c919f',
        'info_dict': {
            'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
            'ext': 'mp3',
            'display_id': 'budem-ep-12',
            'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
            'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
            'uploader': 'Полина Цветкова+Евгения Акопова',
            'channel': 'Все там будем',
            'channel_id': 'budem',
            'channel_url': 'https://budem.mave.digital/',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
            'duration': 3664,
            'thumbnail': 'https://api.mave.digital/storage/podcasts/fe9347bf-c009-4ebd-87e8-b06f2f324746/images/985679d7-ccd7-4232-8fe4-5eafca1be190.jpg',
            'series': 'Все там будем',
            'series_id': 'fe9347bf-c009-4ebd-87e8-b06f2f324746',
            'season': 'Season 2',
            'season_number': 2,
            'episode': 'Episode 5',
            'episode_number': 5,
            'timestamp': 1735538400,
            'upload_date': '20241230',
        },
    }]

    # The payload stores audio and image locations relative to this host.
    _API_BASE_URL = 'https://api.mave.digital/'

    def _real_extract(self, url):
        """Build the info dict for one episode page.

        Downloads the page at ``url``, parses its embedded Nuxt JSON and maps
        the ``activeEpisodeData`` and ``podcast.podcast`` objects onto
        yt-dlp fields.

        The ``require('podcast data')`` step is what rejects a page whose
        payload has no entry carrying ``activeEpisodeData``.
        """
        channel_id, slug = self._match_valid_url(url).group('channel', 'id')
        # The slug alone ("ep-25") is only unique within one podcast, so the
        # channel subdomain is prefixed to get a site-wide display id.
        display_id = f'{channel_id}-{slug}'
        webpage = self._download_webpage(url, display_id)

        # Pick the first entry under "data" that carries "activeEpisodeData";
        # `any` collapses the filtered branch to that single match.
        data = traverse_obj(
            self._search_nuxt_json(webpage, display_id),
            ('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))

        return {
            'display_id': display_id,
            'channel_id': channel_id,
            'channel_url': f'https://{channel_id}.mave.digital/',
            # Episodes are audio-only.
            'vcodec': 'none',
            # Episode-level fields.
            **traverse_obj(data, ('activeEpisodeData', {
                'url': ('audio', {urljoin(self._API_BASE_URL)}),
                'id': ('id', {str}),
                'title': ('title', {str}),
                'description': ('description', {clean_html}),
                'thumbnail': ('image', {urljoin(self._API_BASE_URL)}),
                'duration': ('duration', {int_or_none}),
                'season_number': ('season', {int_or_none}),
                'episode_number': ('number', {int_or_none}),
                'view_count': ('listenings', {int_or_none}),
                # "reactions" is a list of {type, count} entries; take the
                # count of the first entry of the wanted type.
                'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
                'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
                # Only set an age limit when the episode is flagged explicit.
                'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
                'timestamp': ('publish_date', {parse_iso8601}),
            })),
            # Podcast-level fields; the podcast title doubles as the channel name.
            **traverse_obj(data, ('podcast', 'podcast', {
                'series_id': ('id', {str}),
                'series': ('title', {str}),
                'channel': ('title', {str}),
                'uploader': ('author', {str}),
            })),
        }
|
||||
|
Loading…
Reference in New Issue
Block a user