from .common import InfoExtractor
from ..utils import (
    int_or_none,
    str_or_none,
    strftime_or_none,
    traverse_obj,
    unified_timestamp,
    url_or_none,
)


class VolejTVIE(InfoExtractor):
    """Extractor for match pages on volej.tv, backed by the site's JSON match API."""

    _VALID_URL = r'https?://volej\.tv/match/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://volej.tv/match/270579',
        'info_dict': {
            'id': '270579',
            'ext': 'mp4',
            'title': 'CZE-SWE (2024-06-16)',
            'categories': ['ženy'],
            'series': 'ZLATÁ EVROPSKÁ VOLEJBALOVÁ LIGA',
            'season': '2023-2024',
            'timestamp': 1718553600,
            'upload_date': '20240616',
        },
    }, {
        'url': 'https://volej.tv/match/487520',
        'info_dict': {
            'id': '487520',
            'ext': 'mp4',
            'thumbnail': r're:https://.+\.(png|jpeg)',
            'title': 'CZE-FRA (2024-09-06)',
            'categories': ['mládež'],
            'series': 'Mistrovství Evropy do 20 let',
            'season': '2024-2025',
            'timestamp': 1725627600,
            'upload_date': '20240906',
        },
    }]

    # Maps the API's `quality` value (used as the bitrate below) to a
    # human-readable resolution label exposed as `format_note`.
    _TBR_RESOLUTIONS = {
        '6000': '1080p',
        '2400': '720p',
        '1500': '480p',
        '800': '360p',
    }

    def _real_extract(self, url):
        """Download the match JSON for *url* and build the info dict.

        Formats come from the `qualities` list of the first entry in `videos`.
        The title is synthesised from the sorted team shortcuts, with the
        match date appended when a timestamp could be parsed.
        """
        video_id = self._match_id(url)
        json_data = self._download_json(
            f'https://api-volejtv-prod.apps.okd4.devopsie.cloud/api/match/{video_id}', video_id)

        formats = []
        # The branching lambda yields [] when the path is missing and skips
        # entries without a usable URL, instead of raising on malformed data.
        for quality in traverse_obj(json_data, (
                'videos', 0, 'qualities', lambda _, v: url_or_none(v['cloud_front_path']))):
            bitrate = str_or_none(quality.get('quality'))
            formats.append({
                'url': quality['cloud_front_path'],
                'tbr': int_or_none(bitrate),
                'format_id': str_or_none(quality.get('id')),
                # Unknown bitrates simply get no note rather than a KeyError
                'format_note': self._TBR_RESOLUTIONS.get(bitrate),
            })

        info = {
            'id': video_id,
            **traverse_obj(json_data, {
                'series': ('competition_name', {str_or_none}),
                'season': ('season', {str_or_none}),
                'timestamp': ('match_time', {unified_timestamp}),
                'categories': ('category', 'title', {str}, filter, all, filter),
                'thumbnail': ('poster', {url_or_none}),
            }),
            'formats': formats,
        }

        # De-duplicate the shortcuts; only strings are kept so sorting/joining cannot fail
        teams = set(traverse_obj(json_data, ('teams', ..., 'shortcut', {str})))
        # NOTE(review): 'FIN' is dropped only when more than two distinct shortcuts
        # are present - presumably a placeholder entry rather than a real
        # participant; confirm against the API.
        if len(teams) > 2:
            teams.discard('FIN')

        title = '-'.join(sorted(teams))
        match_date = strftime_or_none(info.get('timestamp'), '%Y-%m-%d')
        if match_date:
            title += f' ({match_date})'
        info['title'] = title

        return info