diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 34c98b537..642535d8b 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2405,7 +2405,11 @@
     VoicyChannelIE,
     VoicyIE,
 )
-from .volejtv import VolejTVIE
+from .volejtv import (
+    VolejTVCategoryPlaylistIE,
+    VolejTVClubPlaylistIE,
+    VolejTVIE,
+)
 from .voxmedia import (
     VoxMediaIE,
     VoxMediaVolumeIE,
diff --git a/yt_dlp/extractor/volejtv.py b/yt_dlp/extractor/volejtv.py
index 42ef9b128..2b0894570 100644
--- a/yt_dlp/extractor/volejtv.py
+++ b/yt_dlp/extractor/volejtv.py
@@ -1,40 +1,154 @@
+import functools
+
 from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    InAdvancePagedList,
+    int_or_none,
+    join_nonempty,
+    orderedSet,
+    str_or_none,
+    strftime_or_none,
+    traverse_obj,
+    unified_timestamp,
+    url_or_none,
+)
 
 
-class VolejTVIE(InfoExtractor):
-    _VALID_URL = r'https?://volej\.tv/video/(?P<id>\d+)'
+class VolejTVBaseIE(InfoExtractor):
+    # Frame height for each value of a rendition's "quality" field (the same field is used as tbr)
+    TBR_HEIGHT_MAPPING = {'6000': 1080, '2400': 720, '1500': 480, '800': 360}
+
+    def _call_api(self, endpoint, display_id, query=None):
+        return self._download_json(
+            f'https://api-volejtv-prod.apps.okd4.devopsie.cloud/api/{endpoint}', display_id, query=query)
+
+
+class VolejTVIE(VolejTVBaseIE):
+    IE_NAME = 'volejtv:match'
+    _VALID_URL = r'https?://volej\.tv/match/(?P<id>\d+)'
     _TESTS = [{
-        'url': 'https://volej.tv/video/725742/',
+        'url': 'https://volej.tv/match/270579',
         'info_dict': {
-            'id': '725742',
+            'id': '270579',
             'ext': 'mp4',
-            'description': 'Zápas VK Královo Pole vs VK Prostějov 10.12.2022 v 19:00 na Volej.TV',
-            'thumbnail': 'https://volej.tv/images/og/16/17186/og.png',
-            'title': 'VK Královo Pole vs VK Prostějov',
+            'title': 'SWE-CZE (2024-06-16)',
+            'categories': ['ženy'],
+            'series': 'ZLATÁ EVROPSKÁ VOLEJBALOVÁ LIGA',
+            'season': '2023-2024',
+            'timestamp': 1718553600,
+            'upload_date': '20240616',
         },
     }, {
-        'url': 'https://volej.tv/video/725605/',
+        'url': 'https://volej.tv/match/487520',
         'info_dict': {
-            'id': '725605',
+            'id': '487520',
             'ext': 'mp4',
-            'thumbnail': 'https://volej.tv/images/og/15/17185/og.png',
-            'title': 'VK Lvi Praha vs VK Euro Sitex Příbram',
-            'description': 'Zápas VK Lvi Praha vs VK Euro Sitex Příbram 11.12.2022 v 19:00 na Volej.TV',
+            'thumbnail': r're:https://.+\.(png|jpeg)',
+            'title': 'FRA-CZE (2024-09-06)',
+            'categories': ['mládež'],
+            'series': 'Mistrovství Evropy do 20 let',
+            'season': '2024-2025',
+            'timestamp': 1725627600,
+            'upload_date': '20240906',
         },
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        json_data = self._search_json(
-            r'<\s*!\[CDATA[^=]+=', webpage, 'CDATA', video_id)
-        formats, subtitle = self._extract_m3u8_formats_and_subtitles(
-            json_data['urls']['hls'], video_id)
-        return {
+        json_data = self._call_api(f'match/{video_id}', video_id)
+        formats = []
+        for video in traverse_obj(json_data, ('videos', 0, 'qualities', lambda _, v: v['cloud_front_path'])):
+            formats.append(traverse_obj(video, {
+                'url': ('cloud_front_path', {url_or_none}),
+                'tbr': ('quality', {int_or_none}),
+                'format_id': ('id', {str_or_none}),
+                'height': ('quality', {str_or_none}, {self.TBR_HEIGHT_MAPPING.get}),
+            }))
+        data = {
             'id': video_id,
-            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
-            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
-            'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
+            **traverse_obj(json_data, {
+                'series': ('competition_name', {str}),
+                'season': ('season', {str}),
+                'timestamp': ('match_time', {unified_timestamp}),
+                'categories': ('category', 'title', {str}, filter, all, filter),
+                'thumbnail': ('poster', {url_or_none}),
+            }),
             'formats': formats,
-            'subtitles': subtitle,
         }
+        teams = orderedSet(traverse_obj(json_data, ('teams', ..., 'shortcut', {str})))
+        # NOTE(review): unclear why 'FIN' is dropped when more than two teams are listed - presumably a non-team placeholder; confirm
+        if len(teams) > 2 and 'FIN' in teams:
+            teams.remove('FIN')
+        # 'timestamp' is absent from data when match_time is missing or unparsable, so do not index it directly
+        match_date = strftime_or_none(data.get('timestamp'), '%Y-%m-%d')
+        data['title'] = join_nonempty(
+            join_nonempty(*teams, delim='-'),
+            match_date and f'({match_date})', delim=' ')
+        return data
+
+
+class VolejTVClubPlaylistIE(VolejTVBaseIE):
+    IE_NAME = 'volejtv:club'
+    _VALID_URL = r'https?://volej\.tv/klub/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://volej.tv/klub/1173',
+        'info_dict': {
+            'id': '1173',
+            'title': 'VK Jihostroj České Budějovice',
+        },
+        'playlist_mincount': 30,
+    }]
+    _PAGE_SIZE = 6
+
+    def _get_page(self, playlist_id, page):
+        return self._call_api(f'match/by-team-id-paginated/{playlist_id}', playlist_id,
+                              query={'page': page + 1, 'take': self._PAGE_SIZE, 'order': 'DESC'})
+
+    def _entries(self, playlist_id, first_page_data, page):
+        entries = first_page_data if page == 0 else self._get_page(playlist_id, page)
+        for entry in entries.get('data', []):
+            yield self.url_result(f"https://volej.tv/match/{entry['id']}", VolejTVIE)
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        title = self._call_api(f'team/show/{playlist_id}', playlist_id)['title']
+        first_page_data = self._get_page(playlist_id, 0)
+        total_pages = traverse_obj(first_page_data, ('meta', 'pageCount', {int}))
+        return self.playlist_result(InAdvancePagedList(
+            functools.partial(self._entries, playlist_id, first_page_data),
+            total_pages, self._PAGE_SIZE), playlist_id, title)
+
+
+class VolejTVCategoryPlaylistIE(VolejTVClubPlaylistIE):
+    IE_NAME = 'volejtv:category'
+    _VALID_URL = r'https?://volej\.tv/kategorie/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://volej.tv/kategorie/chance-cesky-pohar',
+        'info_dict': {
+            'id': 'chance-cesky-pohar',
+            'title': 'Chance Český pohár',
+        },
+        'playlist_mincount': 30,
+    }]
+    _PAGE_SIZE = 10
+
+    def _get_page(self, playlist_id, page):
+        return self._call_api(f'match/by-category-id-paginated/{playlist_id}', playlist_id,
+                              query={'page': page + 1, 'take': self._PAGE_SIZE, 'order': 'DESC'})
+
+    def _get_category(self, playlist_id):
+        categories = self._call_api('category', playlist_id)
+        for category in categories:
+            if category['slug'] == str(playlist_id):
+                return category['id'], category['title']
+        raise ExtractorError(f'Unable to find category {playlist_id!r}', expected=True)
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        category_id, title = self._get_category(playlist_id)
+        first_page_data = self._get_page(category_id, 0)
+        total_pages = traverse_obj(first_page_data, ('meta', 'pageCount', {int}))
+        return self.playlist_result(InAdvancePagedList(
+            functools.partial(self._entries, category_id, first_page_data),
+            total_pages, self._PAGE_SIZE), playlist_id, title)