From 71b4555a96d651a3e86c3f1bb6fe441bf92d34f7 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Mon, 19 May 2025 18:48:27 +0530 Subject: [PATCH 1/3] [ie/volejtv] Fix extractor (#13203) --- yt_dlp/extractor/volejtv.py | 73 ++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/yt_dlp/extractor/volejtv.py b/yt_dlp/extractor/volejtv.py index 42ef9b128..b3c3351fa 100644 --- a/yt_dlp/extractor/volejtv.py +++ b/yt_dlp/extractor/volejtv.py @@ -1,40 +1,71 @@ from .common import InfoExtractor +from ..utils import ( + str_or_none, + strftime_or_none, + traverse_obj, + unified_timestamp, + url_or_none, +) class VolejTVIE(InfoExtractor): - _VALID_URL = r'https?://volej\.tv/video/(?P<id>\d+)' + _VALID_URL = r'https?://volej\.tv/match/(?P<id>\d+)' _TESTS = [{ - 'url': 'https://volej.tv/video/725742/', + 'url': 'https://volej.tv/match/270579', 'info_dict': { - 'id': '725742', + 'id': '270579', 'ext': 'mp4', - 'description': 'Zápas VK Královo Pole vs VK Prostějov 10.12.2022 v 19:00 na Volej.TV', - 'thumbnail': 'https://volej.tv/images/og/16/17186/og.png', - 'title': 'VK Královo Pole vs VK Prostějov', + 'title': 'CZE-SWE (2024-06-16)', + 'categories': ['ženy'], + 'series': 'ZLATÁ EVROPSKÁ VOLEJBALOVÁ LIGA', + 'season': '2023-2024', + 'timestamp': 1718553600, + 'upload_date': '20240616', }, }, { - 'url': 'https://volej.tv/video/725605/', + 'url': 'https://volej.tv/match/487520', 'info_dict': { - 'id': '725605', + 'id': '487520', 'ext': 'mp4', - 'thumbnail': 'https://volej.tv/images/og/15/17185/og.png', - 'title': 'VK Lvi Praha vs VK Euro Sitex Příbram', - 'description': 'Zápas VK Lvi Praha vs VK Euro Sitex Příbram 11.12.2022 v 19:00 na Volej.TV', + 'thumbnail': r're:https://.+\.(png|jpeg)', + 'title': 'CZE-FRA (2024-09-06)', + 'categories': ['mládež'], + 'series': 'Mistrovství Evropy do 20 let', + 'season': '2024-2025', + 'timestamp': 1725627600, + 'upload_date': '20240906', + }, }] def _real_extract(self, url): video_id = self._match_id(url) - 
webpage = self._download_webpage(url, video_id) - json_data = self._search_json( - r'<\s*!\[CDATA[^=]+=', webpage, 'CDATA', video_id) - formats, subtitle = self._extract_m3u8_formats_and_subtitles( - json_data['urls']['hls'], video_id) - return { + json_data = self._download_json(f'https://api-volejtv-prod.apps.okd4.devopsie.cloud/api/match/{video_id}', video_id) + formats = [] + tbr_resolution_mapping = {'6000': '1080p', '2400': '720p', '1500': '480p', '800': '360p'} + for video in traverse_obj(json_data, ('videos', 0, 'qualities')): + formats.append({ + 'url': video['cloud_front_path'], + 'tbr': int(video['quality']), + 'format_id': str(video['id']), + 'format_note': tbr_resolution_mapping[video['quality']], + }) + data = { 'id': video_id, - 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage), - 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage), - 'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage), + **traverse_obj(json_data, { + 'series': ('competition_name', {str_or_none}), + 'season': ('season', {str_or_none}), + 'timestamp': ('match_time', {unified_timestamp}), + 'categories': ('category', ('title'), {str}, filter, all, filter), + 'thumbnail': ('poster', {url_or_none}), + }), 'formats': formats, - 'subtitles': subtitle, } + teams = list(set(traverse_obj(json_data, ('teams', ..., 'shortcut')))) + if len(teams) > 2 and 'FIN' in teams: + teams.remove('FIN') + title = '-'.join(sorted(teams)) + if data.get('timestamp'): + title += f" ({strftime_or_none(data['timestamp'], '%Y-%m-%d')})" + data['title'] = title + return data From 39ddc887d0aa860445f1e3b18d385aff6fe4bc42 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Mon, 19 May 2025 20:39:19 +0530 Subject: [PATCH 2/3] [ie/volejtv] Add extractor Add extractors for Club and Category playlist --- yt_dlp/extractor/_extractors.py | 6 ++- yt_dlp/extractor/volejtv.py | 81 ++++++++++++++++++++++++++++++++- 2 files changed, 
84 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e7dcb9853..1cdf48015 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2404,7 +2404,11 @@ VoicyChannelIE, VoicyIE, ) -from .volejtv import VolejTVIE +from .volejtv import ( + VolejTVCategoryPlaylistIE, + VolejTVClubPlaylistIE, + VolejTVIE, +) from .voxmedia import ( VoxMediaIE, VoxMediaVolumeIE, diff --git a/yt_dlp/extractor/volejtv.py b/yt_dlp/extractor/volejtv.py index b3c3351fa..d53db9db9 100644 --- a/yt_dlp/extractor/volejtv.py +++ b/yt_dlp/extractor/volejtv.py @@ -1,5 +1,8 @@ +import functools + from .common import InfoExtractor from ..utils import ( + InAdvancePagedList, str_or_none, strftime_or_none, traverse_obj, @@ -8,7 +11,16 @@ ) -class VolejTVIE(InfoExtractor): +class VolejTVBaseIE(InfoExtractor): + _API_URL = 'https://api-volejtv-prod.apps.okd4.devopsie.cloud/api' + + def _call_api(self, endpoint, api_id, query={}): + return self._download_json(f'{self._API_URL}/{endpoint}', api_id, + 'Downloading JSON', 'Unable to download JSON', query=query) + + +class VolejTVIE(VolejTVBaseIE): + IE_NAME = 'volejtv:match' _VALID_URL = r'https?://volej\.tv/match/(?P<id>\d+)' _TESTS = [{ 'url': 'https://volej.tv/match/270579', @@ -40,7 +52,7 @@ class VolejTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - json_data = self._download_json(f'https://api-volejtv-prod.apps.okd4.devopsie.cloud/api/match/{video_id}', video_id) + json_data = self._call_api(f'match/{video_id}', video_id) formats = [] tbr_resolution_mapping = {'6000': '1080p', '2400': '720p', '1500': '480p', '800': '360p'} for video in traverse_obj(json_data, ('videos', 0, 'qualities')): @@ -69,3 +81,68 @@ def _real_extract(self, url): title += f" ({strftime_or_none(data['timestamp'], '%Y-%m-%d')})" data['title'] = title return data + + +class VolejTVClubPlaylistIE(VolejTVBaseIE): + IE_NAME = 'volejtv:club' + _VALID_URL = 
r'https?://volej\.tv/klub/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://volej.tv/klub/1173', + 'info_dict': { + 'id': '1173', + 'title': 'VK Jihostroj České Budějovice', + }, + 'playlist_mincount': 30, + }] + _PAGE_SIZE = 6 + + def _get_page(self, playlist_id, page): + return self._call_api(f'match/by-team-id-paginated/{playlist_id}', playlist_id, + query={'page': page + 1, 'take': self._PAGE_SIZE, 'order': 'DESC'}) + + def _entries(self, playlist_id, first_page_data, page): + entries = first_page_data if page == 0 else self._get_page(playlist_id, page) + for entry in entries.get('data', []): + yield self.url_result(f"https://volej.tv/match/{entry['id']}", VolejTVIE) + + def _real_extract(self, url): + playlist_id = self._match_id(url) + title = self._call_api(f'team/show/{playlist_id}', playlist_id)['title'] + first_page_data = self._get_page(playlist_id, 0) + total_pages = traverse_obj(first_page_data, ('meta', 'pageCount', {int})) + return self.playlist_result(InAdvancePagedList( + functools.partial(self._entries, playlist_id, first_page_data), + total_pages, self._PAGE_SIZE), playlist_id, title) + + +class VolejTVCategoryPlaylistIE(VolejTVClubPlaylistIE): + IE_NAME = 'volejtv:category' + _VALID_URL = r'https?://volej\.tv/kategorie/(?P<id>[^/$?]+)' + _TESTS = [{ + 'url': 'https://volej.tv/kategorie/chance-cesky-pohar', + 'info_dict': { + 'id': 'chance-cesky-pohar', + 'title': 'Chance Český pohár', + }, + 'playlist_mincount': 30, + }] + _PAGE_SIZE = 10 + + def _get_page(self, playlist_id, page): + return self._call_api(f'match/by-category-id-paginated/{playlist_id}', playlist_id, + query={'page': page + 1, 'take': self._PAGE_SIZE, 'order': 'DESC'}) + + def _get_category(self, playlist_id): + categories = self._call_api('category', playlist_id) + for category in categories: + if category['slug'] == str(playlist_id): + return category['id'], category['title'] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + category_id, title = 
self._get_category(playlist_id) + first_page_data = self._get_page(category_id, 0) + total_pages = traverse_obj(first_page_data, ('meta', 'pageCount', {int})) + return self.playlist_result(InAdvancePagedList( + functools.partial(self._entries, category_id, first_page_data), + total_pages, self._PAGE_SIZE), playlist_id, title) From eafa873af0ad78de748e68d3427c4d4dd63dac15 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Tue, 27 May 2025 20:19:14 +0530 Subject: [PATCH 3/3] [ie/volejtv] make changes as suggested by bashonly --- yt_dlp/extractor/volejtv.py | 42 ++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/yt_dlp/extractor/volejtv.py b/yt_dlp/extractor/volejtv.py index d53db9db9..2b0894570 100644 --- a/yt_dlp/extractor/volejtv.py +++ b/yt_dlp/extractor/volejtv.py @@ -3,6 +3,9 @@ from .common import InfoExtractor from ..utils import ( InAdvancePagedList, + int_or_none, + join_nonempty, + orderedSet, str_or_none, strftime_or_none, traverse_obj, @@ -12,11 +15,11 @@ class VolejTVBaseIE(InfoExtractor): - _API_URL = 'https://api-volejtv-prod.apps.okd4.devopsie.cloud/api' + TBR_HEIGHT_MAPPING = {'6000': 1080, '2400': 720, '1500': 480, '800': 360} - def _call_api(self, endpoint, api_id, query={}): - return self._download_json(f'{self._API_URL}/{endpoint}', api_id, - 'Downloading JSON', 'Unable to download JSON', query=query) + def _call_api(self, endpoint, display_id, query=None): + return self._download_json( + f'https://api-volejtv-prod.apps.okd4.devopsie.cloud/api/{endpoint}', display_id, query=query) class VolejTVIE(VolejTVBaseIE): @@ -27,7 +30,7 @@ class VolejTVIE(VolejTVBaseIE): 'info_dict': { 'id': '270579', 'ext': 'mp4', - 'title': 'CZE-SWE (2024-06-16)', + 'title': 'SWE-CZE (2024-06-16)', 'categories': ['ženy'], 'series': 'ZLATÁ EVROPSKÁ VOLEJBALOVÁ LIGA', 'season': '2023-2024', @@ -40,7 +43,7 @@ class VolejTVIE(VolejTVBaseIE): 'id': '487520', 'ext': 'mp4', 'thumbnail': r're:https://.+\.(png|jpeg)', - 'title': 
'CZE-FRA (2024-09-06)', + 'title': 'FRA-CZE (2024-09-06)', 'categories': ['mládež'], 'series': 'Mistrovství Evropy do 20 let', 'season': '2024-2025', @@ -54,32 +57,29 @@ def _real_extract(self, url): video_id = self._match_id(url) json_data = self._call_api(f'match/{video_id}', video_id) formats = [] - tbr_resolution_mapping = {'6000': '1080p', '2400': '720p', '1500': '480p', '800': '360p'} - for video in traverse_obj(json_data, ('videos', 0, 'qualities')): - formats.append({ - 'url': video['cloud_front_path'], - 'tbr': int(video['quality']), - 'format_id': str(video['id']), - 'format_note': tbr_resolution_mapping[video['quality']], - }) + for video in traverse_obj(json_data, ('videos', 0, 'qualities', lambda _, v: v['cloud_front_path'])): + formats.append(traverse_obj(video, { + 'url': ('cloud_front_path', {url_or_none}), + 'tbr': ('quality', {int_or_none}), + 'format_id': ('id', {str_or_none}), + 'height': ('quality', {lambda v: self.TBR_HEIGHT_MAPPING[v]}), + })) data = { 'id': video_id, **traverse_obj(json_data, { - 'series': ('competition_name', {str_or_none}), - 'season': ('season', {str_or_none}), + 'series': ('competition_name', {str}), + 'season': ('season', {str}), 'timestamp': ('match_time', {unified_timestamp}), 'categories': ('category', ('title'), {str}, filter, all, filter), 'thumbnail': ('poster', {url_or_none}), }), 'formats': formats, } - teams = list(set(traverse_obj(json_data, ('teams', ..., 'shortcut')))) + teams = orderedSet(traverse_obj(json_data, ('teams', ..., 'shortcut', {str}))) if len(teams) > 2 and 'FIN' in teams: teams.remove('FIN') - title = '-'.join(sorted(teams)) - if data.get('timestamp'): - title += f" ({strftime_or_none(data['timestamp'], '%Y-%m-%d')})" - data['title'] = title + data['title'] = join_nonempty(join_nonempty(*teams, delim='-'), + f"({strftime_or_none(data['timestamp'], '%Y-%m-%d')})", delim=' ') return data