[ie/oftv] Fix extractors (#13209)

1. Add extractors for the new URL pattern. 2. Fix extractors for the old URL pattern.
2026-02-14 04:26:21 +00:00 · 2025-05-19 04:43:50 +05:30
parent 2685654a37
commit 586fd0f915
2 changed files with 105 additions and 25 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1443,7 +1443,9 @@ from .odkmedia import OnDemandChinaEpisodeIE
 from .odnoklassniki import OdnoklassnikiIE
 from .oftv import (
    OfTVIE,
+    OfTVNewIE,
    OfTVPlaylistIE,
+    OfTVPlaylistNewIE,
 )
 from .oktoberfesttv import OktoberfestTVIE
 from .olympics import OlympicsReplayIE
--- a/yt_dlp/extractor/oftv.py
+++ b/yt_dlp/extractor/oftv.py
@@ -1,54 +1,132 @@
 from .common import InfoExtractor
-from .zype import ZypeIE
-from ..utils import traverse_obj
+from ..utils import (
+    int_or_none,
+    str_or_none,
+    traverse_obj,
+    unified_timestamp,
+    url_or_none,
+    urljoin,
+)
+
+
+class OfTVNewIE(InfoExtractor):
+    IE_NAME = 'oftv:video-new'
+    _VALID_URL = r'https?://(?:www\.)?of\.tv/v/(?P<id>[^#/?]+)'
+    _TESTS = [{
+        'url': 'https://of.tv/v/zjtc6',
+        'md5': 'fcdffb9e0a375851d53a939b45313a8c',
+        'info_dict': {
+            'id': 'zjtc6',
+            'ext': 'mp4',
+            'title': 'S1E1: Monte Cristo Sandwich',
+            'thumbnails': 'mincount:3',
+            'thumbnail': r're:https://.+\.(jpg|webp)',
+            'description': 'md5:89a6a3404540e9d5a4ec9ffa63a85d4d',
+            'duration': 1423,
+            'timestamp': 1652394900,
+            'upload_date': '20220512',
+            'creators': 'count:4',
+            'channel': 'This is Fire',
+            'channel_id': '9iGia',
+            'channel_url': 'https://of.tv/c/this-is-fire',
+        },
+    }]
+
+    def _extract_data(self, json_data):
+        thumbnails = []
+        video_id = traverse_obj(json_data, ('unique_id', {str}))
+        for k, v in json_data.get('thumbnail', {}).items():
+            thumbnails.append({'url': v, 'preference': int(k)})
+        m3u8_url = traverse_obj(json_data, ('video_src', {url_or_none}))
+        return {
+            'id': video_id,
+            **traverse_obj(json_data, {
+                'title': ('title', {str}),
+                'alt_title': ('long_title', {str_or_none}),
+                'description': ('description', {str_or_none}),
+                'duration': ('duration', {int_or_none}),
+                'timestamp': ('published_at', {unified_timestamp}),
+                'creators': ('featured_creators', ..., (('nickname', 'of_handle'))),
+                'season': ('season', {str_or_none}),
+                'episode': ('episode', {str_or_none}),
+                'channel': ('creator', 'channel_name', {str_or_none}),
+                'channel_id': ('creator', 'unique_id', {str_or_none}),
+                'channel_url': ('creator', 'oftv_handle', {urljoin('https://of.tv/c/')}),
+            }),
+            'formats': self._extract_m3u8_formats(m3u8_url, video_id),
+            'thumbnails': thumbnails,
+        }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        json_data = self._download_json(f'https://api.of.tv/v0/pages/videos/{video_id}', video_id)['data']['video']
+        return self._extract_data(json_data)


 class OfTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?of\.tv/video/(?P<id>\w+)'
+    IE_NAME = 'oftv:video'
+    _VALID_URL = r'https?://(?:www\.)?of\.tv/video/(?P<id>[^#!/]+)'
    _TESTS = [{
        'url': 'https://of.tv/video/627d7d95b353db0001dadd1a',
-        'md5': 'cb9cd5db3bb9ee0d32bfd7e373d6ef0a',
+        'md5': 'fcdffb9e0a375851d53a939b45313a8c',
        'info_dict': {
-            'id': '627d7d95b353db0001dadd1a',
+            'id': 'zjtc6',
            'ext': 'mp4',
-            'title': 'E1: Jacky vs Eric',
-            'thumbnail': r're:^https?://.*\.jpg',
-            'average_rating': 0,
-            'description': 'md5:dd16e3e2a8d27d922e7a989f85986853',
-            'display_id': '',
+            'title': 'S1E1: Monte Cristo Sandwich',
+            'thumbnails': 'mincount:3',
+            'thumbnail': r're:https://.+\.(jpg|webp)',
+            'description': 'md5:89a6a3404540e9d5a4ec9ffa63a85d4d',
            'duration': 1423,
-            'timestamp': 1652391300,
+            'timestamp': 1652394900,
            'upload_date': '20220512',
-            'view_count': 0,
-            'creator': 'This is Fire',
+            'creators': 'count:4',
+            'channel': 'This is Fire',
+            'channel_id': '9iGia',
+            'channel_url': 'https://of.tv/c/this-is-fire',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        info = next(ZypeIE.extract_from_webpage(self._downloader, url, webpage))
-        info['_type'] = 'url_transparent'
-        info['creator'] = self._search_regex(r'<a[^>]+class=\"creator-name\"[^>]+>([^<]+)', webpage, 'creator')
-        return info
+        return self.url_result(self._og_search_url(webpage), OfTVNewIE)
+
+
+class OfTVPlaylistNewIE(OfTVNewIE):
+    IE_NAME = 'oftv:playlist-new'
+    _VALID_URL = r'https?://(?:www\.)?of\.tv/c/(?P<id>[^/#?]+)'
+    _TESTS = [{
+        'url': 'https://of.tv/c/this-is-fire/',
+        'info_dict': {
+            'id': 'this-is-fire',
+            'title': 'This is Fire',
+        },
+        'playlist_mincount': 44,
+    }]
+
+    def _entries(self, json_data):
+        for entry in json_data.get('items', []):
+            yield self._extract_data(entry)
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        json_data = self._download_json(f'https://api.of.tv/v0/pages/creators/{playlist_id}', playlist_id)['data']['creator_playlist']
+        return self.playlist_result(self._entries(json_data), playlist_id, traverse_obj(json_data, ('label', {str})))


 class OfTVPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?of\.tv/creators/(?P<id>[a-zA-Z0-9-]+)/?(?:$|[?#])'
+    IE_NAME = 'oftv:playlist'
+    _VALID_URL = r'https?://(?:www\.)?of\.tv/creators/(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'https://of.tv/creators/this-is-fire/',
-        'playlist_count': 8,
        'info_dict': {
            'id': 'this-is-fire',
+            'title': 'This is Fire',
        },
+        'playlist_mincount': 44,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
-
-        json_match = self._search_json(
-            r'var\s*remaining_videos\s*=', webpage, 'oftv playlists', playlist_id, contains_pattern=r'\[.+\]')
-
-        return self.playlist_from_matches(
-            traverse_obj(json_match, (..., 'discovery_url')), playlist_id)
+        return self.url_result(self._og_search_url(webpage), OfTVPlaylistNewIE)