mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 17:08:32 +00:00
[ie/oftv] Fix extractors (#13209)
1. Add extractors for new url pattern 2. Fix extractors for old url pattern
This commit is contained in:
parent
2685654a37
commit
586fd0f915
@ -1443,7 +1443,9 @@
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oftv import (
|
||||
OfTVIE,
|
||||
OfTVNewIE,
|
||||
OfTVPlaylistIE,
|
||||
OfTVPlaylistNewIE,
|
||||
)
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .olympics import OlympicsReplayIE
|
||||
|
@ -1,54 +1,132 @@
|
||||
from .common import InfoExtractor
|
||||
from .zype import ZypeIE
|
||||
from ..utils import traverse_obj
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class OfTVNewIE(InfoExtractor):
|
||||
IE_NAME = 'oftv:video-new'
|
||||
_VALID_URL = r'https?://(?:www\.)?of\.tv/v/(?P<id>[^#/?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://of.tv/v/zjtc6',
|
||||
'md5': 'fcdffb9e0a375851d53a939b45313a8c',
|
||||
'info_dict': {
|
||||
'id': 'zjtc6',
|
||||
'ext': 'mp4',
|
||||
'title': 'S1E1: Monte Cristo Sandwich',
|
||||
'thumbnails': 'mincount:3',
|
||||
'thumbnail': r're:https://.+\.(jpg|webp)',
|
||||
'description': 'md5:89a6a3404540e9d5a4ec9ffa63a85d4d',
|
||||
'duration': 1423,
|
||||
'timestamp': 1652394900,
|
||||
'upload_date': '20220512',
|
||||
'creators': 'count:4',
|
||||
'channel': 'This is Fire',
|
||||
'channel_id': '9iGia',
|
||||
'channel_url': 'https://of.tv/c/this-is-fire',
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_data(self, json_data):
|
||||
thumbnails = []
|
||||
video_id = traverse_obj(json_data, ('unique_id', {str}))
|
||||
for k, v in json_data.get('thumbnail', {}).items():
|
||||
thumbnails.append({'url': v, 'preference': int(k)})
|
||||
m3u8_url = traverse_obj(json_data, ('video_src', {url_or_none}))
|
||||
return {
|
||||
'id': video_id,
|
||||
**traverse_obj(json_data, {
|
||||
'title': ('title', {str}),
|
||||
'alt_title': ('long_title', {str_or_none}),
|
||||
'description': ('description', {str_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('published_at', {unified_timestamp}),
|
||||
'creators': ('featured_creators', ..., (('nickname', 'of_handle'))),
|
||||
'season': ('season', {str_or_none}),
|
||||
'episode': ('episode', {str_or_none}),
|
||||
'channel': ('creator', 'channel_name', {str_or_none}),
|
||||
'channel_id': ('creator', 'unique_id', {str_or_none}),
|
||||
'channel_url': ('creator', 'oftv_handle', {urljoin('https://of.tv/c/')}),
|
||||
}),
|
||||
'formats': self._extract_m3u8_formats(m3u8_url, video_id),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json(f'https://api.of.tv/v0/pages/videos/{video_id}', video_id)['data']['video']
|
||||
return self._extract_data(json_data)
|
||||
|
||||
|
||||
class OfTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?of\.tv/video/(?P<id>\w+)'
|
||||
IE_NAME = 'oftv:video'
|
||||
_VALID_URL = r'https?://(?:www\.)?of\.tv/video/(?P<id>[^#!/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://of.tv/video/627d7d95b353db0001dadd1a',
|
||||
'md5': 'cb9cd5db3bb9ee0d32bfd7e373d6ef0a',
|
||||
'md5': 'fcdffb9e0a375851d53a939b45313a8c',
|
||||
'info_dict': {
|
||||
'id': '627d7d95b353db0001dadd1a',
|
||||
'id': 'zjtc6',
|
||||
'ext': 'mp4',
|
||||
'title': 'E1: Jacky vs Eric',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'average_rating': 0,
|
||||
'description': 'md5:dd16e3e2a8d27d922e7a989f85986853',
|
||||
'display_id': '',
|
||||
'title': 'S1E1: Monte Cristo Sandwich',
|
||||
'thumbnails': 'mincount:3',
|
||||
'thumbnail': r're:https://.+\.(jpg|webp)',
|
||||
'description': 'md5:89a6a3404540e9d5a4ec9ffa63a85d4d',
|
||||
'duration': 1423,
|
||||
'timestamp': 1652391300,
|
||||
'timestamp': 1652394900,
|
||||
'upload_date': '20220512',
|
||||
'view_count': 0,
|
||||
'creator': 'This is Fire',
|
||||
'creators': 'count:4',
|
||||
'channel': 'This is Fire',
|
||||
'channel_id': '9iGia',
|
||||
'channel_url': 'https://of.tv/c/this-is-fire',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = next(ZypeIE.extract_from_webpage(self._downloader, url, webpage))
|
||||
info['_type'] = 'url_transparent'
|
||||
info['creator'] = self._search_regex(r'<a[^>]+class=\"creator-name\"[^>]+>([^<]+)', webpage, 'creator')
|
||||
return info
|
||||
return self.url_result(self._og_search_url(webpage), OfTVNewIE)
|
||||
|
||||
|
||||
class OfTVPlaylistNewIE(OfTVNewIE):
|
||||
IE_NAME = 'oftv:playlist-new'
|
||||
_VALID_URL = r'https?://(?:www\.)?of\.tv/c/(?P<id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://of.tv/c/this-is-fire/',
|
||||
'info_dict': {
|
||||
'id': 'this-is-fire',
|
||||
'title': 'This is Fire',
|
||||
},
|
||||
'playlist_mincount': 44,
|
||||
}]
|
||||
|
||||
def _entries(self, json_data):
|
||||
for entry in json_data.get('items', []):
|
||||
yield self._extract_data(entry)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
json_data = self._download_json(f'https://api.of.tv/v0/pages/creators/{playlist_id}', playlist_id)['data']['creator_playlist']
|
||||
return self.playlist_result(self._entries(json_data), playlist_id, traverse_obj(json_data, ('label', {str})))
|
||||
|
||||
|
||||
class OfTVPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?of\.tv/creators/(?P<id>[a-zA-Z0-9-]+)/?(?:$|[?#])'
|
||||
IE_NAME = 'oftv:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?of\.tv/creators/(?P<id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://of.tv/creators/this-is-fire/',
|
||||
'playlist_count': 8,
|
||||
'info_dict': {
|
||||
'id': 'this-is-fire',
|
||||
'title': 'This is Fire',
|
||||
},
|
||||
'playlist_mincount': 44,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
json_match = self._search_json(
|
||||
r'var\s*remaining_videos\s*=', webpage, 'oftv playlists', playlist_id, contains_pattern=r'\[.+\]')
|
||||
|
||||
return self.playlist_from_matches(
|
||||
traverse_obj(json_match, (..., 'discovery_url')), playlist_id)
|
||||
return self.url_result(self._og_search_url(webpage), OfTVPlaylistNewIE)
|
||||
|
Loading…
Reference in New Issue
Block a user