1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-27 07:38:30 +00:00

fix regex to match kaltura extractor

This commit is contained in:
frank 2025-07-02 09:53:32 -05:00
parent dd21959466
commit 025c410414

View File

@ -2,7 +2,7 @@
class UnitedNationWebTVExtractorIE(KalturaIE):
_VALID_URL = r'https?://webtv.un.org/(ar|zh|en|fr|ru|es)/asset/[0-9A-z]{3}/(?P<id>[0-9A-z]{10})'
_VALID_URL = r'https?://webtv.un.org/(ar|zh|en|fr|ru|es)/asset/\w+/(?P<id>\w+)'
_TESTS = [{
'url': 'https://webtv.un.org/en/asset/k1o/k1o7stmi6p',
'md5': 'b2f8b3030063298ae841b4b7ddc01477',
@ -23,19 +23,28 @@ def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
partner_id = self._html_search_regex(
r'partnerId: ([0-9]+)',
r'partnerId:\s*(\w+)',
webpage,
'partner_id',
)
entry_id = self._html_search_regex(
r'const kentryID = \'([0-9A-z]{10})\';',
r'const\s+kentryID\s*=\s*["\'](\w+)["\'];',
webpage,
'kentry_id',
)
kaltura_api_response = self._get_video_info(entry_id, partner_id)
try:
kaltura_url = kaltura_api_response[1].get('dataUrl', None)
except IndexError:
return self.url_result(
f'kaltura:{partner_id}:{entry_id}',
KalturaIE.ie_key(),
)
kaltura_id = self._search_regex(
r'http://cdnapi.kaltura.com/p/[0-9]+/sp/[0-9]+/playManifest/entryId/([0-9A-z]{10})/format/url/protocol/http',
kaltura_api_response[1].get('dataUrl'),
r'http://cdnapi.kaltura.com/p/\w+/sp/\w+/playManifest/entryId/(\w+)/format/url/protocol/http',
kaltura_url,
'kaltura_id',
)