From d6f717c1e8ebf208ca1c6c29f8fa69e290825748 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Thu, 19 Jun 2025 23:20:12 +0300 Subject: [PATCH] [thehighwire.com] Create exctractor's file --- yt_dlp/extractor/thehighwire.py | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 yt_dlp/extractor/thehighwire.py diff --git a/yt_dlp/extractor/thehighwire.py b/yt_dlp/extractor/thehighwire.py new file mode 100644 index 0000000000..ffd86f91ee --- /dev/null +++ b/yt_dlp/extractor/thehighwire.py @@ -0,0 +1,42 @@ +from .common import InfoExtractor + +class TheHighWireIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?thehighwire\.com/ark-videos/(?P[^/?#]+)' + _EMBED_URL = 'https://app.arkengine.com/embed/{id}' + _TESTS = [{ + 'url': 'https://thehighwire.com/ark-videos/the-deposition-of-stanley-plotkin/', + 'info_dict': { + 'id': 'clllgcra301z4ik01x8cwhfu2', + 'title': 'THE DEPOSITION OF STANLEY PLOTKIN', + 'ext': 'mp4', + }, + 'params': {'skip_download': True}, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + iframe_url = self._search_regex( + r']+src=["\'](https?://app\.arkengine\.com/embed/[^"\']+)', + webpage, 'iframe URL') + video_id = self._search_regex( + r'embed/([a-zA-Z0-9]+)', iframe_url, 'video ID') + + player_page = self._download_webpage( + self._EMBED_URL.format(id=video_id), video_id, + note='Downloading player page') + + m3u8_url = self._search_regex( + r']+src=["\']([^"\']+\.m3u8)', + player_page, 'm3u8 URL') + + title = self._og_search_title(webpage, default=None) or self._html_search_meta( + 'og:title', webpage, 'title', default=video_id) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'), + }