diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fbbd9571f..4c2778cf7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2101,6 +2101,7 @@ TheGuardianPodcastIE, TheGuardianPodcastPlaylistIE, ) +from .thehighwire import TheHighWireIE from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( diff --git a/yt_dlp/extractor/thehighwire.py b/yt_dlp/extractor/thehighwire.py new file mode 100644 index 000000000..8b596143f --- /dev/null +++ b/yt_dlp/extractor/thehighwire.py @@ -0,0 +1,43 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + url_or_none, +) +from ..utils.traversal import ( + find_element, + require, + traverse_obj, +) + + +class TheHighWireIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?thehighwire\.com/ark-videos/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://thehighwire.com/ark-videos/the-deposition-of-stanley-plotkin/', + 'info_dict': { + 'id': 'the-deposition-of-stanley-plotkin', + 'ext': 'mp4', + 'title': 'THE DEPOSITION OF STANLEY PLOTKIN', + 'description': 'md5:6d0be4f1181daaa10430fd8b945a5e54', + 'thumbnail': r're:https?://static\.arkengine\.com/video/.+\.jpg', + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + embed_url = traverse_obj(webpage, ( + {find_element(cls='ark-video-embed', html=True)}, + {extract_attributes}, 'src', {url_or_none}, {require('embed URL')})) + embed_page = self._download_webpage(embed_url, display_id) + + return { + 'id': display_id, + **traverse_obj(webpage, { + 'title': ({find_element(cls='section-header')}, {clean_html}), + 'description': ({find_element(cls='episode-description__copy')}, {clean_html}), + }), + **self._parse_html5_media_entries(embed_url, embed_page, display_id, m3u8_id='hls')[0], + }