import json

from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import extract_attributes
from ..utils.traversal import find_element, traverse_obj


class ShieyIE(InfoExtractor):
    """Extractor for shiey.com video pages; playback is delegated to VimeoIE."""

    # The URL slug is captured in the named group ``id``. The group name is
    # required: ``(?P[...])`` without it is not a valid regular expression.
    # ``[^/?#]+`` stops the slug at a path separator, query string or fragment.
    _VALID_URL = r'https?://(?:www\.)?shiey\.com/videos/v/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.shiey.com/videos/v/train-journey-to-edge-of-serbia-ep-2',
        'info_dict': {
            'id': '1103409448',
            'ext': 'mp4',
            'title': 'Train Journey To Edge of Serbia (Ep. 2)',
            'uploader': 'shiey',
            'uploader_url': '',
            'duration': 1364,
            'thumbnail': r're:^https?://.+',
        },
        'params': {'skip_download': True},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }]

    def _real_extract(self, url):
        """Locate the Vimeo embed on the page at *url* and hand it to VimeoIE.

        Returns the ``url_result`` built from whatever
        ``VimeoIE._extract_url`` derives from the page's embed markup.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Path walked through the page:
        #   element with data-controller="VideoEmbed"
        #   -> its attributes
        #   -> the 'data-config-embed-video' attribute, parsed as JSON
        #   -> the 'oembedHtml' entry, accepted only if it is a str
        # NOTE(review): if any step fails, oembed_html is presumably None and
        # is passed on as-is - confirm how VimeoIE._extract_url handles that.
        oembed_html = traverse_obj(webpage, (
            {find_element(attr='data-controller', value='VideoEmbed', html=True)},
            {extract_attributes}, 'data-config-embed-video', {json.loads}, 'oembedHtml', {str}))

        return self.url_result(VimeoIE._extract_url(url, oembed_html), VimeoIE)