diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 34c98b537..1a07ff7ef 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -647,6 +647,7 @@
 )
 from .fczenit import FczenitIE
 from .fifa import FifaIE
+from .filmarchiv import FilmArchivIE
 from .filmon import (
     FilmOnChannelIE,
     FilmOnIE,
diff --git a/yt_dlp/extractor/filmarchiv.py b/yt_dlp/extractor/filmarchiv.py
new file mode 100644
--- /dev/null
+++ b/yt_dlp/extractor/filmarchiv.py
@@ -0,0 +1,55 @@
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class FilmArchivIE(InfoExtractor):
+    # Extractor for single-video pages under filmarchiv.at/(de|en)/filmarchiv-on/video/.
+    IE_NAME = 'FILMARCHIV ON'
+    _VALID_URL = r'https?://(?:www\.)?filmarchiv\.at/(?:de|en)/filmarchiv-on/video/(?P<id>[0-9a-zA-Z_]+)'
+    _TESTS = [{
+        'url': 'https://www.filmarchiv.at/de/filmarchiv-on/video/f_0305p7xKrXUPBwoNE9x6mh',
+        # TODO: add 'md5' (md5 sum of the first 10241 bytes of the video file; use --test)
+        'info_dict': {
+            'id': 'f_0305p7xKrXUPBwoNE9x6mh',
+            'ext': 'mkv',
+            'title': 'Der Wurstelprater zur Kaiserzeit',
+            'description': 'md5:9843f92df5cc9a4975cee7aabcf6e3b2',
+            'thumbnail': 'https://img.filmarchiv.at/unsafe/1024x1024/videostatic/f_0305/p7xKrXUPBwoNE9x6mh_v1/poster.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        # The id matched from the URL is returned as 'id'; every other field is scraped from the page.
+        media_id = self._match_id(url)
+        webpage = self._download_webpage(url, media_id)
+
+        # NOTE(review): the HTML markup inside the three patterns below was lost in
+        # the copy of this patch that was reviewed; they are reconstructions and
+        # must be verified against the live page before merging.
+        title = self._html_search_regex(
+            r'<title-div[^>]+>\s*(.+?)\s*</title-div>',
+            webpage, 'title')
+
+        description = self._html_search_regex(
+            r'<div[^>]+class="[^"]*\bborder-base-content\b[^"]*"[^>]*>\s*<div[^>]+class="[^"]*\bprose\b[^"]*"[^>]*>\s*<p>\s*(.+?)\s*</p>',
+            webpage, 'description')
+
+        # Presumably the page embeds the poster URL (e.g. in og:image) - confirm.
+        # video_id is greedy so that only the last "_<part>" is taken as the version.
+        bucket, video_id, version = self._html_search_regex(
+            r'videostatic/(?P<bucket>[^/"]+)/(?P<video_id>[^/"]+)_(?P<version>[^/"_]+)/poster\.jpg',
+            webpage, 'bucket, video_id, version', group=('bucket', 'video_id', 'version'))
+
+        # Playlist and poster share the "<bucket>/<video_id>_<version>" path (cf. the test thumbnail).
+        media_path = f'{bucket}/{video_id}_{version}'
+        playlist_url = f'https://cdn.filmarchiv.at/{media_path}_sv1/playlist.m3u8'
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, media_id, fatal=False)
+
+        return {
+            'id': media_id,
+            'title': title,
+            'description': description,
+            'thumbnail': f'https://img.filmarchiv.at/unsafe/1024x1024/videostatic/{media_path}/poster.jpg',
+            'formats': formats,
+            'subtitles': subtitles,
+        }