diff --git a/yt_dlp/extractor/filmarchiv.py b/yt_dlp/extractor/filmarchiv.py index 1e62c40ae3..9d2eb98f07 100644 --- a/yt_dlp/extractor/filmarchiv.py +++ b/yt_dlp/extractor/filmarchiv.py @@ -1,4 +1,6 @@ from .common import InfoExtractor +from ..utils import clean_html +from ..utils.traversal import find_elements, traverse_obj class FilmArchivIE(InfoExtractor): @@ -24,9 +26,17 @@ def _real_extract(self, url): r']*>\s*(.+?)\s*', webpage, 'title') - description = self._html_search_regex( - r'
\s*
\s*

\s*(.+?)\s*

', - webpage, 'description') + description = traverse_obj(webpage, ( + {find_elements( + tag='div', + attr='class', value=r'[^\'"]*(?<=[\'"\s])border-base-content(?=[\'"\s])[^\'"]*', + html=False, regex=True)}, ..., + {find_elements( + tag='div', + attr='class', value=r'[^\'"]*(?<=[\'"\s])prose(?=[\'"\s])[^\'"]*', + html=False, regex=True)}, ..., + {clean_html}, any, + )) og_img = self._html_search_meta('og:image', webpage, 'image URL', fatal=True) prefix = self._search_regex(