From 87ba39405ba82585b9ef2c437f699468b4127ac7 Mon Sep 17 00:00:00 2001 From: delta <4elta@users.noreply.github.com> Date: Wed, 18 Jun 2025 11:19:13 +0200 Subject: [PATCH] Apply suggestions from code review Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com> --- yt_dlp/extractor/filmarchiv.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/filmarchiv.py b/yt_dlp/extractor/filmarchiv.py index 1e62c40ae3..9d2eb98f07 100644 --- a/yt_dlp/extractor/filmarchiv.py +++ b/yt_dlp/extractor/filmarchiv.py @@ -1,4 +1,6 @@ from .common import InfoExtractor +from ..utils import clean_html +from ..utils.traversal import find_elements, traverse_obj class FilmArchivIE(InfoExtractor): @@ -24,9 +26,17 @@ def _real_extract(self, url): r']*>\s*(.+?)\s*', webpage, 'title') - description = self._html_search_regex( - r'
\s*
\s*

\s*(.+?)\s*

', - webpage, 'description') + description = traverse_obj(webpage, ( + {find_elements( + tag='div', + attr='class', value=r'[^\'"]*(?<=[\'"\s])border-base-content(?=[\'"\s])[^\'"]*', + html=False, regex=True)}, ..., + {find_elements( + tag='div', + attr='class', value=r'[^\'"]*(?<=[\'"\s])prose(?=[\'"\s])[^\'"]*', + html=False, regex=True)}, ..., + {clean_html}, any, + )) og_img = self._html_search_meta('og:image', webpage, 'image URL', fatal=True) prefix = self._search_regex(