From b343713d97f879a31eb3280eec9160b2f8060be8 Mon Sep 17 00:00:00 2001 From: Marcos Del Sol Vives Date: Tue, 20 May 2025 04:05:09 +0200 Subject: [PATCH] [pp/ffmpeg] add workaround for ffmpeg issue 11493 (#6918, #9138 and #9618) --- yt_dlp/postprocessor/ffmpeg.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 59a49aa57..34133a1c1 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -9,6 +9,7 @@ import time from .common import PostProcessor +from .. import webvtt from ..compat import imghdr from ..utils import ( MEDIA_EXTENSIONS, @@ -626,14 +627,33 @@ def run(self, info): sub_ext = sub_info['ext'] if sub_ext == 'json': self.report_warning('JSON subtitles cannot be embedded') - elif ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'): + elif ext in ('mkv', 'webm') and sub_ext == 'vtt': + # This is a workaround for ffmpeg bug https://trac.ffmpeg.org/ticket/11493 + # If the WebVTT contains empty text cues, it'll corrupt remuxed Matroska files, dropping frames + sub_langs.append(lang) + sub_names.append(sub_info.get('name')) + sub_filename = sub_info['filepath'] + + with open(sub_info['filepath'], 'rb') as f: + vtt_bytes = f.read() + blocks = list(webvtt.parse_fragment(vtt_bytes)) + + nonempty_blocks = [block for block in blocks if not isinstance(block, webvtt.CueBlock) or block.text.strip() != ''] + if len(blocks) != len(nonempty_blocks): + self.report_warning(f'Filtering WebVTT for {lang} subtitle due to ffmpeg bug #11493') + sub_filename = f'{sub_filename}_filtered.vtt' + with open(sub_filename, 'w') as f: + for block in nonempty_blocks: + block.write_into(f) + sub_filenames.append(sub_filename) + elif ext == 'webm': + if not webm_vtt_warn: + webm_vtt_warn = True + self.report_warning('Only WebVTT subtitles can be embedded in webm files') + else: sub_langs.append(lang) sub_names.append(sub_info.get('name')) sub_filenames.append(sub_info['filepath']) - else: - if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': - webm_vtt_warn = True - self.report_warning('Only WebVTT subtitles can be embedded in webm files') if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass': mp4_ass_warn = True self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')