From ee8133b07779ee15f116daed4a81ee49eb7238ab Mon Sep 17 00:00:00 2001
From: Matteo Abis <1423701+Enucatl@users.noreply.github.com>
Date: Sat, 21 Jun 2025 09:44:21 +0200
Subject: [PATCH] CodeQL fixes
---
yt_dlp/extractor/threads.py | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/yt_dlp/extractor/threads.py b/yt_dlp/extractor/threads.py
index 0aa5ca6dc..3f01c0f34 100644
--- a/yt_dlp/extractor/threads.py
+++ b/yt_dlp/extractor/threads.py
@@ -100,6 +100,7 @@ class ThreadsIE(InfoExtractor):
'channel_is_verified': False,
'channel_url': 'https://www.threads.com/@enucatl',
'description': '',
+ 'ext': 'mp4',
'id': 'DLIrVcmPuFA7g5tn9OzPjsA-R8qU2HPJv_FzCo0',
'like_count': int,
'timestamp': 1745582191,
@@ -126,13 +127,15 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, post_id, note='Downloading post page')
json_data = None
- # Match single scripts
- for script in re.findall(r'', webpage, re.DOTALL | re.IGNORECASE):
- # Heuristic check: if the script doesn't contain "RelayPrefetchedStreamCache" and the post_id,
- # it's definitely not the one we want. Skip it quickly.
- if 'RelayPrefetchedStreamCache' not in script or post_id not in script:
- continue
+ json_scripts = re.findall(
+ r'