diff --git a/yt_dlp/extractor/threads.py b/yt_dlp/extractor/threads.py index 0aa5ca6dc..3f01c0f34 100644 --- a/yt_dlp/extractor/threads.py +++ b/yt_dlp/extractor/threads.py @@ -100,6 +100,7 @@ class ThreadsIE(InfoExtractor): 'channel_is_verified': False, 'channel_url': 'https://www.threads.com/@enucatl', 'description': '', + 'ext': 'mp4', 'id': 'DLIrVcmPuFA7g5tn9OzPjsA-R8qU2HPJv_FzCo0', 'like_count': int, 'timestamp': 1745582191, @@ -126,13 +127,15 @@ def _real_extract(self, url): webpage = self._download_webpage(url, post_id, note='Downloading post page') json_data = None - # Match single scripts - for script in re.findall(r']*>(.*?)', webpage, re.DOTALL | re.IGNORECASE): - # Heuristic check: if the script doesn't contain "RelayPrefetchedStreamCache" and the post_id, - # it's definitely not the one we want. Skip it quickly. - if 'RelayPrefetchedStreamCache' not in script or post_id not in script: - continue + json_scripts = re.findall( + r'