1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-27 17:08:32 +00:00

codeql fixes

This commit is contained in:
Matteo Abis 2025-06-21 09:44:21 +02:00
parent 5386879dd5
commit ee8133b077
No known key found for this signature in database

View File

@ -100,6 +100,7 @@ class ThreadsIE(InfoExtractor):
'channel_is_verified': False, 'channel_is_verified': False,
'channel_url': 'https://www.threads.com/@enucatl', 'channel_url': 'https://www.threads.com/@enucatl',
'description': '', 'description': '',
'ext': 'mp4',
'id': 'DLIrVcmPuFA7g5tn9OzPjsA-R8qU2HPJv_FzCo0', 'id': 'DLIrVcmPuFA7g5tn9OzPjsA-R8qU2HPJv_FzCo0',
'like_count': int, 'like_count': int,
'timestamp': 1745582191, 'timestamp': 1745582191,
@ -126,13 +127,15 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, post_id, note='Downloading post page') webpage = self._download_webpage(url, post_id, note='Downloading post page')
json_data = None json_data = None
# Match single scripts
for script in re.findall(r'<script[^>]*>(.*?)</script>', webpage, re.DOTALL | re.IGNORECASE):
# Heuristic check: if the script doesn't contain "RelayPrefetchedStreamCache" and the post_id,
# it's definitely not the one we want. Skip it quickly.
if 'RelayPrefetchedStreamCache' not in script or post_id not in script:
continue
json_scripts = re.findall(
r'<script type="application/json"[^>]*?\sdata-sjs[^>]*?>(.*?)<\s*/script\s*>',
webpage,
re.DOTALL | re.IGNORECASE,
)
for script in json_scripts:
if post_id not in script or 'RelayPrefetchedStreamCache' not in script:
continue
# This script is a candidate. Try to parse it. # This script is a candidate. Try to parse it.
# We use fatal=False because we expect some candidates to fail parsing. # We use fatal=False because we expect some candidates to fail parsing.
candidate_json = self._search_json(r'"result":', script, 'result data', post_id, fatal=False) candidate_json = self._search_json(r'"result":', script, 'result data', post_id, fatal=False)
@ -174,16 +177,17 @@ def _real_extract(self, url):
# This metadata applies to the whole post (the playlist). # This metadata applies to the whole post (the playlist).
uploader = traverse_obj(main_post, ('user', 'username')) uploader = traverse_obj(main_post, ('user', 'username'))
caption = traverse_obj(main_post, ('caption', 'text'))
title = ( title = (
strip_or_none(remove_end(self._html_extract_title(webpage), '• Threads')) caption
or traverse_obj(main_post, ('caption', 'text')) or strip_or_none(remove_end(self._html_extract_title(webpage), '• Threads'))
or f'Post by {uploader}' or f'Post by {uploader}'
) )
playlist_metadata = { playlist_metadata = {
'id': post_id, 'id': post_id,
'title': title, 'title': title,
'description': self._og_search_description(webpage) or traverse_obj(main_post, ('caption', 'text')), 'description': caption or self._og_search_description(webpage),
'uploader': uploader, 'uploader': uploader,
'uploader_id': traverse_obj(main_post, ('user', 'pk')), 'uploader_id': traverse_obj(main_post, ('user', 'pk')),
'uploader_url': f'https://www.threads.com/@{uploader}', 'uploader_url': f'https://www.threads.com/@{uploader}',