diff --git a/supportedsites.md b/supportedsites.md index b3fe01173..d7baddf50 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -1492,6 +1492,7 @@ # Supported sites - **ThisVid** - **ThisVidMember** - **ThisVidPlaylist** + - **Threads** - **ThreeSpeak** - **ThreeSpeakUser** - **TikTok** diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fbbd9571f..a5a157871 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2117,6 +2117,10 @@ ThisVidMemberIE, ThisVidPlaylistIE, ) +from .threads import ( + ThreadsIE, + ThreadsIOSIE, +) from .threeqsdn import ThreeQSDNIE from .threespeak import ( ThreeSpeakIE, diff --git a/yt_dlp/extractor/threads.py b/yt_dlp/extractor/threads.py new file mode 100644 index 000000000..3f01c0f34 --- /dev/null +++ b/yt_dlp/extractor/threads.py @@ -0,0 +1,282 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + remove_end, + strip_or_none, + traverse_obj, +) + + +class ThreadsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?threads\.(?:net|com)/(?:@[^/]+/)?(?:post|t)/(?P<id>[^/?#&]+)' + _NETRC_MACHINE = 'threads' + _TESTS = [ + { + 'note': 'Post with single video, with username and post', + 'url': 'https://www.threads.com/@zuck/post/DHV7vTivqWD', + 'info_dict': { + 'channel': 'zuck', + 'channel_is_verified': True, + 'channel_url': 'https://www.threads.com/@zuck', + 'description': 'Me finding out Llama hit 1 BILLION downloads.', + 'ext': 'mp4', + 'id': 'DHV7vTivqWD', + 'like_count': int, + 'thumbnail': str, + 'timestamp': 1742305717, + 'title': 'Me finding out Llama hit 1 BILLION downloads.', + 'upload_date': '20250318', + 'uploader': 'zuck', + 'uploader_id': '63055343223', + 'uploader_url': 'https://www.threads.com/@zuck', + }, + }, + { + 'note': 'Post with single video, without username and with t', + 'url': 'https://www.threads.com/t/DHV7vTivqWD', + 'info_dict': { + 'channel': 'zuck', + 'channel_is_verified': True, + 
'channel_url': 'https://www.threads.com/@zuck', + 'description': 'Me finding out Llama hit 1 BILLION downloads.', + 'ext': 'mp4', + 'id': 'DHV7vTivqWD', + 'like_count': int, + 'thumbnail': str, + 'timestamp': 1742305717, + 'title': 'Me finding out Llama hit 1 BILLION downloads.', + 'upload_date': '20250318', + 'uploader': 'zuck', + 'uploader_id': '63055343223', + 'uploader_url': 'https://www.threads.com/@zuck', + }, + }, + { + 'note': 'Post with carousel 2 images and 1 video', + 'url': 'https://www.threads.com/@zuck/post/DJDhoQfxb43', + 'info_dict': { + 'channel': 'zuck', + 'channel_is_verified': True, + 'channel_url': 'https://www.threads.com/@zuck', + 'description': 'md5:9146c2c42fd53aba9090f61ccfd64fc8', + 'id': 'DJDhoQfxb43', + 'like_count': int, + 'timestamp': 1745982529, + 'title': 'md5:9146c2c42fd53aba9090f61ccfd64fc8', + 'upload_date': '20250430', + 'uploader': 'zuck', + 'uploader_id': '63055343223', + 'uploader_url': 'https://www.threads.com/@zuck', + }, + 'playlist_count': 3, + }, + { + 'note': 'Post with 1 image', + 'url': 'https://www.threads.com/@zuck/post/DI3mC0GxkYA', + 'info_dict': { + 'channel': 'zuck', + 'channel_is_verified': True, + 'channel_url': 'https://www.threads.com/@zuck', + 'description': 'md5:e292006574f5deb5552c1ad677cee8dd', + 'ext': 'webp', + 'id': 'DI3mC0GxkYA', + 'like_count': int, + 'timestamp': 1745582191, + 'title': 'md5:e292006574f5deb5552c1ad677cee8dd', + 'upload_date': '20250425', + 'uploader': 'zuck', + 'uploader_id': '63055343223', + 'uploader_url': 'https://www.threads.com/@zuck', + }, + }, + { + 'note': 'Private Post', + 'url': 'https://www.threads.com/@enucatl/post/DLIrVcmPuFA7g5tn9OzPjsA-R8qU2HPJv_FzCo0', + 'info_dict': { + 'channel': 'enucatl', + 'channel_is_verified': False, + 'channel_url': 'https://www.threads.com/@enucatl', + 'description': '', + 'ext': 'mp4', + 'id': 'DLIrVcmPuFA7g5tn9OzPjsA-R8qU2HPJv_FzCo0', + 'like_count': int, + 'timestamp': 1745582191, + 'title': '', + 'upload_date': '20250620', + 'uploader': 
'enucatl', + 'uploader_id': '63055343223', + 'uploader_url': 'https://www.threads.com/@enucatl', + }, + 'skip': 'private account, requires authentication', + }, + ] + + def _perform_login(self, username, password): + # We are not implementing direct login. Cookies are preferred. + self.raise_login_required( + 'Login with username/password is not supported. ' + 'Use --cookies or --cookies-from-browser to provide authentication.', + method='cookies', + ) + + def _real_extract(self, url): + post_id = self._match_id(url) + webpage = self._download_webpage(url, post_id, note='Downloading post page') + + json_data = None + + json_scripts = re.findall( + r'