mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	[extractor/reddit] Support cookies and short URLs (#6825)
Closes #6665, Closes #6753 Authored by: bashonly
This commit is contained in:
		| @@ -1,4 +1,3 @@ | ||||
| import random | ||||
| import urllib.parse | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| @@ -14,7 +13,7 @@ from ..utils import ( | ||||
| 
 | ||||
| 
 | ||||
| class RedditIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/(?P<slug>(?:r|user)/[^/]+/comments/(?P<id>[^/?#&]+))' | ||||
|     _VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', | ||||
|         'info_dict': { | ||||
| @@ -109,6 +108,46 @@ class RedditIE(InfoExtractor): | ||||
|             'age_limit': 0, | ||||
|             'channel_id': 'dumbfuckers_club', | ||||
|         }, | ||||
|     }, { | ||||
|         # post link without subreddit | ||||
|         'url': 'https://www.reddit.com/comments/124pp33', | ||||
|         'md5': '15eec9d828adcef4468b741a7e45a395', | ||||
|         'info_dict': { | ||||
|             'id': 'antsenjc2jqa1', | ||||
|             'ext': 'mp4', | ||||
|             'display_id': '124pp33', | ||||
|             'title': 'Harmless prank of some old friends', | ||||
|             'uploader': 'Dudezila', | ||||
|             'channel_id': 'ContagiousLaughter', | ||||
|             'duration': 17, | ||||
|             'upload_date': '20230328', | ||||
|             'timestamp': 1680012043, | ||||
|             'thumbnail': r're:^https?://.*\.(?:jpg|png)', | ||||
|             'age_limit': 0, | ||||
|             'comment_count': int, | ||||
|             'dislike_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|     }, { | ||||
|         # quarantined subreddit post | ||||
|         'url': 'https://old.reddit.com/r/GenZedong/comments/12fujy3/based_hasan/', | ||||
|         'md5': '3156ea69e3c1f1b6259683c5abd36e71', | ||||
|         'info_dict': { | ||||
|             'id': '8bwtclfggpsa1', | ||||
|             'ext': 'mp4', | ||||
|             'display_id': '12fujy3', | ||||
|             'title': 'Based Hasan?', | ||||
|             'uploader': 'KingNigelXLII', | ||||
|             'channel_id': 'GenZedong', | ||||
|             'duration': 16, | ||||
|             'upload_date': '20230408', | ||||
|             'timestamp': 1680979138, | ||||
|             'age_limit': 0, | ||||
|             'comment_count': int, | ||||
|             'dislike_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|         'skip': 'Requires account that has opted-in to the GenZedong subreddit', | ||||
|     }, { | ||||
|         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', | ||||
|         'only_matching': True, | ||||
| @@ -137,21 +176,26 @@ class RedditIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _gen_session_id(): | ||||
|         id_length = 16 | ||||
|         rand_max = 1 << (id_length * 4) | ||||
|         return '%0.*x' % (id_length, random.randrange(rand_max)) | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id') | ||||
|         host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id') | ||||
| 
 | ||||
|         self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id()) | ||||
|         self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D') | ||||
|         data = self._download_json(f'https://{subdomain}reddit.com/{slug}/.json', video_id, fatal=False) | ||||
|         data = self._download_json( | ||||
|             f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403) | ||||
|         if not data: | ||||
|             # Fall back to old.reddit.com in case the requested subdomain fails | ||||
|             data = self._download_json(f'https://old.reddit.com/{slug}/.json', video_id) | ||||
|             fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com' | ||||
|             self.to_screen(f'{host} request failed, retrying with {fallback_host}') | ||||
|             data = self._download_json( | ||||
|                 f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403) | ||||
| 
 | ||||
|         if traverse_obj(data, 'error') == 403: | ||||
|             reason = data.get('reason') | ||||
|             if reason == 'quarantined': | ||||
|                 self.raise_login_required('Quarantined subreddit; an account that has opted in is required') | ||||
|             elif reason == 'private': | ||||
|                 self.raise_login_required('Private subreddit; an account that has been approved is required') | ||||
|             else: | ||||
|                 raise ExtractorError(f'HTTP Error 403 Forbidden; reason given: {reason}') | ||||
| 
 | ||||
|         data = data[0]['data']['children'][0]['data'] | ||||
|         video_url = data['url'] | ||||
| 
 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bashonly
					bashonly