mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	Closes #1605, Closes #5233, Closes #1249 Authored by: Grub4K, nixxo, bashonly, pukkandan Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: nixxo <nixxo@protonmail.com>
This commit is contained in:
		| @@ -1765,6 +1765,8 @@ The following extractors use this feature: | |||||||
| #### rokfinchannel | #### rokfinchannel | ||||||
| * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks` | * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks` | ||||||
| 
 | 
 | ||||||
|  | #### twitter | ||||||
|  | * `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided | ||||||
| 
 | 
 | ||||||
| NOTE: These options may be changed/removed in the future without concern for backward compatibility | NOTE: These options may be changed/removed in the future without concern for backward compatibility | ||||||
| 
 | 
 | ||||||
|   | |||||||
| @@ -1968,6 +1968,7 @@ from .twitter import ( | |||||||
|     TwitterIE, |     TwitterIE, | ||||||
|     TwitterAmplifyIE, |     TwitterAmplifyIE, | ||||||
|     TwitterBroadcastIE, |     TwitterBroadcastIE, | ||||||
|  |     TwitterSpacesIE, | ||||||
|     TwitterShortenerIE, |     TwitterShortenerIE, | ||||||
| ) | ) | ||||||
| from .udemy import ( | from .udemy import ( | ||||||
|   | |||||||
| @@ -1,9 +1,11 @@ | |||||||
|  | import json | ||||||
| import re | import re | ||||||
|  | import urllib.error | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from .periscope import PeriscopeBaseIE, PeriscopeIE | from .periscope import PeriscopeBaseIE, PeriscopeIE | ||||||
|  | from ..compat import functools  # isort: split | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|     compat_HTTPError, |  | ||||||
|     compat_parse_qs, |     compat_parse_qs, | ||||||
|     compat_urllib_parse_unquote, |     compat_urllib_parse_unquote, | ||||||
|     compat_urllib_parse_urlparse, |     compat_urllib_parse_urlparse, | ||||||
| @@ -18,6 +20,7 @@ from ..utils import ( | |||||||
|     str_or_none, |     str_or_none, | ||||||
|     strip_or_none, |     strip_or_none, | ||||||
|     traverse_obj, |     traverse_obj, | ||||||
|  |     try_call, | ||||||
|     try_get, |     try_get, | ||||||
|     unified_timestamp, |     unified_timestamp, | ||||||
|     update_url_query, |     update_url_query, | ||||||
| @@ -28,8 +31,12 @@ from ..utils import ( | |||||||
| 
 | 
 | ||||||
| class TwitterBaseIE(InfoExtractor): | class TwitterBaseIE(InfoExtractor): | ||||||
|     _API_BASE = 'https://api.twitter.com/1.1/' |     _API_BASE = 'https://api.twitter.com/1.1/' | ||||||
|  |     _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/' | ||||||
|  |     _TOKENS = { | ||||||
|  |         'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None, | ||||||
|  |         'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None, | ||||||
|  |     } | ||||||
|     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/' |     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/' | ||||||
|     _GUEST_TOKEN = None |  | ||||||
| 
 | 
 | ||||||
|     def _extract_variant_formats(self, variant, video_id): |     def _extract_variant_formats(self, variant, video_id): | ||||||
|         variant_url = variant.get('url') |         variant_url = variant.get('url') | ||||||
| @@ -81,28 +88,73 @@ class TwitterBaseIE(InfoExtractor): | |||||||
|                 'height': int(m.group('height')), |                 'height': int(m.group('height')), | ||||||
|             }) |             }) | ||||||
| 
 | 
 | ||||||
|     def _call_api(self, path, video_id, query={}): |     @functools.cached_property | ||||||
|         headers = { |     def is_logged_in(self): | ||||||
|             'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA', |         return bool(self._get_cookies(self._API_BASE).get('auth_token')) | ||||||
|         } | 
 | ||||||
|         token = self._get_cookies(self._API_BASE).get('ct0') |     def _call_api(self, path, video_id, query={}, graphql=False): | ||||||
|         if token: |         cookies = self._get_cookies(self._API_BASE) | ||||||
|             headers['x-csrf-token'] = token.value |         headers = {} | ||||||
|         if not self._GUEST_TOKEN: | 
 | ||||||
|             self._GUEST_TOKEN = self._download_json( |         csrf_cookie = cookies.get('ct0') | ||||||
|                 self._API_BASE + 'guest/activate.json', video_id, |         if csrf_cookie: | ||||||
|                 'Downloading guest token', data=b'', |             headers['x-csrf-token'] = csrf_cookie.value | ||||||
|                 headers=headers)['guest_token'] | 
 | ||||||
|         headers['x-guest-token'] = self._GUEST_TOKEN |         if self.is_logged_in: | ||||||
|         try: |             headers.update({ | ||||||
|             return self._download_json( |                 'x-twitter-auth-type': 'OAuth2Session', | ||||||
|                 self._API_BASE + path, video_id, headers=headers, query=query) |                 'x-twitter-client-language': 'en', | ||||||
|         except ExtractorError as e: |                 'x-twitter-active-user': 'yes', | ||||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: |             }) | ||||||
|                 raise ExtractorError(self._parse_json( | 
 | ||||||
|                     e.cause.read().decode(), |         result, last_error = None, None | ||||||
|                     video_id)['errors'][0]['message'], expected=True) |         for bearer_token in self._TOKENS: | ||||||
|             raise |             headers['Authorization'] = f'Bearer {bearer_token}' | ||||||
|  | 
 | ||||||
|  |             if not self.is_logged_in: | ||||||
|  |                 if not self._TOKENS[bearer_token]: | ||||||
|  |                     headers.pop('x-guest-token', None) | ||||||
|  |                     guest_token_response = self._download_json( | ||||||
|  |                         self._API_BASE + 'guest/activate.json', video_id, | ||||||
|  |                         'Downloading guest token', data=b'', headers=headers) | ||||||
|  | 
 | ||||||
|  |                     self._TOKENS[bearer_token] = guest_token_response.get('guest_token') | ||||||
|  |                     if not self._TOKENS[bearer_token]: | ||||||
|  |                         raise ExtractorError('Could not retrieve guest token') | ||||||
|  |                 headers['x-guest-token'] = self._TOKENS[bearer_token] | ||||||
|  | 
 | ||||||
|  |             try: | ||||||
|  |                 allowed_status = {400, 403, 404} if graphql else {403} | ||||||
|  |                 result = self._download_json( | ||||||
|  |                     (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path, | ||||||
|  |                     video_id, headers=headers, query=query, expected_status=allowed_status) | ||||||
|  |                 break | ||||||
|  | 
 | ||||||
|  |             except ExtractorError as e: | ||||||
|  |                 if last_error: | ||||||
|  |                     raise last_error | ||||||
|  |                 elif not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404: | ||||||
|  |                     raise | ||||||
|  |                 last_error = e | ||||||
|  |                 self.report_warning( | ||||||
|  |                     'Twitter API gave 404 response, retrying with deprecated token. ' | ||||||
|  |                     'Only one media item can be extracted') | ||||||
|  | 
 | ||||||
|  |         if result.get('errors'): | ||||||
|  |             error_message = ', '.join(set(traverse_obj( | ||||||
|  |                 result, ('errors', ..., 'message'), expected_type=str))) or 'Unknown error' | ||||||
|  |             raise ExtractorError(f'Error(s) while querying api: {error_message}', expected=True) | ||||||
|  | 
 | ||||||
|  |         assert result is not None | ||||||
|  |         return result | ||||||
|  | 
 | ||||||
|  |     def _build_graphql_query(self, media_id): | ||||||
|  |         raise NotImplementedError('Method must be implemented to support GraphQL') | ||||||
|  | 
 | ||||||
|  |     def _call_graphql_api(self, endpoint, media_id): | ||||||
|  |         data = self._build_graphql_query(media_id) | ||||||
|  |         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()} | ||||||
|  |         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class TwitterCardIE(InfoExtractor): | class TwitterCardIE(InfoExtractor): | ||||||
| @@ -113,7 +165,7 @@ class TwitterCardIE(InfoExtractor): | |||||||
|             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', |             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', | ||||||
|             # MD5 checksums are different in different places |             # MD5 checksums are different in different places | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': '560070183650213889', |                 'id': '560070131976392705', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.", |                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.", | ||||||
|                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96', |                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96', | ||||||
| @@ -123,6 +175,13 @@ class TwitterCardIE(InfoExtractor): | |||||||
|                 'duration': 30.033, |                 'duration': 30.033, | ||||||
|                 'timestamp': 1422366112, |                 'timestamp': 1422366112, | ||||||
|                 'upload_date': '20150127', |                 'upload_date': '20150127', | ||||||
|  |                 'age_limit': 0, | ||||||
|  |                 'comment_count': int, | ||||||
|  |                 'tags': [], | ||||||
|  |                 'repost_count': int, | ||||||
|  |                 'like_count': int, | ||||||
|  |                 'display_id': '560070183650213889', | ||||||
|  |                 'uploader_url': 'https://twitter.com/Twitter', | ||||||
|             }, |             }, | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
| @@ -137,7 +196,14 @@ class TwitterCardIE(InfoExtractor): | |||||||
|                 'uploader_id': 'NASA', |                 'uploader_id': 'NASA', | ||||||
|                 'timestamp': 1437408129, |                 'timestamp': 1437408129, | ||||||
|                 'upload_date': '20150720', |                 'upload_date': '20150720', | ||||||
|  |                 'uploader_url': 'https://twitter.com/NASA', | ||||||
|  |                 'age_limit': 0, | ||||||
|  |                 'comment_count': int, | ||||||
|  |                 'like_count': int, | ||||||
|  |                 'repost_count': int, | ||||||
|  |                 'tags': ['PlutoFlyby'], | ||||||
|             }, |             }, | ||||||
|  |             'params': {'format': '[protocol=https]'} | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
|             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', |             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', | ||||||
| @@ -150,12 +216,27 @@ class TwitterCardIE(InfoExtractor): | |||||||
|                 'upload_date': '20111013', |                 'upload_date': '20111013', | ||||||
|                 'uploader': 'OMG! UBUNTU!', |                 'uploader': 'OMG! UBUNTU!', | ||||||
|                 'uploader_id': 'omgubuntu', |                 'uploader_id': 'omgubuntu', | ||||||
|  |                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ', | ||||||
|  |                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ', | ||||||
|  |                 'channel_follower_count': int, | ||||||
|  |                 'chapters': 'count:8', | ||||||
|  |                 'uploader_url': 'http://www.youtube.com/user/omgubuntu', | ||||||
|  |                 'duration': 138, | ||||||
|  |                 'categories': ['Film & Animation'], | ||||||
|  |                 'age_limit': 0, | ||||||
|  |                 'comment_count': int, | ||||||
|  |                 'availability': 'public', | ||||||
|  |                 'like_count': int, | ||||||
|  |                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg', | ||||||
|  |                 'view_count': int, | ||||||
|  |                 'tags': 'count:12', | ||||||
|  |                 'channel': 'OMG! UBUNTU!', | ||||||
|  |                 'playable_in_embed': True, | ||||||
|             }, |             }, | ||||||
|             'add_ie': ['Youtube'], |             'add_ie': ['Youtube'], | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
|             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568', |             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568', | ||||||
|             'md5': '6dabeaca9e68cbb71c99c322a4b42a11', |  | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': 'iBb2x00UVlv', |                 'id': 'iBb2x00UVlv', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
| @@ -164,9 +245,17 @@ class TwitterCardIE(InfoExtractor): | |||||||
|                 'uploader': 'ArsenalTerje', |                 'uploader': 'ArsenalTerje', | ||||||
|                 'title': 'Vine by ArsenalTerje', |                 'title': 'Vine by ArsenalTerje', | ||||||
|                 'timestamp': 1447451307, |                 'timestamp': 1447451307, | ||||||
|  |                 'alt_title': 'Vine by ArsenalTerje', | ||||||
|  |                 'comment_count': int, | ||||||
|  |                 'like_count': int, | ||||||
|  |                 'thumbnail': r're:^https?://[^?#]+\.jpg', | ||||||
|  |                 'view_count': int, | ||||||
|  |                 'repost_count': int, | ||||||
|             }, |             }, | ||||||
|             'add_ie': ['Vine'], |             'add_ie': ['Vine'], | ||||||
|         }, { |             'params': {'skip_download': 'm3u8'}, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|             'url': 'https://twitter.com/i/videos/tweet/705235433198714880', |             'url': 'https://twitter.com/i/videos/tweet/705235433198714880', | ||||||
|             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88', |             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
| @@ -180,7 +269,8 @@ class TwitterCardIE(InfoExtractor): | |||||||
|                 'upload_date': '20160303', |                 'upload_date': '20160303', | ||||||
|             }, |             }, | ||||||
|             'skip': 'This content is no longer available.', |             'skip': 'This content is no longer available.', | ||||||
|         }, { |         }, | ||||||
|  |         { | ||||||
|             'url': 'https://twitter.com/i/videos/752274308186120192', |             'url': 'https://twitter.com/i/videos/752274308186120192', | ||||||
|             'only_matching': True, |             'only_matching': True, | ||||||
|         }, |         }, | ||||||
| @@ -211,7 +301,6 @@ class TwitterIE(TwitterBaseIE): | |||||||
|             'duration': 12.922, |             'duration': 12.922, | ||||||
|             'timestamp': 1442188653, |             'timestamp': 1442188653, | ||||||
|             'upload_date': '20150913', |             'upload_date': '20150913', | ||||||
|             'age_limit': 18, |  | ||||||
|             'uploader_url': 'https://twitter.com/freethenipple', |             'uploader_url': 'https://twitter.com/freethenipple', | ||||||
|             'comment_count': int, |             'comment_count': int, | ||||||
|             'repost_count': int, |             'repost_count': int, | ||||||
| @@ -239,10 +328,10 @@ class TwitterIE(TwitterBaseIE): | |||||||
|             'id': '665052190608723968', |             'id': '665052190608723968', | ||||||
|             'display_id': '665052190608723968', |             'display_id': '665052190608723968', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.', |             'title': 'md5:3f57ab5d35116537a2ae7345cd0060d8', | ||||||
|             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', |             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', | ||||||
|             'uploader_id': 'starwars', |             'uploader_id': 'starwars', | ||||||
|             'uploader': 'Star Wars', |             'uploader': r're:Star Wars.*', | ||||||
|             'timestamp': 1447395772, |             'timestamp': 1447395772, | ||||||
|             'upload_date': '20151113', |             'upload_date': '20151113', | ||||||
|             'uploader_url': 'https://twitter.com/starwars', |             'uploader_url': 'https://twitter.com/starwars', | ||||||
| @@ -487,7 +576,7 @@ class TwitterIE(TwitterBaseIE): | |||||||
|             'uploader_url': 'https://twitter.com/oshtru', |             'uploader_url': 'https://twitter.com/oshtru', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg', |             'thumbnail': r're:^https?://.*\.jpg', | ||||||
|             'duration': 30.03, |             'duration': 30.03, | ||||||
|             'timestamp': 1665025050.0, |             'timestamp': 1665025050, | ||||||
|             'comment_count': int, |             'comment_count': int, | ||||||
|             'repost_count': int, |             'repost_count': int, | ||||||
|             'like_count': int, |             'like_count': int, | ||||||
| @@ -505,7 +594,7 @@ class TwitterIE(TwitterBaseIE): | |||||||
|             'uploader_id': 'UltimaShadowX', |             'uploader_id': 'UltimaShadowX', | ||||||
|             'uploader_url': 'https://twitter.com/UltimaShadowX', |             'uploader_url': 'https://twitter.com/UltimaShadowX', | ||||||
|             'upload_date': '20221005', |             'upload_date': '20221005', | ||||||
|             'timestamp': 1664992565.0, |             'timestamp': 1664992565, | ||||||
|             'comment_count': int, |             'comment_count': int, | ||||||
|             'repost_count': int, |             'repost_count': int, | ||||||
|             'like_count': int, |             'like_count': int, | ||||||
| @@ -514,6 +603,121 @@ class TwitterIE(TwitterBaseIE): | |||||||
|         }, |         }, | ||||||
|         'playlist_count': 4, |         'playlist_count': 4, | ||||||
|         'params': {'skip_download': True}, |         'params': {'skip_download': True}, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1575559336759263233', | ||||||
|  |             'display_id': '1575560063510810624', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9', | ||||||
|  |             'thumbnail': r're:^https?://.*\.jpg', | ||||||
|  |             'description': 'md5:95aea692fda36a12081b9629b02daa92', | ||||||
|  |             'uploader': 'Max Olson', | ||||||
|  |             'uploader_id': 'MesoMax919', | ||||||
|  |             'uploader_url': 'https://twitter.com/MesoMax919', | ||||||
|  |             'duration': 21.321, | ||||||
|  |             'timestamp': 1664477766, | ||||||
|  |             'upload_date': '20220929', | ||||||
|  |             'comment_count': int, | ||||||
|  |             'repost_count': int, | ||||||
|  |             'like_count': int, | ||||||
|  |             'tags': ['HurricaneIan'], | ||||||
|  |             'age_limit': 0, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         # Adult content, uses old token | ||||||
|  |         # Fails if not logged in (GraphQL) | ||||||
|  |         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1575199163847000068', | ||||||
|  |             'display_id': '1575199173472927762', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': str, | ||||||
|  |             'description': str, | ||||||
|  |             'uploader': str, | ||||||
|  |             'uploader_id': 'Rizdraws', | ||||||
|  |             'uploader_url': 'https://twitter.com/Rizdraws', | ||||||
|  |             'upload_date': '20220928', | ||||||
|  |             'timestamp': 1664391723, | ||||||
|  |             'thumbnail': 're:^https?://.*\\.jpg', | ||||||
|  |             'like_count': int, | ||||||
|  |             'repost_count': int, | ||||||
|  |             'comment_count': int, | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'tags': [] | ||||||
|  |         }, | ||||||
|  |         'expected_warnings': ['404'], | ||||||
|  |     }, { | ||||||
|  |         # Description is missing one https://t.co url (GraphQL) | ||||||
|  |         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435', | ||||||
|  |         'playlist_mincount': 2, | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1395079556562706435', | ||||||
|  |             'title': str, | ||||||
|  |             'tags': [], | ||||||
|  |             'uploader': str, | ||||||
|  |             'like_count': int, | ||||||
|  |             'upload_date': '20210519', | ||||||
|  |             'age_limit': 0, | ||||||
|  |             'repost_count': int, | ||||||
|  |             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7', | ||||||
|  |             'uploader_id': 'Srirachachau', | ||||||
|  |             'comment_count': int, | ||||||
|  |             'uploader_url': 'https://twitter.com/Srirachachau', | ||||||
|  |             'timestamp': 1621447860, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         # Description is missing one https://t.co url (GraphQL) | ||||||
|  |         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568', | ||||||
|  |         'playlist_mincount': 2, | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1578353380363501568', | ||||||
|  |             'title': str, | ||||||
|  |             'uploader_id': 'DavidToons_', | ||||||
|  |             'repost_count': int, | ||||||
|  |             'like_count': int, | ||||||
|  |             'uploader': str, | ||||||
|  |             'timestamp': 1665143744, | ||||||
|  |             'uploader_url': 'https://twitter.com/DavidToons_', | ||||||
|  |             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w', | ||||||
|  |             'tags': [], | ||||||
|  |             'comment_count': int, | ||||||
|  |             'upload_date': '20221007', | ||||||
|  |             'age_limit': 0, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://twitter.com/primevideouk/status/1578401165338976258', | ||||||
|  |         'playlist_count': 2, | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1578401165338976258', | ||||||
|  |             'title': str, | ||||||
|  |             'description': 'md5:659a6b517a034b4cee5d795381a2dc41', | ||||||
|  |             'uploader': str, | ||||||
|  |             'uploader_id': 'primevideouk', | ||||||
|  |             'timestamp': 1665155137, | ||||||
|  |             'upload_date': '20221007', | ||||||
|  |             'age_limit': 0, | ||||||
|  |             'uploader_url': 'https://twitter.com/primevideouk', | ||||||
|  |             'comment_count': int, | ||||||
|  |             'repost_count': int, | ||||||
|  |             'like_count': int, | ||||||
|  |             'tags': ['TheRingsOfPower'], | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         # Twitter Spaces | ||||||
|  |         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1lPJqmBeeNAJb', | ||||||
|  |             'ext': 'm4a', | ||||||
|  |             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West', | ||||||
|  |             'uploader': r're:Monique Camarra.+?', | ||||||
|  |             'uploader_id': 'MoniqueCamarra', | ||||||
|  |             'live_status': 'was_live', | ||||||
|  |             'description': 'md5:acce559345fd49f129c20dbcda3f1201', | ||||||
|  |             'timestamp': 1658407771464, | ||||||
|  |         }, | ||||||
|  |         'add_ie': ['TwitterSpaces'], | ||||||
|  |         'params': {'skip_download': 'm3u8'}, | ||||||
|     }, { |     }, { | ||||||
|         # onion route |         # onion route | ||||||
|         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273', |         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273', | ||||||
| @@ -552,10 +756,77 @@ class TwitterIE(TwitterBaseIE): | |||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|  |     def _graphql_to_legacy(self, data, twid): | ||||||
|  |         result = traverse_obj(data, ( | ||||||
|  |             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries', | ||||||
|  |             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent', | ||||||
|  |             'tweet_results', 'result' | ||||||
|  |         ), expected_type=dict, default={}, get_all=False) | ||||||
|  | 
 | ||||||
|  |         if 'tombstone' in result: | ||||||
|  |             cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str) | ||||||
|  |             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True) | ||||||
|  | 
 | ||||||
|  |         status = result.get('legacy', {}) | ||||||
|  |         status.update(traverse_obj(result, { | ||||||
|  |             'user': ('core', 'user_results', 'result', 'legacy'), | ||||||
|  |             'card': ('card', 'legacy'), | ||||||
|  |             'quoted_status': ('quoted_status_result', 'result', 'legacy'), | ||||||
|  |         }, expected_type=dict, default={})) | ||||||
|  | 
 | ||||||
|  |         # extra transformation is needed since result does not match legacy format | ||||||
|  |         binding_values = { | ||||||
|  |             binding_value.get('key'): binding_value.get('value') | ||||||
|  |             for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict) | ||||||
|  |         } | ||||||
|  |         if binding_values: | ||||||
|  |             status['card']['binding_values'] = binding_values | ||||||
|  | 
 | ||||||
|  |         return status | ||||||
|  | 
 | ||||||
|  |     def _build_graphql_query(self, media_id): | ||||||
|  |         return { | ||||||
|  |             'variables': { | ||||||
|  |                 'focalTweetId': media_id, | ||||||
|  |                 'includePromotedContent': True, | ||||||
|  |                 'with_rux_injections': False, | ||||||
|  |                 'withBirdwatchNotes': True, | ||||||
|  |                 'withCommunity': True, | ||||||
|  |                 'withDownvotePerspective': False, | ||||||
|  |                 'withQuickPromoteEligibilityTweetFields': True, | ||||||
|  |                 'withReactionsMetadata': False, | ||||||
|  |                 'withReactionsPerspective': False, | ||||||
|  |                 'withSuperFollowsTweetFields': True, | ||||||
|  |                 'withSuperFollowsUserFields': True, | ||||||
|  |                 'withV2Timeline': True, | ||||||
|  |                 'withVoice': True, | ||||||
|  |             }, | ||||||
|  |             'features': { | ||||||
|  |                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False, | ||||||
|  |                 'interactive_text_enabled': True, | ||||||
|  |                 'responsive_web_edit_tweet_api_enabled': True, | ||||||
|  |                 'responsive_web_enhance_cards_enabled': True, | ||||||
|  |                 'responsive_web_graphql_timeline_navigation_enabled': False, | ||||||
|  |                 'responsive_web_text_conversations_enabled': False, | ||||||
|  |                 'responsive_web_uc_gql_enabled': True, | ||||||
|  |                 'standardized_nudges_misinfo': True, | ||||||
|  |                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False, | ||||||
|  |                 'tweetypie_unmention_optimization_enabled': True, | ||||||
|  |                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True, | ||||||
|  |                 'verified_phone_label_enabled': False, | ||||||
|  |                 'vibe_api_enabled': True, | ||||||
|  |             }, | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         twid = self._match_id(url) |         twid = self._match_id(url) | ||||||
|         status = self._call_api( |         if self.is_logged_in or self._configuration_arg('force_graphql'): | ||||||
|             'statuses/show/%s.json' % twid, twid, { |             self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})') | ||||||
|  |             result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid) | ||||||
|  |             status = self._graphql_to_legacy(result, twid) | ||||||
|  | 
 | ||||||
|  |         else: | ||||||
|  |             status = self._call_api(f'statuses/show/{twid}.json', twid, { | ||||||
|                 'cards_platform': 'Web-12', |                 'cards_platform': 'Web-12', | ||||||
|                 'include_cards': 1, |                 'include_cards': 1, | ||||||
|                 'include_reply_count': 1, |                 'include_reply_count': 1, | ||||||
| @@ -569,7 +840,7 @@ class TwitterIE(TwitterBaseIE): | |||||||
|         user = status.get('user') or {} |         user = status.get('user') or {} | ||||||
|         uploader = user.get('name') |         uploader = user.get('name') | ||||||
|         if uploader: |         if uploader: | ||||||
|             title = '%s - %s' % (uploader, title) |             title = f'{uploader} - {title}' | ||||||
|         uploader_id = user.get('screen_name') |         uploader_id = user.get('screen_name') | ||||||
| 
 | 
 | ||||||
|         tags = [] |         tags = [] | ||||||
| @@ -642,31 +913,37 @@ class TwitterIE(TwitterBaseIE): | |||||||
| 
 | 
 | ||||||
|             card_name = card['name'].split(':')[-1] |             card_name = card['name'].split(':')[-1] | ||||||
|             if card_name == 'player': |             if card_name == 'player': | ||||||
|                 return { |                 yield { | ||||||
|                     '_type': 'url', |                     '_type': 'url', | ||||||
|                     'url': get_binding_value('player_url'), |                     'url': get_binding_value('player_url'), | ||||||
|                 } |                 } | ||||||
|             elif card_name == 'periscope_broadcast': |             elif card_name == 'periscope_broadcast': | ||||||
|                 return { |                 yield { | ||||||
|                     '_type': 'url', |                     '_type': 'url', | ||||||
|                     'url': get_binding_value('url') or get_binding_value('player_url'), |                     'url': get_binding_value('url') or get_binding_value('player_url'), | ||||||
|                     'ie_key': PeriscopeIE.ie_key(), |                     'ie_key': PeriscopeIE.ie_key(), | ||||||
|                 } |                 } | ||||||
|             elif card_name == 'broadcast': |             elif card_name == 'broadcast': | ||||||
|                 return { |                 yield { | ||||||
|                     '_type': 'url', |                     '_type': 'url', | ||||||
|                     'url': get_binding_value('broadcast_url'), |                     'url': get_binding_value('broadcast_url'), | ||||||
|                     'ie_key': TwitterBroadcastIE.ie_key(), |                     'ie_key': TwitterBroadcastIE.ie_key(), | ||||||
|                 } |                 } | ||||||
|  |             elif card_name == 'audiospace': | ||||||
|  |                 yield { | ||||||
|  |                     '_type': 'url', | ||||||
|  |                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}', | ||||||
|  |                     'ie_key': TwitterSpacesIE.ie_key(), | ||||||
|  |                 } | ||||||
|             elif card_name == 'summary': |             elif card_name == 'summary': | ||||||
|                 return { |                 yield { | ||||||
|                     '_type': 'url', |                     '_type': 'url', | ||||||
|                     'url': get_binding_value('card_url'), |                     'url': get_binding_value('card_url'), | ||||||
|                 } |                 } | ||||||
|             elif card_name == 'unified_card': |             elif card_name == 'unified_card': | ||||||
|                 media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities'] |                 unified_card = self._parse_json(get_binding_value('unified_card'), twid) | ||||||
|                 media = traverse_obj(media_entities, ..., expected_type=dict, get_all=False) |                 yield from map(extract_from_video_info, traverse_obj( | ||||||
|                 return extract_from_video_info(media) |                     unified_card, ('media_entities', ...), expected_type=dict)) | ||||||
|             # amplify, promo_video_website, promo_video_convo, appplayer, |             # amplify, promo_video_website, promo_video_convo, appplayer, | ||||||
|             # video_direct_message, poll2choice_video, poll3choice_video, |             # video_direct_message, poll2choice_video, poll3choice_video, | ||||||
|             # poll4choice_video, ... |             # poll4choice_video, ... | ||||||
| @@ -690,7 +967,7 @@ class TwitterIE(TwitterBaseIE): | |||||||
|                         'height': int_or_none(image.get('height')), |                         'height': int_or_none(image.get('height')), | ||||||
|                     }) |                     }) | ||||||
| 
 | 
 | ||||||
|                 return { |                 yield { | ||||||
|                     'formats': formats, |                     'formats': formats, | ||||||
|                     'subtitles': subtitles, |                     'subtitles': subtitles, | ||||||
|                     'thumbnails': thumbnails, |                     'thumbnails': thumbnails, | ||||||
| @@ -700,11 +977,8 @@ class TwitterIE(TwitterBaseIE): | |||||||
| 
 | 
 | ||||||
|         media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo') |         media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo') | ||||||
|         videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict)) |         videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict)) | ||||||
|         entries = [{**info, **data, 'display_id': twid} for data in videos if data] |         cards = extract_from_card_info(status.get('card')) | ||||||
| 
 |         entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)] | ||||||
|         data = extract_from_card_info(status.get('card')) |  | ||||||
|         if data: |  | ||||||
|             entries.append({**info, **data, 'display_id': twid}) |  | ||||||
| 
 | 
 | ||||||
|         if not entries: |         if not entries: | ||||||
|             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none) |             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none) | ||||||
| @@ -730,13 +1004,14 @@ class TwitterAmplifyIE(TwitterBaseIE): | |||||||
| 
 | 
 | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951', |         'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951', | ||||||
|         'md5': '7df102d0b9fd7066b86f3159f8e81bf6', |         'md5': 'fec25801d18a4557c5c9f33d2c379ffa', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951', |             'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Twitter Video', |             'title': 'Twitter Video', | ||||||
|             'thumbnail': 're:^https?://.*', |             'thumbnail': 're:^https?://.*', | ||||||
|         }, |         }, | ||||||
|  |         'params': {'format': '[protocol=https]'}, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -745,7 +1020,7 @@ class TwitterAmplifyIE(TwitterBaseIE): | |||||||
| 
 | 
 | ||||||
|         vmap_url = self._html_search_meta( |         vmap_url = self._html_search_meta( | ||||||
|             'twitter:amplify:vmap', webpage, 'vmap url') |             'twitter:amplify:vmap', webpage, 'vmap url') | ||||||
|         formats = self._extract_formats_from_vmap_url(vmap_url, video_id) |         formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id) | ||||||
| 
 | 
 | ||||||
|         thumbnails = [] |         thumbnails = [] | ||||||
|         thumbnail = self._html_search_meta( |         thumbnail = self._html_search_meta( | ||||||
| @@ -793,6 +1068,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): | |||||||
|             'title': 'Andrea May Sahouri - Periscope Broadcast', |             'title': 'Andrea May Sahouri - Periscope Broadcast', | ||||||
|             'uploader': 'Andrea May Sahouri', |             'uploader': 'Andrea May Sahouri', | ||||||
|             'uploader_id': '1PXEdBZWpGwKe', |             'uploader_id': '1PXEdBZWpGwKe', | ||||||
|  |             'thumbnail': r're:^https?://[^?#]+\.jpg\?token=', | ||||||
|  |             'view_count': int, | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @@ -804,7 +1081,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): | |||||||
|         info = self._parse_broadcast_data(broadcast, broadcast_id) |         info = self._parse_broadcast_data(broadcast, broadcast_id) | ||||||
|         media_key = broadcast['media_key'] |         media_key = broadcast['media_key'] | ||||||
|         source = self._call_api( |         source = self._call_api( | ||||||
|             'live_video_stream/status/' + media_key, media_key)['source'] |             f'live_video_stream/status/{media_key}', media_key)['source'] | ||||||
|         m3u8_url = source.get('noRedirectPlaybackUrl') or source['location'] |         m3u8_url = source.get('noRedirectPlaybackUrl') or source['location'] | ||||||
|         if '/live_video_stream/geoblocked/' in m3u8_url: |         if '/live_video_stream/geoblocked/' in m3u8_url: | ||||||
|             self.raise_geo_restricted() |             self.raise_geo_restricted() | ||||||
| @@ -816,6 +1093,100 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): | |||||||
|         return info |         return info | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | class TwitterSpacesIE(TwitterBaseIE): | ||||||
|  |     IE_NAME = 'twitter:spaces' | ||||||
|  |     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})' | ||||||
|  |     _TWITTER_GRAPHQL = 'https://twitter.com/i/api/graphql/HPEisOmj1epUNLCWTYhUWw/' | ||||||
|  | 
 | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1RDxlgyvNXzJL', | ||||||
|  |             'ext': 'm4a', | ||||||
|  |             'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro', | ||||||
|  |             'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe', | ||||||
|  |             'uploader': r're:Lucio Di Gaetano.*?', | ||||||
|  |             'uploader_id': 'luciodigaetano', | ||||||
|  |             'live_status': 'was_live', | ||||||
|  |             'timestamp': 1659877956397, | ||||||
|  |         }, | ||||||
|  |         'params': {'skip_download': 'm3u8'}, | ||||||
|  |     }] | ||||||
|  | 
 | ||||||
|  |     SPACE_STATUS = { | ||||||
|  |         'notstarted': 'is_upcoming', | ||||||
|  |         'ended': 'was_live', | ||||||
|  |         'running': 'is_live', | ||||||
|  |         'timedout': 'post_live', | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     def _build_graphql_query(self, space_id): | ||||||
|  |         return { | ||||||
|  |             'variables': { | ||||||
|  |                 'id': space_id, | ||||||
|  |                 'isMetatagsQuery': True, | ||||||
|  |                 'withDownvotePerspective': False, | ||||||
|  |                 'withReactionsMetadata': False, | ||||||
|  |                 'withReactionsPerspective': False, | ||||||
|  |                 'withReplays': True, | ||||||
|  |                 'withSuperFollowsUserFields': True, | ||||||
|  |                 'withSuperFollowsTweetFields': True, | ||||||
|  |             }, | ||||||
|  |             'features': { | ||||||
|  |                 'dont_mention_me_view_api_enabled': True, | ||||||
|  |                 'interactive_text_enabled': True, | ||||||
|  |                 'responsive_web_edit_tweet_api_enabled': True, | ||||||
|  |                 'responsive_web_enhance_cards_enabled': True, | ||||||
|  |                 'responsive_web_uc_gql_enabled': True, | ||||||
|  |                 'spaces_2022_h2_clipping': True, | ||||||
|  |                 'spaces_2022_h2_spaces_communities': False, | ||||||
|  |                 'standardized_nudges_misinfo': True, | ||||||
|  |                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False, | ||||||
|  |                 'vibe_api_enabled': True, | ||||||
|  |             }, | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         space_id = self._match_id(url) | ||||||
|  |         space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace'] | ||||||
|  |         if not space_data: | ||||||
|  |             raise ExtractorError('Twitter Space not found', expected=True) | ||||||
|  | 
 | ||||||
|  |         metadata = space_data['metadata'] | ||||||
|  |         live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()]) | ||||||
|  | 
 | ||||||
|  |         formats = [] | ||||||
|  |         if live_status == 'is_upcoming': | ||||||
|  |             self.raise_no_formats('Twitter Space not started yet', expected=True) | ||||||
|  |         elif live_status == 'post_live': | ||||||
|  |             self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True) | ||||||
|  |         else: | ||||||
|  |             source = self._call_api( | ||||||
|  |                 f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source'] | ||||||
|  | 
 | ||||||
|  |             # XXX: Native downloader does not work | ||||||
|  |             formats = self._extract_m3u8_formats( | ||||||
|  |                 traverse_obj(source, 'noRedirectPlaybackUrl', 'location'), | ||||||
|  |                 metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live') | ||||||
|  |             for fmt in formats: | ||||||
|  |                 fmt.update({'vcodec': 'none', 'acodec': 'aac'}) | ||||||
|  | 
 | ||||||
|  |         participants = ', '.join(traverse_obj( | ||||||
|  |             space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet' | ||||||
|  |         return { | ||||||
|  |             'id': space_id, | ||||||
|  |             'title': metadata.get('title'), | ||||||
|  |             'description': f'Twitter Space participated by {participants}', | ||||||
|  |             'uploader': traverse_obj( | ||||||
|  |                 metadata, ('creator_results', 'result', 'legacy', 'name')), | ||||||
|  |             'uploader_id': traverse_obj( | ||||||
|  |                 metadata, ('creator_results', 'result', 'legacy', 'screen_name')), | ||||||
|  |             'live_status': live_status, | ||||||
|  |             'timestamp': metadata.get('created_at'), | ||||||
|  |             'formats': formats, | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class TwitterShortenerIE(TwitterBaseIE): | class TwitterShortenerIE(TwitterBaseIE): | ||||||
|     IE_NAME = 'twitter:shortener' |     IE_NAME = 'twitter:shortener' | ||||||
|     _VALID_URL = r'https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)' |     _VALID_URL = r'https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)' | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Simon Sawicki
					Simon Sawicki