mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	Closes #1605, Closes #5233, Closes #1249 Authored by: Grub4K, nixxo, bashonly, pukkandan Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: nixxo <nixxo@protonmail.com>
This commit is contained in:
		| @@ -1,9 +1,11 @@ | ||||
| import json | ||||
| import re | ||||
| import urllib.error | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from .periscope import PeriscopeBaseIE, PeriscopeIE | ||||
| from ..compat import functools  # isort: split | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_parse_unquote, | ||||
|     compat_urllib_parse_urlparse, | ||||
| @@ -18,6 +20,7 @@ from ..utils import ( | ||||
|     str_or_none, | ||||
|     strip_or_none, | ||||
|     traverse_obj, | ||||
|     try_call, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     update_url_query, | ||||
| @@ -28,8 +31,12 @@ from ..utils import ( | ||||
| 
 | ||||
| class TwitterBaseIE(InfoExtractor): | ||||
|     _API_BASE = 'https://api.twitter.com/1.1/' | ||||
|     _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/' | ||||
|     _TOKENS = { | ||||
|         'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None, | ||||
|         'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None, | ||||
|     } | ||||
|     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/' | ||||
|     _GUEST_TOKEN = None | ||||
| 
 | ||||
|     def _extract_variant_formats(self, variant, video_id): | ||||
|         variant_url = variant.get('url') | ||||
| @@ -81,28 +88,73 @@ class TwitterBaseIE(InfoExtractor): | ||||
|                 'height': int(m.group('height')), | ||||
|             }) | ||||
| 
 | ||||
|     def _call_api(self, path, video_id, query={}): | ||||
|         headers = { | ||||
|             'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA', | ||||
|         } | ||||
|         token = self._get_cookies(self._API_BASE).get('ct0') | ||||
|         if token: | ||||
|             headers['x-csrf-token'] = token.value | ||||
|         if not self._GUEST_TOKEN: | ||||
|             self._GUEST_TOKEN = self._download_json( | ||||
|                 self._API_BASE + 'guest/activate.json', video_id, | ||||
|                 'Downloading guest token', data=b'', | ||||
|                 headers=headers)['guest_token'] | ||||
|         headers['x-guest-token'] = self._GUEST_TOKEN | ||||
|         try: | ||||
|             return self._download_json( | ||||
|                 self._API_BASE + path, video_id, headers=headers, query=query) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: | ||||
|                 raise ExtractorError(self._parse_json( | ||||
|                     e.cause.read().decode(), | ||||
|                     video_id)['errors'][0]['message'], expected=True) | ||||
|             raise | ||||
|     @functools.cached_property | ||||
|     def is_logged_in(self): | ||||
|         return bool(self._get_cookies(self._API_BASE).get('auth_token')) | ||||
| 
 | ||||
|     def _call_api(self, path, video_id, query={}, graphql=False): | ||||
|         cookies = self._get_cookies(self._API_BASE) | ||||
|         headers = {} | ||||
| 
 | ||||
|         csrf_cookie = cookies.get('ct0') | ||||
|         if csrf_cookie: | ||||
|             headers['x-csrf-token'] = csrf_cookie.value | ||||
| 
 | ||||
|         if self.is_logged_in: | ||||
|             headers.update({ | ||||
|                 'x-twitter-auth-type': 'OAuth2Session', | ||||
|                 'x-twitter-client-language': 'en', | ||||
|                 'x-twitter-active-user': 'yes', | ||||
|             }) | ||||
| 
 | ||||
|         result, last_error = None, None | ||||
|         for bearer_token in self._TOKENS: | ||||
|             headers['Authorization'] = f'Bearer {bearer_token}' | ||||
| 
 | ||||
|             if not self.is_logged_in: | ||||
|                 if not self._TOKENS[bearer_token]: | ||||
|                     headers.pop('x-guest-token', None) | ||||
|                     guest_token_response = self._download_json( | ||||
|                         self._API_BASE + 'guest/activate.json', video_id, | ||||
|                         'Downloading guest token', data=b'', headers=headers) | ||||
| 
 | ||||
|                     self._TOKENS[bearer_token] = guest_token_response.get('guest_token') | ||||
|                     if not self._TOKENS[bearer_token]: | ||||
|                         raise ExtractorError('Could not retrieve guest token') | ||||
|                 headers['x-guest-token'] = self._TOKENS[bearer_token] | ||||
| 
 | ||||
|             try: | ||||
|                 allowed_status = {400, 403, 404} if graphql else {403} | ||||
|                 result = self._download_json( | ||||
|                     (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path, | ||||
|                     video_id, headers=headers, query=query, expected_status=allowed_status) | ||||
|                 break | ||||
| 
 | ||||
|             except ExtractorError as e: | ||||
|                 if last_error: | ||||
|                     raise last_error | ||||
|                 elif not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404: | ||||
|                     raise | ||||
|                 last_error = e | ||||
|                 self.report_warning( | ||||
|                     'Twitter API gave 404 response, retrying with deprecated token. ' | ||||
|                     'Only one media item can be extracted') | ||||
| 
 | ||||
|         if result.get('errors'): | ||||
|             error_message = ', '.join(set(traverse_obj( | ||||
|                 result, ('errors', ..., 'message'), expected_type=str))) or 'Unknown error' | ||||
|             raise ExtractorError(f'Error(s) while querying api: {error_message}', expected=True) | ||||
| 
 | ||||
|         assert result is not None | ||||
|         return result | ||||
| 
 | ||||
|     def _build_graphql_query(self, media_id): | ||||
|         raise NotImplementedError('Method must be implemented to support GraphQL') | ||||
| 
 | ||||
|     def _call_graphql_api(self, endpoint, media_id): | ||||
|         data = self._build_graphql_query(media_id) | ||||
|         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()} | ||||
|         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data') | ||||
| 
 | ||||
| 
 | ||||
| class TwitterCardIE(InfoExtractor): | ||||
| @@ -113,7 +165,7 @@ class TwitterCardIE(InfoExtractor): | ||||
|             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', | ||||
|             # MD5 checksums are different in different places | ||||
|             'info_dict': { | ||||
|                 'id': '560070183650213889', | ||||
|                 'id': '560070131976392705', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.", | ||||
|                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96', | ||||
| @@ -123,6 +175,13 @@ class TwitterCardIE(InfoExtractor): | ||||
|                 'duration': 30.033, | ||||
|                 'timestamp': 1422366112, | ||||
|                 'upload_date': '20150127', | ||||
|                 'age_limit': 0, | ||||
|                 'comment_count': int, | ||||
|                 'tags': [], | ||||
|                 'repost_count': int, | ||||
|                 'like_count': int, | ||||
|                 'display_id': '560070183650213889', | ||||
|                 'uploader_url': 'https://twitter.com/Twitter', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -137,7 +196,14 @@ class TwitterCardIE(InfoExtractor): | ||||
|                 'uploader_id': 'NASA', | ||||
|                 'timestamp': 1437408129, | ||||
|                 'upload_date': '20150720', | ||||
|                 'uploader_url': 'https://twitter.com/NASA', | ||||
|                 'age_limit': 0, | ||||
|                 'comment_count': int, | ||||
|                 'like_count': int, | ||||
|                 'repost_count': int, | ||||
|                 'tags': ['PlutoFlyby'], | ||||
|             }, | ||||
|             'params': {'format': '[protocol=https]'} | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', | ||||
| @@ -150,12 +216,27 @@ class TwitterCardIE(InfoExtractor): | ||||
|                 'upload_date': '20111013', | ||||
|                 'uploader': 'OMG! UBUNTU!', | ||||
|                 'uploader_id': 'omgubuntu', | ||||
|                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ', | ||||
|                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ', | ||||
|                 'channel_follower_count': int, | ||||
|                 'chapters': 'count:8', | ||||
|                 'uploader_url': 'http://www.youtube.com/user/omgubuntu', | ||||
|                 'duration': 138, | ||||
|                 'categories': ['Film & Animation'], | ||||
|                 'age_limit': 0, | ||||
|                 'comment_count': int, | ||||
|                 'availability': 'public', | ||||
|                 'like_count': int, | ||||
|                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg', | ||||
|                 'view_count': int, | ||||
|                 'tags': 'count:12', | ||||
|                 'channel': 'OMG! UBUNTU!', | ||||
|                 'playable_in_embed': True, | ||||
|             }, | ||||
|             'add_ie': ['Youtube'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568', | ||||
|             'md5': '6dabeaca9e68cbb71c99c322a4b42a11', | ||||
|             'info_dict': { | ||||
|                 'id': 'iBb2x00UVlv', | ||||
|                 'ext': 'mp4', | ||||
| @@ -164,9 +245,17 @@ class TwitterCardIE(InfoExtractor): | ||||
|                 'uploader': 'ArsenalTerje', | ||||
|                 'title': 'Vine by ArsenalTerje', | ||||
|                 'timestamp': 1447451307, | ||||
|                 'alt_title': 'Vine by ArsenalTerje', | ||||
|                 'comment_count': int, | ||||
|                 'like_count': int, | ||||
|                 'thumbnail': r're:^https?://[^?#]+\.jpg', | ||||
|                 'view_count': int, | ||||
|                 'repost_count': int, | ||||
|             }, | ||||
|             'add_ie': ['Vine'], | ||||
|         }, { | ||||
|             'params': {'skip_download': 'm3u8'}, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://twitter.com/i/videos/tweet/705235433198714880', | ||||
|             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88', | ||||
|             'info_dict': { | ||||
| @@ -180,7 +269,8 @@ class TwitterCardIE(InfoExtractor): | ||||
|                 'upload_date': '20160303', | ||||
|             }, | ||||
|             'skip': 'This content is no longer available.', | ||||
|         }, { | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://twitter.com/i/videos/752274308186120192', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
| @@ -211,7 +301,6 @@ class TwitterIE(TwitterBaseIE): | ||||
|             'duration': 12.922, | ||||
|             'timestamp': 1442188653, | ||||
|             'upload_date': '20150913', | ||||
|             'age_limit': 18, | ||||
|             'uploader_url': 'https://twitter.com/freethenipple', | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -239,10 +328,10 @@ class TwitterIE(TwitterBaseIE): | ||||
|             'id': '665052190608723968', | ||||
|             'display_id': '665052190608723968', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.', | ||||
|             'title': 'md5:3f57ab5d35116537a2ae7345cd0060d8', | ||||
|             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', | ||||
|             'uploader_id': 'starwars', | ||||
|             'uploader': 'Star Wars', | ||||
|             'uploader': r're:Star Wars.*', | ||||
|             'timestamp': 1447395772, | ||||
|             'upload_date': '20151113', | ||||
|             'uploader_url': 'https://twitter.com/starwars', | ||||
| @@ -487,7 +576,7 @@ class TwitterIE(TwitterBaseIE): | ||||
|             'uploader_url': 'https://twitter.com/oshtru', | ||||
|             'thumbnail': r're:^https?://.*\.jpg', | ||||
|             'duration': 30.03, | ||||
|             'timestamp': 1665025050.0, | ||||
|             'timestamp': 1665025050, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
|             'like_count': int, | ||||
| @@ -505,7 +594,7 @@ class TwitterIE(TwitterBaseIE): | ||||
|             'uploader_id': 'UltimaShadowX', | ||||
|             'uploader_url': 'https://twitter.com/UltimaShadowX', | ||||
|             'upload_date': '20221005', | ||||
|             'timestamp': 1664992565.0, | ||||
|             'timestamp': 1664992565, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
|             'like_count': int, | ||||
| @@ -514,6 +603,121 @@ class TwitterIE(TwitterBaseIE): | ||||
|         }, | ||||
|         'playlist_count': 4, | ||||
|         'params': {'skip_download': True}, | ||||
|     }, { | ||||
|         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624', | ||||
|         'info_dict': { | ||||
|             'id': '1575559336759263233', | ||||
|             'display_id': '1575560063510810624', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9', | ||||
|             'thumbnail': r're:^https?://.*\.jpg', | ||||
|             'description': 'md5:95aea692fda36a12081b9629b02daa92', | ||||
|             'uploader': 'Max Olson', | ||||
|             'uploader_id': 'MesoMax919', | ||||
|             'uploader_url': 'https://twitter.com/MesoMax919', | ||||
|             'duration': 21.321, | ||||
|             'timestamp': 1664477766, | ||||
|             'upload_date': '20220929', | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
|             'like_count': int, | ||||
|             'tags': ['HurricaneIan'], | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|     }, { | ||||
|         # Adult content, uses old token | ||||
|         # Fails if not logged in (GraphQL) | ||||
|         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762', | ||||
|         'info_dict': { | ||||
|             'id': '1575199163847000068', | ||||
|             'display_id': '1575199173472927762', | ||||
|             'ext': 'mp4', | ||||
|             'title': str, | ||||
|             'description': str, | ||||
|             'uploader': str, | ||||
|             'uploader_id': 'Rizdraws', | ||||
|             'uploader_url': 'https://twitter.com/Rizdraws', | ||||
|             'upload_date': '20220928', | ||||
|             'timestamp': 1664391723, | ||||
|             'thumbnail': 're:^https?://.*\\.jpg', | ||||
|             'like_count': int, | ||||
|             'repost_count': int, | ||||
|             'comment_count': int, | ||||
|             'age_limit': 18, | ||||
|             'tags': [] | ||||
|         }, | ||||
|         'expected_warnings': ['404'], | ||||
|     }, { | ||||
|         # Description is missing one https://t.co url (GraphQL) | ||||
|         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435', | ||||
|         'playlist_mincount': 2, | ||||
|         'info_dict': { | ||||
|             'id': '1395079556562706435', | ||||
|             'title': str, | ||||
|             'tags': [], | ||||
|             'uploader': str, | ||||
|             'like_count': int, | ||||
|             'upload_date': '20210519', | ||||
|             'age_limit': 0, | ||||
|             'repost_count': int, | ||||
|             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7', | ||||
|             'uploader_id': 'Srirachachau', | ||||
|             'comment_count': int, | ||||
|             'uploader_url': 'https://twitter.com/Srirachachau', | ||||
|             'timestamp': 1621447860, | ||||
|         }, | ||||
|     }, { | ||||
|         # Description is missing one https://t.co url (GraphQL) | ||||
|         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568', | ||||
|         'playlist_mincount': 2, | ||||
|         'info_dict': { | ||||
|             'id': '1578353380363501568', | ||||
|             'title': str, | ||||
|             'uploader_id': 'DavidToons_', | ||||
|             'repost_count': int, | ||||
|             'like_count': int, | ||||
|             'uploader': str, | ||||
|             'timestamp': 1665143744, | ||||
|             'uploader_url': 'https://twitter.com/DavidToons_', | ||||
|             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w', | ||||
|             'tags': [], | ||||
|             'comment_count': int, | ||||
|             'upload_date': '20221007', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://twitter.com/primevideouk/status/1578401165338976258', | ||||
|         'playlist_count': 2, | ||||
|         'info_dict': { | ||||
|             'id': '1578401165338976258', | ||||
|             'title': str, | ||||
|             'description': 'md5:659a6b517a034b4cee5d795381a2dc41', | ||||
|             'uploader': str, | ||||
|             'uploader_id': 'primevideouk', | ||||
|             'timestamp': 1665155137, | ||||
|             'upload_date': '20221007', | ||||
|             'age_limit': 0, | ||||
|             'uploader_url': 'https://twitter.com/primevideouk', | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
|             'like_count': int, | ||||
|             'tags': ['TheRingsOfPower'], | ||||
|         }, | ||||
|     }, { | ||||
|         # Twitter Spaces | ||||
|         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360', | ||||
|         'info_dict': { | ||||
|             'id': '1lPJqmBeeNAJb', | ||||
|             'ext': 'm4a', | ||||
|             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West', | ||||
|             'uploader': r're:Monique Camarra.+?', | ||||
|             'uploader_id': 'MoniqueCamarra', | ||||
|             'live_status': 'was_live', | ||||
|             'description': 'md5:acce559345fd49f129c20dbcda3f1201', | ||||
|             'timestamp': 1658407771464, | ||||
|         }, | ||||
|         'add_ie': ['TwitterSpaces'], | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }, { | ||||
|         # onion route | ||||
|         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273', | ||||
| @@ -552,10 +756,77 @@ class TwitterIE(TwitterBaseIE): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
| 
 | ||||
|     def _graphql_to_legacy(self, data, twid): | ||||
|         result = traverse_obj(data, ( | ||||
|             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries', | ||||
|             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent', | ||||
|             'tweet_results', 'result' | ||||
|         ), expected_type=dict, default={}, get_all=False) | ||||
| 
 | ||||
|         if 'tombstone' in result: | ||||
|             cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str) | ||||
|             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True) | ||||
| 
 | ||||
|         status = result.get('legacy', {}) | ||||
|         status.update(traverse_obj(result, { | ||||
|             'user': ('core', 'user_results', 'result', 'legacy'), | ||||
|             'card': ('card', 'legacy'), | ||||
|             'quoted_status': ('quoted_status_result', 'result', 'legacy'), | ||||
|         }, expected_type=dict, default={})) | ||||
| 
 | ||||
|         # extra transformation is needed since result does not match legacy format | ||||
|         binding_values = { | ||||
|             binding_value.get('key'): binding_value.get('value') | ||||
|             for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict) | ||||
|         } | ||||
|         if binding_values: | ||||
|             status['card']['binding_values'] = binding_values | ||||
| 
 | ||||
|         return status | ||||
| 
 | ||||
|     def _build_graphql_query(self, media_id): | ||||
|         return { | ||||
|             'variables': { | ||||
|                 'focalTweetId': media_id, | ||||
|                 'includePromotedContent': True, | ||||
|                 'with_rux_injections': False, | ||||
|                 'withBirdwatchNotes': True, | ||||
|                 'withCommunity': True, | ||||
|                 'withDownvotePerspective': False, | ||||
|                 'withQuickPromoteEligibilityTweetFields': True, | ||||
|                 'withReactionsMetadata': False, | ||||
|                 'withReactionsPerspective': False, | ||||
|                 'withSuperFollowsTweetFields': True, | ||||
|                 'withSuperFollowsUserFields': True, | ||||
|                 'withV2Timeline': True, | ||||
|                 'withVoice': True, | ||||
|             }, | ||||
|             'features': { | ||||
|                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False, | ||||
|                 'interactive_text_enabled': True, | ||||
|                 'responsive_web_edit_tweet_api_enabled': True, | ||||
|                 'responsive_web_enhance_cards_enabled': True, | ||||
|                 'responsive_web_graphql_timeline_navigation_enabled': False, | ||||
|                 'responsive_web_text_conversations_enabled': False, | ||||
|                 'responsive_web_uc_gql_enabled': True, | ||||
|                 'standardized_nudges_misinfo': True, | ||||
|                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False, | ||||
|                 'tweetypie_unmention_optimization_enabled': True, | ||||
|                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True, | ||||
|                 'verified_phone_label_enabled': False, | ||||
|                 'vibe_api_enabled': True, | ||||
|             }, | ||||
|         } | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         twid = self._match_id(url) | ||||
|         status = self._call_api( | ||||
|             'statuses/show/%s.json' % twid, twid, { | ||||
|         if self.is_logged_in or self._configuration_arg('force_graphql'): | ||||
|             self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})') | ||||
|             result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid) | ||||
|             status = self._graphql_to_legacy(result, twid) | ||||
| 
 | ||||
|         else: | ||||
|             status = self._call_api(f'statuses/show/{twid}.json', twid, { | ||||
|                 'cards_platform': 'Web-12', | ||||
|                 'include_cards': 1, | ||||
|                 'include_reply_count': 1, | ||||
| @@ -569,7 +840,7 @@ class TwitterIE(TwitterBaseIE): | ||||
|         user = status.get('user') or {} | ||||
|         uploader = user.get('name') | ||||
|         if uploader: | ||||
|             title = '%s - %s' % (uploader, title) | ||||
|             title = f'{uploader} - {title}' | ||||
|         uploader_id = user.get('screen_name') | ||||
| 
 | ||||
|         tags = [] | ||||
| @@ -642,31 +913,37 @@ class TwitterIE(TwitterBaseIE): | ||||
| 
 | ||||
|             card_name = card['name'].split(':')[-1] | ||||
|             if card_name == 'player': | ||||
|                 return { | ||||
|                 yield { | ||||
|                     '_type': 'url', | ||||
|                     'url': get_binding_value('player_url'), | ||||
|                 } | ||||
|             elif card_name == 'periscope_broadcast': | ||||
|                 return { | ||||
|                 yield { | ||||
|                     '_type': 'url', | ||||
|                     'url': get_binding_value('url') or get_binding_value('player_url'), | ||||
|                     'ie_key': PeriscopeIE.ie_key(), | ||||
|                 } | ||||
|             elif card_name == 'broadcast': | ||||
|                 return { | ||||
|                 yield { | ||||
|                     '_type': 'url', | ||||
|                     'url': get_binding_value('broadcast_url'), | ||||
|                     'ie_key': TwitterBroadcastIE.ie_key(), | ||||
|                 } | ||||
|             elif card_name == 'audiospace': | ||||
|                 yield { | ||||
|                     '_type': 'url', | ||||
|                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}', | ||||
|                     'ie_key': TwitterSpacesIE.ie_key(), | ||||
|                 } | ||||
|             elif card_name == 'summary': | ||||
|                 return { | ||||
|                 yield { | ||||
|                     '_type': 'url', | ||||
|                     'url': get_binding_value('card_url'), | ||||
|                 } | ||||
|             elif card_name == 'unified_card': | ||||
|                 media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities'] | ||||
|                 media = traverse_obj(media_entities, ..., expected_type=dict, get_all=False) | ||||
|                 return extract_from_video_info(media) | ||||
|                 unified_card = self._parse_json(get_binding_value('unified_card'), twid) | ||||
|                 yield from map(extract_from_video_info, traverse_obj( | ||||
|                     unified_card, ('media_entities', ...), expected_type=dict)) | ||||
|             # amplify, promo_video_website, promo_video_convo, appplayer, | ||||
|             # video_direct_message, poll2choice_video, poll3choice_video, | ||||
|             # poll4choice_video, ... | ||||
| @@ -690,7 +967,7 @@ class TwitterIE(TwitterBaseIE): | ||||
|                         'height': int_or_none(image.get('height')), | ||||
|                     }) | ||||
| 
 | ||||
|                 return { | ||||
|                 yield { | ||||
|                     'formats': formats, | ||||
|                     'subtitles': subtitles, | ||||
|                     'thumbnails': thumbnails, | ||||
| @@ -700,11 +977,8 @@ class TwitterIE(TwitterBaseIE): | ||||
| 
 | ||||
|         media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo') | ||||
|         videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict)) | ||||
|         entries = [{**info, **data, 'display_id': twid} for data in videos if data] | ||||
| 
 | ||||
|         data = extract_from_card_info(status.get('card')) | ||||
|         if data: | ||||
|             entries.append({**info, **data, 'display_id': twid}) | ||||
|         cards = extract_from_card_info(status.get('card')) | ||||
|         entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)] | ||||
| 
 | ||||
|         if not entries: | ||||
|             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none) | ||||
| @@ -730,13 +1004,14 @@ class TwitterAmplifyIE(TwitterBaseIE): | ||||
| 
 | ||||
|     _TEST = { | ||||
|         'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951', | ||||
|         'md5': '7df102d0b9fd7066b86f3159f8e81bf6', | ||||
|         'md5': 'fec25801d18a4557c5c9f33d2c379ffa', | ||||
|         'info_dict': { | ||||
|             'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Twitter Video', | ||||
|             'thumbnail': 're:^https?://.*', | ||||
|         }, | ||||
|         'params': {'format': '[protocol=https]'}, | ||||
|     } | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
| @@ -745,7 +1020,7 @@ class TwitterAmplifyIE(TwitterBaseIE): | ||||
| 
 | ||||
|         vmap_url = self._html_search_meta( | ||||
|             'twitter:amplify:vmap', webpage, 'vmap url') | ||||
|         formats = self._extract_formats_from_vmap_url(vmap_url, video_id) | ||||
|         formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id) | ||||
| 
 | ||||
|         thumbnails = [] | ||||
|         thumbnail = self._html_search_meta( | ||||
| @@ -793,6 +1068,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): | ||||
|             'title': 'Andrea May Sahouri - Periscope Broadcast', | ||||
|             'uploader': 'Andrea May Sahouri', | ||||
|             'uploader_id': '1PXEdBZWpGwKe', | ||||
|             'thumbnail': r're:^https?://[^?#]+\.jpg\?token=', | ||||
|             'view_count': int, | ||||
|         }, | ||||
|     } | ||||
| 
 | ||||
| @@ -804,7 +1081,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): | ||||
|         info = self._parse_broadcast_data(broadcast, broadcast_id) | ||||
|         media_key = broadcast['media_key'] | ||||
|         source = self._call_api( | ||||
|             'live_video_stream/status/' + media_key, media_key)['source'] | ||||
|             f'live_video_stream/status/{media_key}', media_key)['source'] | ||||
|         m3u8_url = source.get('noRedirectPlaybackUrl') or source['location'] | ||||
|         if '/live_video_stream/geoblocked/' in m3u8_url: | ||||
|             self.raise_geo_restricted() | ||||
| @@ -816,6 +1093,100 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): | ||||
|         return info | ||||
| 
 | ||||
| 
 | ||||
| class TwitterSpacesIE(TwitterBaseIE): | ||||
|     IE_NAME = 'twitter:spaces' | ||||
|     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})' | ||||
|     _TWITTER_GRAPHQL = 'https://twitter.com/i/api/graphql/HPEisOmj1epUNLCWTYhUWw/' | ||||
| 
 | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL', | ||||
|         'info_dict': { | ||||
|             'id': '1RDxlgyvNXzJL', | ||||
|             'ext': 'm4a', | ||||
|             'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro', | ||||
|             'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe', | ||||
|             'uploader': r're:Lucio Di Gaetano.*?', | ||||
|             'uploader_id': 'luciodigaetano', | ||||
|             'live_status': 'was_live', | ||||
|             'timestamp': 1659877956397, | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }] | ||||
| 
 | ||||
|     SPACE_STATUS = { | ||||
|         'notstarted': 'is_upcoming', | ||||
|         'ended': 'was_live', | ||||
|         'running': 'is_live', | ||||
|         'timedout': 'post_live', | ||||
|     } | ||||
| 
 | ||||
|     def _build_graphql_query(self, space_id): | ||||
|         return { | ||||
|             'variables': { | ||||
|                 'id': space_id, | ||||
|                 'isMetatagsQuery': True, | ||||
|                 'withDownvotePerspective': False, | ||||
|                 'withReactionsMetadata': False, | ||||
|                 'withReactionsPerspective': False, | ||||
|                 'withReplays': True, | ||||
|                 'withSuperFollowsUserFields': True, | ||||
|                 'withSuperFollowsTweetFields': True, | ||||
|             }, | ||||
|             'features': { | ||||
|                 'dont_mention_me_view_api_enabled': True, | ||||
|                 'interactive_text_enabled': True, | ||||
|                 'responsive_web_edit_tweet_api_enabled': True, | ||||
|                 'responsive_web_enhance_cards_enabled': True, | ||||
|                 'responsive_web_uc_gql_enabled': True, | ||||
|                 'spaces_2022_h2_clipping': True, | ||||
|                 'spaces_2022_h2_spaces_communities': False, | ||||
|                 'standardized_nudges_misinfo': True, | ||||
|                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False, | ||||
|                 'vibe_api_enabled': True, | ||||
|             }, | ||||
|         } | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         space_id = self._match_id(url) | ||||
|         space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace'] | ||||
|         if not space_data: | ||||
|             raise ExtractorError('Twitter Space not found', expected=True) | ||||
| 
 | ||||
|         metadata = space_data['metadata'] | ||||
|         live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()]) | ||||
| 
 | ||||
|         formats = [] | ||||
|         if live_status == 'is_upcoming': | ||||
|             self.raise_no_formats('Twitter Space not started yet', expected=True) | ||||
|         elif live_status == 'post_live': | ||||
|             self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True) | ||||
|         else: | ||||
|             source = self._call_api( | ||||
|                 f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source'] | ||||
| 
 | ||||
|             # XXX: Native downloader does not work | ||||
|             formats = self._extract_m3u8_formats( | ||||
|                 traverse_obj(source, 'noRedirectPlaybackUrl', 'location'), | ||||
|                 metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live') | ||||
|             for fmt in formats: | ||||
|                 fmt.update({'vcodec': 'none', 'acodec': 'aac'}) | ||||
| 
 | ||||
|         participants = ', '.join(traverse_obj( | ||||
|             space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet' | ||||
|         return { | ||||
|             'id': space_id, | ||||
|             'title': metadata.get('title'), | ||||
|             'description': f'Twitter Space participated by {participants}', | ||||
|             'uploader': traverse_obj( | ||||
|                 metadata, ('creator_results', 'result', 'legacy', 'name')), | ||||
|             'uploader_id': traverse_obj( | ||||
|                 metadata, ('creator_results', 'result', 'legacy', 'screen_name')), | ||||
|             'live_status': live_status, | ||||
|             'timestamp': metadata.get('created_at'), | ||||
|             'formats': formats, | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
| class TwitterShortenerIE(TwitterBaseIE): | ||||
|     IE_NAME = 'twitter:shortener' | ||||
|     _VALID_URL = r'https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Simon Sawicki
					Simon Sawicki