mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-31 20:11:26 +00:00
[ie/twitter] Do not extract non-video posts from unified_cards (#15431)
Closes #15402
Authored by: bashonly
This commit is contained in:
@@ -416,6 +416,7 @@ class TwitterCardIE(InfoExtractor):
|
|||||||
'live_status': 'not_live',
|
'live_status': 'not_live',
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
'add_ie': ['Youtube'],
|
||||||
|
'skip': 'The page does not exist',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
|
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
|
||||||
@@ -617,6 +618,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'_old_archive_ids': ['twitter 852138619213144067'],
|
'_old_archive_ids': ['twitter 852138619213144067'],
|
||||||
},
|
},
|
||||||
|
'skip': 'Suspended',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -763,10 +765,10 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1577719286659006464',
|
'id': '1577719286659006464',
|
||||||
'title': 'Ultima - Test',
|
'title': r're:Ultima.* - Test$',
|
||||||
'description': 'Test https://t.co/Y3KEZD7Dad',
|
'description': 'Test https://t.co/Y3KEZD7Dad',
|
||||||
'channel_id': '168922496',
|
'channel_id': '168922496',
|
||||||
'uploader': 'Ultima',
|
'uploader': r're:Ultima.*',
|
||||||
'uploader_id': 'UltimaShadowX',
|
'uploader_id': 'UltimaShadowX',
|
||||||
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
||||||
'upload_date': '20221005',
|
'upload_date': '20221005',
|
||||||
@@ -895,11 +897,12 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'uploader': r're:Monique Camarra.+?',
|
'uploader': r're:Monique Camarra.+?',
|
||||||
'uploader_id': 'MoniqueCamarra',
|
'uploader_id': 'MoniqueCamarra',
|
||||||
'live_status': 'was_live',
|
'live_status': 'was_live',
|
||||||
'release_timestamp': 1658417414,
|
'release_timestamp': 1658417305,
|
||||||
'description': r're:Twitter Space participated by Sergej Sumlenny.+',
|
'description': r're:Twitter Space participated by Sergej Sumlenny.+',
|
||||||
'timestamp': 1658407771,
|
'timestamp': 1658407771,
|
||||||
'release_date': '20220721',
|
'release_date': '20220721',
|
||||||
'upload_date': '20220721',
|
'upload_date': '20220721',
|
||||||
|
'thumbnail': 'https://pbs.twimg.com/profile_images/1920514378006188033/xQs6J_yI_400x400.jpg',
|
||||||
},
|
},
|
||||||
'add_ie': ['TwitterSpaces'],
|
'add_ie': ['TwitterSpaces'],
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
@@ -1010,10 +1013,10 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
||||||
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'uploader': 'Boy Called Mün',
|
'uploader': 'D U N I Y A',
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'upload_date': '20221206',
|
'upload_date': '20221206',
|
||||||
'title': 'Boy Called Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
'title': 'D U N I Y A - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
@@ -1068,6 +1071,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'_old_archive_ids': ['twitter 1695424220702888009'],
|
'_old_archive_ids': ['twitter 1695424220702888009'],
|
||||||
},
|
},
|
||||||
|
'skip': 'Suspended',
|
||||||
}, {
|
}, {
|
||||||
# retweeted_status w/ legacy API
|
# retweeted_status w/ legacy API
|
||||||
'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
|
'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
|
||||||
@@ -1092,6 +1096,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'_old_archive_ids': ['twitter 1695424220702888009'],
|
'_old_archive_ids': ['twitter 1695424220702888009'],
|
||||||
},
|
},
|
||||||
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
|
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
|
||||||
|
'skip': 'Suspended',
|
||||||
}, {
|
}, {
|
||||||
# Broadcast embedded in tweet
|
# Broadcast embedded in tweet
|
||||||
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
|
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
|
||||||
@@ -1135,7 +1140,6 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
# "stale tweet" with typename "TweetWithVisibilityResults"
|
# "stale tweet" with typename "TweetWithVisibilityResults"
|
||||||
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
|
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
|
||||||
'md5': '511377ff8dfa7545307084dca4dce319',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1724883339285544960',
|
'id': '1724883339285544960',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -1182,6 +1186,30 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'_old_archive_ids': ['twitter 1790637656616943991'],
|
'_old_archive_ids': ['twitter 1790637656616943991'],
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# unified_card with 2 items of type video and photo
|
||||||
|
'url': 'https://x.com/TopHeroes_/status/2001950365332455490',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2001841416071450628',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': '2001950365332455490',
|
||||||
|
'title': 'Top Heroes - Forgot to close My heroes solo level up in my phone ✨Unlock the fog,...',
|
||||||
|
'description': r're:Forgot to close My heroes solo level up in my phone ✨Unlock the fog.+',
|
||||||
|
'uploader': 'Top Heroes',
|
||||||
|
'uploader_id': 'TopHeroes_',
|
||||||
|
'uploader_url': 'https://twitter.com/TopHeroes_',
|
||||||
|
'channel_id': '1737324725620326400',
|
||||||
|
'comment_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'age_limit': 0,
|
||||||
|
'duration': 30.278,
|
||||||
|
'thumbnail': 'https://pbs.twimg.com/amplify_video_thumb/2001841416071450628/img/hpy5KpJh4pO17b65.jpg?name=orig',
|
||||||
|
'tags': [],
|
||||||
|
'timestamp': 1766137136,
|
||||||
|
'upload_date': '20251219',
|
||||||
|
'_old_archive_ids': ['twitter 2001950365332455490'],
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# onion route
|
# onion route
|
||||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||||
@@ -1422,14 +1450,14 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
if not card:
|
if not card:
|
||||||
return
|
return
|
||||||
|
|
||||||
self.write_debug(f'Extracting from card info: {card.get("url")}')
|
card_name = card['name'].split(':')[-1]
|
||||||
|
self.write_debug(f'Extracting from {card_name} card info: {card.get("url")}')
|
||||||
binding_values = card['binding_values']
|
binding_values = card['binding_values']
|
||||||
|
|
||||||
def get_binding_value(k):
|
def get_binding_value(k):
|
||||||
o = binding_values.get(k) or {}
|
o = binding_values.get(k) or {}
|
||||||
return try_get(o, lambda x: x[x['type'].lower() + '_value'])
|
return try_get(o, lambda x: x[x['type'].lower() + '_value'])
|
||||||
|
|
||||||
card_name = card['name'].split(':')[-1]
|
|
||||||
if card_name == 'player':
|
if card_name == 'player':
|
||||||
yield {
|
yield {
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
@@ -1461,7 +1489,7 @@ class TwitterIE(TwitterBaseIE):
|
|||||||
elif card_name == 'unified_card':
|
elif card_name == 'unified_card':
|
||||||
unified_card = self._parse_json(get_binding_value('unified_card'), twid)
|
unified_card = self._parse_json(get_binding_value('unified_card'), twid)
|
||||||
yield from map(extract_from_video_info, traverse_obj(
|
yield from map(extract_from_video_info, traverse_obj(
|
||||||
unified_card, ('media_entities', ...), expected_type=dict))
|
unified_card, ('media_entities', lambda _, v: v['type'] == 'video')))
|
||||||
# amplify, promo_video_website, promo_video_convo, appplayer,
|
# amplify, promo_video_website, promo_video_convo, appplayer,
|
||||||
# video_direct_message, poll2choice_video, poll3choice_video,
|
# video_direct_message, poll2choice_video, poll3choice_video,
|
||||||
# poll4choice_video, ...
|
# poll4choice_video, ...
|
||||||
|
|||||||
Reference in New Issue
Block a user