mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			216 lines
		
	
	
		
			9.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			216 lines
		
	
	
		
			9.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from .common import InfoExtractor
 | |
| from ..utils import (
 | |
|     ExtractorError,
 | |
|     clean_html,
 | |
|     determine_ext,
 | |
|     int_or_none,
 | |
|     parse_iso8601,
 | |
|     url_or_none,
 | |
| )
 | |
| from ..utils.traversal import traverse_obj
 | |
| 
 | |
| 
 | |
class MSNIE(InfoExtractor):
    """Extractor for msn.com pages of type ``video``, ``webcontent`` and ``article``."""

    _VALID_URL = r'https?://(?:(?:www|preview)\.)?msn\.com/(?P<locale>[a-z]{2}-[a-z]{2})/(?:[^/?#]+/)+(?P<display_id>[^/?#]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'https://www.msn.com/en-gb/video/news/president-macron-interrupts-trump-over-ukraine-funding/vi-AA1zMcD7',
        'info_dict': {
            'id': 'AA1zMcD7',
            'ext': 'mp4',
            'display_id': 'president-macron-interrupts-trump-over-ukraine-funding',
            'title': 'President Macron interrupts Trump over Ukraine funding',
            'description': 'md5:5fd3857ac25849e7a56cb25fbe1a2a8b',
            'uploader': 'k! News UK',
            'uploader_id': 'BB1hz5Rj',
            'duration': 59,
            'thumbnail': 'https://img-s-msn-com.akamaized.net/tenant/amp/entityid/AA1zMagX.img',
            'tags': 'count:14',
            'timestamp': 1740510914,
            'upload_date': '20250225',
            'release_timestamp': 1740513600,
            'release_date': '20250225',
            'modified_timestamp': 1741413241,
            'modified_date': '20250308',
        },
    }, {
        'url': 'https://www.msn.com/en-gb/video/watch/films-success-saved-adam-pearsons-acting-career/vi-AA1znZGE?ocid=hpmsn',
        'info_dict': {
            'id': 'AA1znZGE',
            'ext': 'mp4',
            'display_id': 'films-success-saved-adam-pearsons-acting-career',
            'title': "Films' success saved Adam Pearson's acting career",
            'description': 'md5:98c05f7bd9ab4f9c423400f62f2d3da5',
            'uploader': 'Sky News',
            'uploader_id': 'AA2eki',
            'duration': 52,
            'thumbnail': 'https://img-s-msn-com.akamaized.net/tenant/amp/entityid/AA1zo7nU.img',
            'timestamp': 1739993965,
            'upload_date': '20250219',
            'release_timestamp': 1739977753,
            'release_date': '20250219',
            'modified_timestamp': 1742076259,
            'modified_date': '20250315',
        },
    }, {
        'url': 'https://www.msn.com/en-us/entertainment/news/rock-frontman-replacements-you-might-not-know-happened/vi-AA1yLVcD',
        'info_dict': {
            'id': 'AA1yLVcD',
            'ext': 'mp4',
            'display_id': 'rock-frontman-replacements-you-might-not-know-happened',
            'title': 'Rock Frontman Replacements You Might Not Know Happened',
            'description': 'md5:451a125496ff0c9f6816055bb1808da9',
            'uploader': 'Grunge (Video)',
            'uploader_id': 'BB1oveoV',
            'duration': 596,
            'thumbnail': 'https://img-s-msn-com.akamaized.net/tenant/amp/entityid/AA1yM4OJ.img',
            'timestamp': 1739223456,
            'upload_date': '20250210',
            'release_timestamp': 1739219731,
            'release_date': '20250210',
            'modified_timestamp': 1741427272,
            'modified_date': '20250308',
        },
    }, {
        # Dailymotion Embed
        'url': 'https://www.msn.com/de-de/nachrichten/other/the-first-descendant-gameplay-trailer-zu-serena-der-neuen-gefl%C3%BCgelten-nachfahrin/vi-AA1B1d06',
        'info_dict': {
            'id': 'x9g6oli',
            'ext': 'mp4',
            'title': 'The First Descendant: Gameplay-Trailer zu Serena, der neuen geflügelten Nachfahrin',
            'description': '',
            'uploader': 'MeinMMO',
            'uploader_id': 'x2mvqi4',
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'duration': 60,
            'thumbnail': 'https://s1.dmcdn.net/v/Y3fO61drj56vPB9SS/x1080',
            'tags': ['MeinMMO', 'The First Descendant'],
            'timestamp': 1742124877,
            'upload_date': '20250316',
        },
    }, {
        # Youtube Embed
        'url': 'https://www.msn.com/en-gb/video/webcontent/web-content/vi-AA1ybFaJ',
        'info_dict': {
            'id': 'kQSChWu95nE',
            'ext': 'mp4',
            'title': '7 Daily Habits to Nurture Your Personal Growth',
            'description': 'md5:6f233c68341b74dee30c8c121924e827',
            'uploader': 'TopThink',
            'uploader_id': '@TopThink',
            'uploader_url': 'https://www.youtube.com/@TopThink',
            'channel': 'TopThink',
            'channel_id': 'UCMlGmHokrQRp-RaNO7aq4Uw',
            'channel_url': 'https://www.youtube.com/channel/UCMlGmHokrQRp-RaNO7aq4Uw',
            'channel_is_verified': True,
            'channel_follower_count': int,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'duration': 705,
            'thumbnail': 'https://i.ytimg.com/vi/kQSChWu95nE/maxresdefault.jpg',
            'categories': ['Howto & Style'],
            'tags': ['topthink', 'top think', 'personal growth'],
            'timestamp': 1722711620,
            'upload_date': '20240803',
            'playable_in_embed': True,
            'availability': 'public',
            'live_status': 'not_live',
        },
    }, {
        # Article with social embed
        'url': 'https://www.msn.com/en-in/news/techandscience/watch-earth-sets-and-rises-behind-moon-in-breathtaking-blue-ghost-video/ar-AA1zKoAc',
        'info_dict': {
            'id': 'AA1zKoAc',
            'title': 'Watch: Earth sets and rises behind Moon in breathtaking Blue Ghost video',
            'description': 'md5:0ad51cfa77e42e7f0c46cf98a619dbbf',
            'uploader': 'India Today',
            'uploader_id': 'AAyFWG',
            'tags': 'count:11',
            'timestamp': 1740485034,
            'upload_date': '20250225',
            'release_timestamp': 1740484875,
            'release_date': '20250225',
            'modified_timestamp': 1740488561,
            'modified_date': '20250225',
        },
        'playlist_count': 1,
    }]

    def _real_extract(self, url):
        """Resolve an MSN page URL through the assets.msn.com Detail endpoint.

        The JSON ``type`` field selects the result:

        * ``article``    -> playlist built from the ``socialEmbeds`` post URLs
        * ``webcontent`` -> URL result for ``sourceHref``
        * ``video``      -> URL result for ``sourceHref`` when the third-party
          player is enabled, otherwise an info dict with formats and subtitles

        Raises ExtractorError when a ``webcontent`` page has no source URL and
        for any other page type.
        """
        mobj = self._match_valid_url(url)
        locale, display_id, page_id = mobj.group('locale', 'display_id', 'id')

        detail = self._download_json(
            f'https://assets.msn.com/content/view/v2/Detail/{locale}/{page_id}', page_id)

        # Metadata shared by the video and article results
        metadata = traverse_obj(detail, {
            'title': ('title', {str}),
            'description': (('abstract', ('body', {clean_html})), {str}, filter, any),
            'timestamp': ('createdDateTime', {parse_iso8601}),
            'release_timestamp': ('publishedDateTime', {parse_iso8601}),
            'modified_timestamp': ('updatedDateTime', {parse_iso8601}),
            'thumbnail': ('thumbnail', 'image', 'url', {url_or_none}),
            'duration': ('videoMetadata', 'playTime', {int_or_none}),
            'tags': ('keywords', ..., {str}),
            'uploader': ('provider', 'name', {str}),
            'uploader_id': ('provider', 'id', {str}),
        })

        page_type = detail['type']
        source_url = traverse_obj(detail, ('sourceHref', {url_or_none}))

        if page_type == 'article':
            embed_urls = traverse_obj(detail, ('socialEmbeds', ..., 'postUrl', {url_or_none}))
            entries = [self.url_result(embed_url) for embed_url in embed_urls]
            return self.playlist_result(entries, page_id, **metadata)

        if page_type == 'webcontent':
            if source_url:
                return self.url_result(source_url)
            raise ExtractorError('Could not find source URL')

        if page_type != 'video':
            raise ExtractorError(f'Unsupported page type: {page_type}')

        # From here on the page is a video page
        third_party_enabled = traverse_obj(detail, ('thirdPartyVideoPlayer', 'enabled'))
        if third_party_enabled and source_url:
            return self.url_result(source_url)

        formats, subtitles = [], {}
        video_files = traverse_obj(detail, (
            'videoMetadata', 'externalVideoFiles', lambda _, v: url_or_none(v['url'])))
        for video_file in video_files:
            media_url = video_file['url']
            media_ext = determine_ext(media_url)
            if media_ext == 'm3u8':
                manifest_fmts, manifest_subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, page_id, 'mp4', m3u8_id='hls', fatal=False)
            elif media_ext == 'mpd':
                manifest_fmts, manifest_subs = self._extract_mpd_formats_and_subtitles(
                    media_url, page_id, mpd_id='dash', fatal=False)
            else:
                # Not a manifest: describe the file itself as a single format
                formats.append(traverse_obj(video_file, {
                    'url': 'url',
                    'format_id': ('format', {str}),
                    'filesize': ('fileSize', {int_or_none}),
                    'height': ('height', {int_or_none}),
                    'width': ('width', {int_or_none}),
                }))
                continue
            formats.extend(manifest_fmts)
            self._merge_subtitles(manifest_subs, target=subtitles)

        captions = traverse_obj(detail, (
            'videoMetadata', 'closedCaptions', lambda _, v: url_or_none(v['href'])))
        for caption in captions:
            # Captions without a locale are filed under 'en-us'
            caption_lang = caption.get('locale') or 'en-us'
            caption_track = {
                'url': caption['href'],
                'ext': 'ttml',
            }
            subtitles.setdefault(caption_lang, []).append(caption_track)

        return {
            'id': page_id,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
 | 
