mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[mediaset] fix extraction (closes #16977)
This commit is contained in:
		| @@ -3,75 +3,75 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from .theplatform import ThePlatformBaseIE | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     parse_duration, | ||||
|     try_get, | ||||
|     unified_strdate, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MediasetIE(InfoExtractor): | ||||
| class MediasetIE(ThePlatformBaseIE): | ||||
|     _TP_TLD = 'eu' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     (?: | ||||
|                         mediaset:| | ||||
|                         https?:// | ||||
|                             (?:www\.)?video\.mediaset\.it/ | ||||
|                             (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/ | ||||
|                             (?: | ||||
|                                 (?:video|on-demand)/(?:[^/]+/)+[^/]+_| | ||||
|                                 player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid= | ||||
|                                 player/index\.html\?.*?\bprogramGuid= | ||||
|                             ) | ||||
|                     )(?P<id>[0-9]+) | ||||
|                     )(?P<id>[0-9A-Z]{16}) | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         # full episode | ||||
|         'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html', | ||||
|         'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824', | ||||
|         'md5': '9b75534d42c44ecef7bf1ffeacb7f85d', | ||||
|         'info_dict': { | ||||
|             'id': '661824', | ||||
|             'id': 'FAFU000000661824', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Quarta puntata', | ||||
|             'description': 'md5:7183696d6df570e3412a5ef74b27c5e2', | ||||
|             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'duration': 1414, | ||||
|             'creator': 'mediaset', | ||||
|             'duration': 1414.26, | ||||
|             'upload_date': '20161107', | ||||
|             'series': 'Hello Goodbye', | ||||
|             'categories': ['reality'], | ||||
|             'timestamp': 1478532900, | ||||
|             'uploader': 'Rete 4', | ||||
|             'uploader_id': 'R4', | ||||
|         }, | ||||
|         'expected_warnings': ['is not a supported codec'], | ||||
|     }, { | ||||
|         'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html', | ||||
|         'md5': '1276f966ac423d16ba255ce867de073e', | ||||
|         'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501', | ||||
|         'md5': '288532f0ad18307705b01e581304cd7b', | ||||
|         'info_dict': { | ||||
|             'id': '846685', | ||||
|             'id': 'F309013801000501', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Puntata del 25 maggio', | ||||
|             'description': 'md5:ee2e456e3eb1dba5e814596655bb5296', | ||||
|             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'duration': 6565, | ||||
|             'creator': 'mediaset', | ||||
|             'upload_date': '20180525', | ||||
|             'duration': 6565.007, | ||||
|             'upload_date': '20180526', | ||||
|             'series': 'Matrix', | ||||
|             'categories': ['infotainment'], | ||||
|             'timestamp': 1527326245, | ||||
|             'uploader': 'Canale 5', | ||||
|             'uploader_id': 'C5', | ||||
|         }, | ||||
|         'expected_warnings': ['HTTP Error 403: Forbidden'], | ||||
|     }, { | ||||
|         # clip | ||||
|         'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html', | ||||
|         'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # iframe simple | ||||
|         'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true', | ||||
|         'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/) | ||||
|         'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true', | ||||
|         'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'mediaset:661824', | ||||
|         'url': 'mediaset:FAFU000000665924', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
| @@ -84,61 +84,54 @@ class MediasetIE(InfoExtractor): | ||||
|                 webpage)] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         video = self._download_json( | ||||
|             'https://www.video.mediaset.it/html/metainfo.sjson', | ||||
|             video_id, 'Downloading media info', query={ | ||||
|                 'id': video_id | ||||
|             })['video'] | ||||
|  | ||||
|         title = video['title'] | ||||
|         media_id = video.get('guid') or video_id | ||||
|  | ||||
|         video_list = self._download_json( | ||||
|             'http://cdnsel01.mediaset.net/GetCdn2018.aspx', | ||||
|             video_id, 'Downloading video CDN JSON', query={ | ||||
|                 'streamid': media_id, | ||||
|                 'format': 'json', | ||||
|             })['videoList'] | ||||
|         guid = self._match_id(url) | ||||
|         tp_path = 'PR1GhC/media/guid/2702976343/' + guid | ||||
|         info = self._extract_theplatform_metadata(tp_path, guid) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_url in video_list: | ||||
|             ext = determine_ext(format_url) | ||||
|             if ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     format_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|             elif ext == 'mpd': | ||||
|                 formats.extend(self._extract_mpd_formats( | ||||
|                     format_url, video_id, mpd_id='dash', fatal=False)) | ||||
|             elif ext == 'ism' or '.ism' in format_url: | ||||
|                 formats.extend(self._extract_ism_formats( | ||||
|                     format_url, video_id, ism_id='mss', fatal=False)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': format_url, | ||||
|                     'format_id': determine_ext(format_url), | ||||
|                 }) | ||||
|         subtitles = {} | ||||
|         first_e = None | ||||
|         for asset_type in ('SD', 'HD'): | ||||
|             for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'): | ||||
|                 try: | ||||
|                     tp_formats, tp_subtitles = self._extract_theplatform_smil( | ||||
|                         update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), { | ||||
|                             'mbr': 'true', | ||||
|                             'formats': f, | ||||
|                             'assetTypes': asset_type, | ||||
|                         }), guid, 'Downloading %s %s SMIL data' % (f, asset_type)) | ||||
|                 except ExtractorError as e: | ||||
|                     if not first_e: | ||||
|                         first_e = e | ||||
|                     break | ||||
|                 for tp_f in tp_formats: | ||||
|                     tp_f['quality'] = 1 if asset_type == 'HD' else 0 | ||||
|                 formats.extend(tp_formats) | ||||
|                 subtitles = self._merge_subtitles(subtitles, tp_subtitles) | ||||
|         if first_e and not formats: | ||||
|             raise first_e | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         creator = try_get( | ||||
|             video, lambda x: x['brand-info']['publisher'], compat_str) | ||||
|         category = try_get( | ||||
|             video, lambda x: x['brand-info']['category'], compat_str) | ||||
|         categories = [category] if category else None | ||||
|         fields = [] | ||||
|         for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))): | ||||
|             fields.extend(templ % repl for repl in repls) | ||||
|         feed_data = self._download_json( | ||||
|             'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid, | ||||
|             guid, fatal=False, query={'fields': ','.join(fields)}) | ||||
|         if feed_data: | ||||
|             publish_info = feed_data.get('mediasetprogram$publishInfo') or {} | ||||
|             info.update({ | ||||
|                 'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')), | ||||
|                 'season_number': int_or_none(feed_data.get('tvSeasonNumber')), | ||||
|                 'series': feed_data.get('mediasetprogram$brandTitle'), | ||||
|                 'uploader': publish_info.get('description'), | ||||
|                 'uploader_id': publish_info.get('channel'), | ||||
|                 'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')), | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': video.get('short-description'), | ||||
|             'thumbnail': video.get('thumbnail'), | ||||
|             'duration': parse_duration(video.get('duration')), | ||||
|             'creator': creator, | ||||
|             'upload_date': unified_strdate(video.get('production-date')), | ||||
|             'webpage_url': video.get('url'), | ||||
|             'series': video.get('brand-value'), | ||||
|             'season': video.get('season'), | ||||
|             'categories': categories, | ||||
|         info.update({ | ||||
|             'id': guid, | ||||
|             'formats': formats, | ||||
|         } | ||||
|             'subtitles': subtitles, | ||||
|         }) | ||||
|         return info | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Remita Amine
					Remita Amine