mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	Update to ytdl-commit-dfbbe29
[redbulltv] fix embed data extraction
dfbbe2902f
			
			
This commit is contained in:
		| @@ -6,7 +6,7 @@ from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML | ||||
|     merge_dicts, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -24,7 +24,8 @@ class EroProfileIE(InfoExtractor): | ||||
|             'title': 'sexy babe softcore', | ||||
|             'thumbnail': r're:https?://.*\.jpg', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|         }, | ||||
|         'skip': 'Video not found', | ||||
|     }, { | ||||
|         'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file', | ||||
|         'md5': '1baa9602ede46ce904c431f5418d8916', | ||||
| @@ -77,19 +78,15 @@ class EroProfileIE(InfoExtractor): | ||||
|             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'], | ||||
|             webpage, 'video id', default=None) | ||||
|  | ||||
|         video_url = unescapeHTML(self._search_regex( | ||||
|             r'<source src="([^"]+)', webpage, 'video url')) | ||||
|         title = self._html_search_regex( | ||||
|             r'Title:</th><td>([^<]+)</td>', webpage, 'title') | ||||
|         thumbnail = self._search_regex( | ||||
|             r'onclick="showVideoPlayer\(\)"><img src="([^"]+)', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|             (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'), | ||||
|             webpage, 'title') | ||||
|  | ||||
|         return { | ||||
|         info = self._parse_html5_media_entries(url, webpage, video_id)[0] | ||||
|  | ||||
|         return merge_dicts(info, { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|         }) | ||||
|   | ||||
| @@ -985,6 +985,7 @@ from .platzi import ( | ||||
| from .playfm import PlayFMIE | ||||
| from .playplustv import PlayPlusTVIE | ||||
| from .plays import PlaysTVIE | ||||
| from .playstuff import PlayStuffIE | ||||
| from .playtvak import PlaytvakIE | ||||
| from .playvid import PlayvidIE | ||||
| from .playwire import PlaywireIE | ||||
|   | ||||
| @@ -126,6 +126,7 @@ from .viqeo import ViqeoIE | ||||
| from .expressen import ExpressenIE | ||||
| from .zype import ZypeIE | ||||
| from .odnoklassniki import OdnoklassnikiIE | ||||
| from .vk import VKIE | ||||
| from .kinja import KinjaEmbedIE | ||||
| from .gedidigital import GediDigitalIE | ||||
| from .rcs import RCSEmbedsIE | ||||
| @@ -2252,6 +2253,10 @@ class GenericIE(InfoExtractor): | ||||
|             'playlist_mincount': 52, | ||||
|         }, | ||||
|         { | ||||
|             # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed) | ||||
|             'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html', | ||||
|             'only_matching': True, | ||||
|         }, { | ||||
|             # WimTv embed player | ||||
|             'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/', | ||||
|             'info_dict': { | ||||
| @@ -2803,6 +2808,11 @@ class GenericIE(InfoExtractor): | ||||
|         if odnoklassniki_url: | ||||
|             return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) | ||||
|  | ||||
|         # Look for sibnet embedded player | ||||
|         sibnet_urls = VKIE._extract_sibnet_urls(webpage) | ||||
|         if sibnet_urls: | ||||
|             return self.playlist_from_matches(sibnet_urls, video_id, video_title) | ||||
|  | ||||
|         # Look for embedded ivi player | ||||
|         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
| @@ -3454,6 +3464,9 @@ class GenericIE(InfoExtractor): | ||||
|                         'url': src, | ||||
|                         'ext': (mimetype2ext(src_type) | ||||
|                                 or ext if ext in KNOWN_EXTENSIONS else 'mp4'), | ||||
|                         'http_headers': { | ||||
|                             'Referer': full_response.geturl(), | ||||
|                         }, | ||||
|                     }) | ||||
|             if formats: | ||||
|                 self._sort_formats(formats) | ||||
| @@ -3522,7 +3535,7 @@ class GenericIE(InfoExtractor): | ||||
|             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) | ||||
|             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: | ||||
|             if m_video_type is not None: | ||||
|                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)) | ||||
|                 found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage)) | ||||
|         if not found: | ||||
|             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' | ||||
|             found = re.search( | ||||
|   | ||||
| @@ -182,7 +182,7 @@ class ORFRadioIE(InfoExtractor): | ||||
|             duration = end - start if end and start else None | ||||
|             entries.append({ | ||||
|                 'id': loop_stream_id.replace('.mp3', ''), | ||||
|                 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id), | ||||
|                 'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id), | ||||
|                 'title': title, | ||||
|                 'description': clean_html(data.get('subtitle')), | ||||
|                 'duration': duration, | ||||
|   | ||||
| @@ -9,8 +9,9 @@ from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     merge_dicts, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     xpath_text, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -27,10 +28,11 @@ class PhoenixIE(ZDFBaseIE): | ||||
|             'title': 'Wohin führt der Protest in der Pandemie?', | ||||
|             'description': 'md5:7d643fe7f565e53a24aac036b2122fbd', | ||||
|             'duration': 1691, | ||||
|             'timestamp': 1613906100, | ||||
|             'timestamp': 1613902500, | ||||
|             'upload_date': '20210221', | ||||
|             'uploader': 'Phoenix', | ||||
|             'channel': 'corona nachgehakt', | ||||
|             'series': 'corona nachgehakt', | ||||
|             'episode': 'Wohin führt der Protest in der Pandemie?', | ||||
|         }, | ||||
|     }, { | ||||
|         # Youtube embed | ||||
| @@ -79,50 +81,53 @@ class PhoenixIE(ZDFBaseIE): | ||||
|  | ||||
|         video_id = compat_str(video.get('basename') or video.get('content')) | ||||
|  | ||||
|         details = self._download_xml( | ||||
|         details = self._download_json( | ||||
|             'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php', | ||||
|             video_id, 'Downloading details XML', query={ | ||||
|             video_id, 'Downloading details JSON', query={ | ||||
|                 'ak': 'web', | ||||
|                 'ptmd': 'true', | ||||
|                 'id': video_id, | ||||
|                 'profile': 'player2', | ||||
|             }) | ||||
|  | ||||
|         title = title or xpath_text( | ||||
|             details, './/information/title', 'title', fatal=True) | ||||
|         content_id = xpath_text( | ||||
|             details, './/video/details/basename', 'content id', fatal=True) | ||||
|         title = title or details['title'] | ||||
|         content_id = details['tracking']['nielsen']['content']['assetid'] | ||||
|  | ||||
|         info = self._extract_ptmd( | ||||
|             'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id, | ||||
|             content_id, None, url) | ||||
|  | ||||
|         timestamp = unified_timestamp(xpath_text(details, './/details/airtime')) | ||||
|         duration = int_or_none(try_get( | ||||
|             details, lambda x: x['tracking']['nielsen']['content']['length'])) | ||||
|         timestamp = unified_timestamp(details.get('editorialDate')) | ||||
|         series = try_get( | ||||
|             details, lambda x: x['tracking']['nielsen']['content']['program'], | ||||
|             compat_str) | ||||
|         episode = title if details.get('contentType') == 'episode' else None | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for node in details.findall('.//teaserimages/teaserimage'): | ||||
|             thumbnail_url = node.text | ||||
|         teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {} | ||||
|         for thumbnail_key, thumbnail_url in teaser_images.items(): | ||||
|             thumbnail_url = urljoin(url, thumbnail_url) | ||||
|             if not thumbnail_url: | ||||
|                 continue | ||||
|             thumbnail = { | ||||
|                 'url': thumbnail_url, | ||||
|             } | ||||
|             thumbnail_key = node.get('key') | ||||
|             if thumbnail_key: | ||||
|                 m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) | ||||
|                 if m: | ||||
|                     thumbnail['width'] = int(m.group(1)) | ||||
|                     thumbnail['height'] = int(m.group(2)) | ||||
|             m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) | ||||
|             if m: | ||||
|                 thumbnail['width'] = int(m.group(1)) | ||||
|                 thumbnail['height'] = int(m.group(2)) | ||||
|             thumbnails.append(thumbnail) | ||||
|  | ||||
|         return merge_dicts(info, { | ||||
|             'id': content_id, | ||||
|             'title': title, | ||||
|             'description': xpath_text(details, './/information/detail'), | ||||
|             'duration': int_or_none(xpath_text(details, './/details/lengthSec')), | ||||
|             'description': details.get('leadParagraph'), | ||||
|             'duration': duration, | ||||
|             'thumbnails': thumbnails, | ||||
|             'timestamp': timestamp, | ||||
|             'uploader': xpath_text(details, './/details/channel'), | ||||
|             'uploader_id': xpath_text(details, './/details/originChannelId'), | ||||
|             'channel': xpath_text(details, './/details/originChannelTitle'), | ||||
|             'uploader': details.get('tvService'), | ||||
|             'series': series, | ||||
|             'episode': episode, | ||||
|         }) | ||||
|   | ||||
							
								
								
									
										65
									
								
								yt_dlp/extractor/playstuff.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								yt_dlp/extractor/playstuff.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
class PlayStuffIE(InfoExtractor):
    """Extractor for ``play.stuff.co.nz/details/<id>`` pages.

    The page embeds a JSON object assigned to ``__INITIAL_STATE__``.  Every
    entry of its ``items`` mapping that carries
    ``content.attributes.assetId`` is handed off to the ``BrightcoveNew``
    extractor, and the results are returned together as a playlist.
    """

    _VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a',
        'md5': 'c82d3669e5247c64bc382577843e5bd0',
        'info_dict': {
            'id': '6250584958001',
            'ext': 'mp4',
            'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga',
            'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913',
            'uploader_id': '6005208634001',
            'timestamp': 1619491027,
            'upload_date': '20210427',
        },
        'add_ie': ['BrightcoveNew'],
    }, {
        # geo restricted, bypassable
        'url': 'https://play.stuff.co.nz/details/_6155660351001',
        'only_matching': True,
    }]
    # Filled with (account id, player id, video id), in that order.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a playlist of Brightcove URL results found on the page.

        :param url: a URL matching ``_VALID_URL``.
        :returns: the ``playlist_result`` built from one ``BrightcoveNew``
            URL result per item that exposes an asset id; the playlist id is
            the id matched from *url*.
        :raises KeyError: if the parsed state has no ``items`` key.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        state = self._parse_json(
            self._search_regex(
                r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state'),
            video_id)

        # Fall back to fixed values when the page state does not provide
        # the account / player configuration as strings.
        account_id = try_get(
            state, lambda x: x['configurations']['accountId'],
            compat_str) or '6005208634001'
        player_id = try_get(
            state, lambda x: x['configurations']['playerId'],
            compat_str) or 'default'

        entries = []
        # Only the item payloads matter; the mapping keys are not used.
        for video in state['items'].values():
            if not isinstance(video, dict):
                continue
            asset_id = try_get(
                video, lambda x: x['content']['attributes']['assetId'],
                compat_str)
            if not asset_id:
                continue
            # Label each entry with its own asset id rather than the page id
            # so that entries stay distinguishable before they are resolved.
            entries.append(self.url_result(
                smuggle_url(
                    self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, asset_id),
                    {'geo_countries': ['NZ']}),
                'BrightcoveNew', asset_id))

        return self.playlist_result(entries, video_id)
| @@ -133,8 +133,10 @@ class RedBullEmbedIE(RedBullTVIE): | ||||
|         rrn_id = self._match_id(url) | ||||
|         asset_id = self._download_json( | ||||
|             'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql', | ||||
|             rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'}, | ||||
|             query={ | ||||
|             rrn_id, headers={ | ||||
|                 'Accept': 'application/json', | ||||
|                 'API-KEY': 'e90a1ff11335423998b100c929ecc866', | ||||
|             }, query={ | ||||
|                 'query': '''{ | ||||
|   resource(id: "%s", enforceGeoBlocking: false) { | ||||
|     %s | ||||
|   | ||||
| @@ -21,6 +21,7 @@ from ..utils import ( | ||||
| class ShahidBaseIE(AWSIE): | ||||
|     _AWS_PROXY_HOST = 'api2.shahid.net' | ||||
|     _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh' | ||||
|     _VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/' | ||||
|  | ||||
|     def _handle_error(self, e): | ||||
|         fail_data = self._parse_json( | ||||
| @@ -49,7 +50,7 @@ class ShahidBaseIE(AWSIE): | ||||
|  | ||||
| class ShahidIE(ShahidBaseIE): | ||||
|     _NETRC_MACHINE = 'shahid' | ||||
|     _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)' | ||||
|     _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924', | ||||
|         'info_dict': { | ||||
| @@ -73,6 +74,9 @@ class ShahidIE(ShahidBaseIE): | ||||
|         # shahid plus subscriber only | ||||
|         'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319', | ||||
|         'only_matching': True | ||||
|     }] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
| @@ -168,7 +172,7 @@ class ShahidIE(ShahidBaseIE): | ||||
|  | ||||
|  | ||||
| class ShahidShowIE(ShahidBaseIE): | ||||
|     _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)' | ||||
|     _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187', | ||||
|         'info_dict': { | ||||
|   | ||||
| @@ -86,10 +86,10 @@ class SharedIE(SharedBaseIE): | ||||
|  | ||||
| class VivoIE(SharedBaseIE): | ||||
|     IE_DESC = 'vivo.sx' | ||||
|     _VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})' | ||||
|     _VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})' | ||||
|     _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://vivo.sx/d7ddda0e78', | ||||
|         'md5': '15b3af41be0b4fe01f4df075c2678b2c', | ||||
|         'info_dict': { | ||||
| @@ -98,7 +98,10 @@ class VivoIE(SharedBaseIE): | ||||
|             'title': 'Chicken', | ||||
|             'filesize': 515659, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://vivo.st/d7ddda0e78', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _extract_title(self, webpage): | ||||
|         title = self._html_search_regex( | ||||
|   | ||||
| @@ -300,6 +300,13 @@ class VKIE(VKBaseIE): | ||||
|             'only_matching': True, | ||||
|         }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_sibnet_urls(webpage): | ||||
|         # https://help.sibnet.ru/?sibnet_video_embed | ||||
|         return [unescapeHTML(mobj.group('url')) for mobj in re.finditer( | ||||
|             r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1', | ||||
|             webpage)] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
| @@ -408,6 +415,10 @@ class VKIE(VKBaseIE): | ||||
|         if odnoklassniki_url: | ||||
|             return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) | ||||
|  | ||||
|         sibnet_urls = self._extract_sibnet_urls(info_page) | ||||
|         if sibnet_urls: | ||||
|             return self.url_result(sibnet_urls[0]) | ||||
|  | ||||
|         m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) | ||||
|         if m_opts: | ||||
|             m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1)) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan