mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[wistia] Add extractor
This commit is contained in:
		| @@ -488,7 +488,8 @@ class YoutubeDL(object): | |||||||
|                 new_result = ie_result.copy() |                 new_result = ie_result.copy() | ||||||
|                 for f in ('_type', 'url', 'ext', 'player_url', 'formats', |                 for f in ('_type', 'url', 'ext', 'player_url', 'formats', | ||||||
|                           'entries', 'urlhandle', 'ie_key', 'duration', |                           'entries', 'urlhandle', 'ie_key', 'duration', | ||||||
|                           'subtitles', 'annotations', 'format'): |                           'subtitles', 'annotations', 'format', | ||||||
|  |                           'thumbnail', 'thumbnails'): | ||||||
|                     if f in new_result: |                     if f in new_result: | ||||||
|                         del new_result[f] |                         del new_result[f] | ||||||
|                     if f in embedded_info: |                     if f in embedded_info: | ||||||
|   | |||||||
| @@ -178,6 +178,7 @@ from .wat import WatIE | |||||||
| from .websurg import WeBSurgIE | from .websurg import WeBSurgIE | ||||||
| from .weibo import WeiboIE | from .weibo import WeiboIE | ||||||
| from .wimp import WimpIE | from .wimp import WimpIE | ||||||
|  | from .wistia import WistiaIE | ||||||
| from .worldstarhiphop import WorldStarHipHopIE | from .worldstarhiphop import WorldStarHipHopIE | ||||||
| from .xhamster import XHamsterIE | from .xhamster import XHamsterIE | ||||||
| from .xnxx import XNXXIE | from .xnxx import XNXXIE | ||||||
|   | |||||||
| @@ -169,8 +169,13 @@ class GenericIE(InfoExtractor): | |||||||
|         #   Site Name | Video Title |         #   Site Name | Video Title | ||||||
|         #   Video Title - Tagline | Site Name |         #   Video Title - Tagline | Site Name | ||||||
|         # and so on and so forth; it's just not practical |         # and so on and so forth; it's just not practical | ||||||
|         video_title = self._html_search_regex(r'<title>(.*)</title>', |         video_title = self._html_search_regex( | ||||||
|             webpage, u'video title', default=u'video', flags=re.DOTALL) |             r'(?s)<title>(.*?)</title>', webpage, u'video title', | ||||||
|  |             default=u'video') | ||||||
|  |  | ||||||
|  |         # video uploader is domain name | ||||||
|  |         video_uploader = self._search_regex( | ||||||
|  |             r'^(?:https?://)?([^/]*)/.*', url, u'video uploader') | ||||||
|  |  | ||||||
|         # Look for BrightCove: |         # Look for BrightCove: | ||||||
|         bc_url = BrightcoveIE._extract_brightcove_url(webpage) |         bc_url = BrightcoveIE._extract_brightcove_url(webpage) | ||||||
| @@ -188,7 +193,7 @@ class GenericIE(InfoExtractor): | |||||||
|  |  | ||||||
|         # Look for embedded YouTube player |         # Look for embedded YouTube player | ||||||
|         matches = re.findall( |         matches = re.findall( | ||||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage) |             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage) | ||||||
|         if matches: |         if matches: | ||||||
|             urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') |             urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') | ||||||
|                      for tuppl in matches] |                      for tuppl in matches] | ||||||
| @@ -197,13 +202,26 @@ class GenericIE(InfoExtractor): | |||||||
|  |  | ||||||
|         # Look for embedded Dailymotion player |         # Look for embedded Dailymotion player | ||||||
|         matches = re.findall( |         matches = re.findall( | ||||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion.com/embed/video/.+?)\1', webpage) |             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage) | ||||||
|         if matches: |         if matches: | ||||||
|             urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion') |             urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion') | ||||||
|                      for tuppl in matches] |                      for tuppl in matches] | ||||||
|             return self.playlist_result( |             return self.playlist_result( | ||||||
|                 urlrs, playlist_id=video_id, playlist_title=video_title) |                 urlrs, playlist_id=video_id, playlist_title=video_title) | ||||||
|  |  | ||||||
|  |         # Look for embedded Wistia player | ||||||
|  |         match = re.search( | ||||||
|  |             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) | ||||||
|  |         if match: | ||||||
|  |             return { | ||||||
|  |                 '_type': 'url_transparent', | ||||||
|  |                 'url': unescapeHTML(match.group('url')), | ||||||
|  |                 'ie_key': 'Wistia', | ||||||
|  |                 'uploader': video_uploader, | ||||||
|  |                 'title': video_title, | ||||||
|  |                 'id': video_id, | ||||||
|  |             } | ||||||
|  |  | ||||||
|         # Look for Bandcamp pages with custom domain |         # Look for Bandcamp pages with custom domain | ||||||
|         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) |         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) | ||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
| @@ -247,14 +265,9 @@ class GenericIE(InfoExtractor): | |||||||
|         # here's a fun little line of code for you: |         # here's a fun little line of code for you: | ||||||
|         video_id = os.path.splitext(video_id)[0] |         video_id = os.path.splitext(video_id)[0] | ||||||
|  |  | ||||||
|         # video uploader is domain name |  | ||||||
|         video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*', |  | ||||||
|             url, u'video uploader') |  | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id':       video_id, |             'id':       video_id, | ||||||
|             'url':      video_url, |             'url':      video_url, | ||||||
|             'uploader': video_uploader, |             'uploader': video_uploader, | ||||||
|             'upload_date':  None, |  | ||||||
|             'title':    video_title, |             'title':    video_title, | ||||||
|         } |         } | ||||||
|   | |||||||
							
								
								
									
										55
									
								
								youtube_dl/extractor/wistia.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/wistia.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | |||||||
|  | import json | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class WistiaIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' | ||||||
|  |  | ||||||
|  |     _TEST = { | ||||||
|  |         u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt", | ||||||
|  |         u"file": u"sh7fpupwlt.mov", | ||||||
|  |         u"md5": u"cafeb56ec0c53c18c97405eecb3133df", | ||||||
|  |         u"info_dict": { | ||||||
|  |             u"title": u"cfh_resourceful_zdkh_final_1" | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  |  | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |         data_json = self._html_search_regex( | ||||||
|  |             r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data') | ||||||
|  |  | ||||||
|  |         data = json.loads(data_json) | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|  |         thumbnails = [] | ||||||
|  |         for atype, a in data['assets'].items(): | ||||||
|  |             if atype == 'still': | ||||||
|  |                 thumbnails.append({ | ||||||
|  |                     'url': a['url'], | ||||||
|  |                     'resolution': '%dx%d' % (a['width'], a['height']), | ||||||
|  |                 }) | ||||||
|  |                 continue | ||||||
|  |             if atype == 'preview': | ||||||
|  |                 continue | ||||||
|  |             formats.append({ | ||||||
|  |                 'format_id': atype, | ||||||
|  |                 'url': a['url'], | ||||||
|  |                 'width': a['width'], | ||||||
|  |                 'height': a['height'], | ||||||
|  |                 'filesize': a['size'], | ||||||
|  |                 'ext': a['ext'], | ||||||
|  |             }) | ||||||
|  |         formats.sort(key=lambda a: a['filesize']) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': video_id, | ||||||
|  |             'title': data['name'], | ||||||
|  |             'formats': formats, | ||||||
|  |             'thumbnails': thumbnails, | ||||||
|  |         } | ||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister