mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[extractor/generic] Extract wistia embed code into separate method
This commit is contained in:
		| @@ -88,6 +88,7 @@ from .rutube import RutubeIE | |||||||
| from .limelight import LimelightBaseIE | from .limelight import LimelightBaseIE | ||||||
| from .anvato import AnvatoIE | from .anvato import AnvatoIE | ||||||
| from .washingtonpost import WashingtonPostIE | from .washingtonpost import WashingtonPostIE | ||||||
|  | from .wistia import WistiaIE | ||||||
|  |  | ||||||
|  |  | ||||||
| class GenericIE(InfoExtractor): | class GenericIE(InfoExtractor): | ||||||
| @@ -2111,36 +2112,15 @@ class GenericIE(InfoExtractor): | |||||||
|                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) |                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) | ||||||
|  |  | ||||||
|         # Look for embedded Wistia player |         # Look for embedded Wistia player | ||||||
|         match = re.search( |         wistia_url = WistiaIE._extract_url(webpage) | ||||||
|             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) |         if wistia_url: | ||||||
|         if match: |  | ||||||
|             embed_url = self._proto_relative_url( |  | ||||||
|                 unescapeHTML(match.group('url'))) |  | ||||||
|             return { |             return { | ||||||
|                 '_type': 'url_transparent', |                 '_type': 'url_transparent', | ||||||
|                 'url': embed_url, |                 'url': self._proto_relative_url(wistia_url), | ||||||
|                 'ie_key': 'Wistia', |                 'ie_key': WistiaIE.ie_key(), | ||||||
|                 'uploader': video_uploader, |                 'uploader': video_uploader, | ||||||
|             } |             } | ||||||
|  |  | ||||||
|         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) |  | ||||||
|         if match: |  | ||||||
|             return { |  | ||||||
|                 '_type': 'url_transparent', |  | ||||||
|                 'url': 'wistia:%s' % match.group('id'), |  | ||||||
|                 'ie_key': 'Wistia', |  | ||||||
|                 'uploader': video_uploader, |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|         match = re.search( |  | ||||||
|             r'''(?sx) |  | ||||||
|                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? |  | ||||||
|                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2 |  | ||||||
|             ''', webpage) |  | ||||||
|         if match: |  | ||||||
|             return self.url_result(self._proto_relative_url( |  | ||||||
|                 'wistia:%s' % match.group('id')), 'Wistia') |  | ||||||
|  |  | ||||||
|         # Look for SVT player |         # Look for SVT player | ||||||
|         svt_url = SVTIE._extract_url(webpage) |         svt_url = SVTIE._extract_url(webpage) | ||||||
|         if svt_url: |         if svt_url: | ||||||
|   | |||||||
| @@ -1,10 +1,13 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|  |     unescapeHTML, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -34,6 +37,25 @@ class WistiaIE(InfoExtractor): | |||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _extract_url(webpage): | ||||||
|  |         match = re.search( | ||||||
|  |             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) | ||||||
|  |         if match: | ||||||
|  |             return unescapeHTML(match.group('url')) | ||||||
|  |  | ||||||
|  |         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) | ||||||
|  |         if match: | ||||||
|  |             return 'wistia:%s' % match.group('id') | ||||||
|  |  | ||||||
|  |         match = re.search( | ||||||
|  |             r'''(?sx) | ||||||
|  |                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? | ||||||
|  |                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2 | ||||||
|  |             ''', webpage) | ||||||
|  |         if match: | ||||||
|  |             return 'wistia:%s' % match.group('id') | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․