mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[Vulture] Add support for vulture.com
This commit is contained in:
		| @@ -332,6 +332,7 @@ from .viki import VikiIE | ||||
| from .vk import VKIE | ||||
| from .vube import VubeIE | ||||
| from .vuclip import VuClipIE | ||||
| from .vulture import VultureIE | ||||
| from .washingtonpost import WashingtonPostIE | ||||
| from .wat import WatIE | ||||
| from .wdr import ( | ||||
|   | ||||
| @@ -668,6 +668,14 @@ class GenericIE(InfoExtractor): | ||||
|             url = unescapeHTML(mobj.group('url')) | ||||
|             return self.url_result(url) | ||||
|  | ||||
|         # Look for embedded vulture.com player | ||||
|         mobj = re.search( | ||||
|             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             url = unescapeHTML(mobj.group('url')) | ||||
|             return self.url_result(url, ie='Vulture') | ||||
|  | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if not found: | ||||
|   | ||||
							
								
								
									
										69
									
								
								youtube_dl/extractor/vulture.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								youtube_dl/extractor/vulture.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,69 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import os.path | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VultureIE(InfoExtractor): | ||||
|     IE_NAME = 'vulture.com' | ||||
|     _VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/' | ||||
|     _TEST = { | ||||
|         'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1', | ||||
|         'md5': '8d997845642a2b5152820f7257871bc8', | ||||
|         'info_dict': { | ||||
|             'id': '6GHRQL3RV7MSD1H4', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED', | ||||
|             'uploader_id': 'Sarah', | ||||
|             'thumbnail': 're:^http://.*\.jpg$', | ||||
|             'timestamp': 1401288564, | ||||
|             'upload_date': '20140528', | ||||
|             'description': 'Uplifting and witty, as predicted.', | ||||
|             'duration': 1015, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('display_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         query_string = self._search_regex( | ||||
|             r"queryString\s*=\s*'([^']+)'", webpage, 'query string') | ||||
|         video_id = self._search_regex( | ||||
|             r'content=([^&]+)', query_string, 'video ID') | ||||
|         query_url = 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string | ||||
|  | ||||
|         query_webpage = self._download_webpage( | ||||
|             query_url, display_id, note='Downloading query page') | ||||
|         params_json = self._search_regex( | ||||
|             r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n', | ||||
|             query_webpage, | ||||
|             'player params') | ||||
|         params = json.loads(params_json) | ||||
|  | ||||
|         upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T')) | ||||
|         uploader_id = params.get('user', {}).get('handle') | ||||
|  | ||||
|         media_item = params['media_item'] | ||||
|         title = os.path.splitext(media_item['title'])[0] | ||||
|         duration = int_or_none(media_item.get('duration_seconds')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'url': media_item['pipeline_xid'], | ||||
|             'title': title, | ||||
|             'timestamp': upload_timestamp, | ||||
|             'thumbnail': params.get('thumbnail_url'), | ||||
|             'uploader_id': uploader_id, | ||||
|             'description': params.get('description'), | ||||
|             'duration': duration, | ||||
|         } | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister