mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[huffpost] Add support
This commit is contained in:
		| @@ -1,3 +1,5 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| from .common import FileDownloader | from .common import FileDownloader | ||||||
| from .hls import HlsFD | from .hls import HlsFD | ||||||
| from .http import HttpFD | from .http import HttpFD | ||||||
| @@ -12,10 +14,11 @@ from ..utils import ( | |||||||
| def get_suitable_downloader(info_dict): | def get_suitable_downloader(info_dict): | ||||||
|     """Get the downloader class that can handle the info dict.""" |     """Get the downloader class that can handle the info dict.""" | ||||||
|     url = info_dict['url'] |     url = info_dict['url'] | ||||||
|  |     protocol = info_dict.get('protocol') | ||||||
|  |  | ||||||
|     if url.startswith('rtmp'): |     if url.startswith('rtmp'): | ||||||
|         return RtmpFD |         return RtmpFD | ||||||
|     if determine_ext(url) == u'm3u8': |     if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'): | ||||||
|         return HlsFD |         return HlsFD | ||||||
|     if url.startswith('mms') or url.startswith('rtsp'): |     if url.startswith('mms') or url.startswith('rtsp'): | ||||||
|         return MplayerFD |         return MplayerFD | ||||||
|   | |||||||
| @@ -83,6 +83,7 @@ from .googlesearch import GoogleSearchIE | |||||||
| from .hark import HarkIE | from .hark import HarkIE | ||||||
| from .hotnewhiphop import HotNewHipHopIE | from .hotnewhiphop import HotNewHipHopIE | ||||||
| from .howcast import HowcastIE | from .howcast import HowcastIE | ||||||
|  | from .huffpost import HuffPostIE | ||||||
| from .hypem import HypemIE | from .hypem import HypemIE | ||||||
| from .ign import IGNIE, OneUPIE | from .ign import IGNIE, OneUPIE | ||||||
| from .imdb import ( | from .imdb import ( | ||||||
|   | |||||||
| @@ -71,7 +71,7 @@ class InfoExtractor(object): | |||||||
|                     * player_url SWF Player URL (used for rtmpdump). |                     * player_url SWF Player URL (used for rtmpdump). | ||||||
|                     * protocol   The protocol that will be used for the actual |                     * protocol   The protocol that will be used for the actual | ||||||
|                                  download, lower-case. |                                  download, lower-case. | ||||||
|                                  "http", "https", "rtsp", "rtmp" or so. |                                  "http", "https", "rtsp", "rtmp", "m3u8" or so. | ||||||
|                     * preference Order number of this format. If this field is |                     * preference Order number of this format. If this field is | ||||||
|                                  present and not None, the formats get sorted |                                  present and not None, the formats get sorted | ||||||
|                                  by this field. |                                  by this field. | ||||||
|   | |||||||
| @@ -332,10 +332,16 @@ class GenericIE(InfoExtractor): | |||||||
|  |  | ||||||
|         # Look for embedded Facebook player |         # Look for embedded Facebook player | ||||||
|         mobj = re.search( |         mobj = re.search( | ||||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage) |             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage) | ||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
|             return self.url_result(mobj.group('url'), 'Facebook') |             return self.url_result(mobj.group('url'), 'Facebook') | ||||||
|  |  | ||||||
|  |         # Look for embedded Huffington Post player | ||||||
|  |         mobj = re.search( | ||||||
|  |             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage) | ||||||
|  |         if mobj is not None: | ||||||
|  |             return self.url_result(mobj.group('url'), 'HuffPost') | ||||||
|  |  | ||||||
|         # Start with something easy: JW Player in SWFObject |         # Start with something easy: JW Player in SWFObject | ||||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) |         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||||
|         if mobj is None: |         if mobj is None: | ||||||
|   | |||||||
							
								
								
									
										70
									
								
								youtube_dl/extractor/huffpost.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								youtube_dl/extractor/huffpost.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,70 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     parse_duration, | ||||||
|  |     unified_strdate, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
class HuffPostIE(InfoExtractor):
    """Information extractor for live.huffingtonpost.com segments.

    Handles both regular segment pages (``/r/segment/<slug>/<id>``) and
    embedded player URLs (``/HPLEmbedPlayer/?segmentId=<id>``), on the
    ``live.`` and ``embed.live.`` hosts.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    # NOTE(review): 'md5', 'title' and 'description' are still 'TODO'
    # placeholders; fill in real values before relying on this test.
    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': 'TODO',
        'info_dict': {
            'title': 'TODO',
            'description': 'TODO',
            'duration': 1549,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single segment.

        The segment id is taken from ``url`` and used to query the JSON
        segment API; all metadata comes from the ``data`` object of that
        response.

        Returns a dict with ``id``, ``title``, ``description``, ``formats``,
        ``duration``, ``upload_date`` and ``thumbnails``.

        Raises KeyError if the response lacks ``data``, ``title`` or
        ``sources.live``. All other fields are treated as optional and
        default to None (or an empty thumbnail list) when absent.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']

        # _TEST expects a description; presumably the API exposes it under
        # the 'description' key -- TODO confirm against a real response.
        description = data.get('description')

        # Optional metadata: a missing field must not abort the extraction.
        running_time = data.get('running_time')
        duration = (
            parse_duration(running_time) if running_time is not None else None)
        started_at = (data.get('schedule') or {}).get('started_at')
        upload_date = unified_strdate(started_at) if started_at else None

        thumbnails = []
        for thumb_url in (data.get('images') or {}).values():
            if not thumb_url:
                # Empty/null entries cannot be matched or downloaded.
                continue
            # Keep only images whose file name ends in "-<width>x<height>.".
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', thumb_url)
            if not m:
                continue
            thumbnails.append({
                'url': thumb_url,
                'resolution': m.group(1),
            })

        # Keys of sources.live double as format names; those starting with
        # 'audio/' are flagged as having no video stream.
        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': format_url,
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, format_url in data['sources']['live'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister