mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[sbs] Add new extractor (Fixes #3566)
This commit is contained in:
		| @@ -276,6 +276,7 @@ from .rutube import ( | |||||||
| from .rutv import RUTVIE | from .rutv import RUTVIE | ||||||
| from .sapo import SapoIE | from .sapo import SapoIE | ||||||
| from .savefrom import SaveFromIE | from .savefrom import SaveFromIE | ||||||
|  | from .sbs import SBSIE | ||||||
| from .scivee import SciVeeIE | from .scivee import SciVeeIE | ||||||
| from .screencast import ScreencastIE | from .screencast import ScreencastIE | ||||||
| from .servingsys import ServingSysIE | from .servingsys import ServingSysIE | ||||||
|   | |||||||
| @@ -731,6 +731,13 @@ class GenericIE(InfoExtractor): | |||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
|             return self.url_result(mobj.group('url'), 'Yahoo') |             return self.url_result(mobj.group('url'), 'Yahoo') | ||||||
|  |  | ||||||
|  |         # Look for embedded sbs.com.au player | ||||||
|  |         mobj = re.search( | ||||||
|  |             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1', | ||||||
|  |             webpage) | ||||||
|  |         if mobj is not None: | ||||||
|  |             return self.url_result(mobj.group('url'), 'SBS') | ||||||
|  |  | ||||||
|         # Start with something easy: JW Player in SWFObject |         # Start with something easy: JW Player in SWFObject | ||||||
|         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) |         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||||
|         if not found: |         if not found: | ||||||
|   | |||||||
							
								
								
									
										56
									
								
								youtube_dl/extractor/sbs.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/sbs.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import json | ||||||
|  | import re | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     js_to_json, | ||||||
|  |     remove_end, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
class SBSIE(InfoExtractor):
    """Extractor for single on-demand video pages on sbs.com.au.

    The video page embeds a JavaScript object, ``playerParams.releaseUrls``,
    that maps delivery kinds to URLs.  This extractor picks one of those URLs
    and returns it as a ``url_transparent`` result together with the metadata
    scraped from the page.
    """
    IE_DESC = 'sbs.com.au'
    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'

    _TESTS = [{
        # Original URL is handled by the generic IE which finds the iframe:
        # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
        'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
        'info_dict': {
            'id': '320403011771',
            'ext': 'flv',
            'title': 'Dingo Conservation',
            'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:http://.*\.jpg',
        },
        'add_ies': ['generic'],
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Downloads the page, reads the ``playerParams.releaseUrls`` object
        out of its inline JavaScript and returns a ``url_transparent``
        result whose ``url`` is the 'progressive' release URL, falling back
        to the 'standard' one.  Title, description and thumbnail come from
        the page's meta tags.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # The object is a JavaScript literal, not strict JSON, so it is
        # passed through js_to_json before being parsed.  The field name
        # is given explicitly so a failed search can be reported usefully.
        release_urls_json = js_to_json(self._search_regex(
            r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n',
            webpage, 'release urls'))
        release_urls = json.loads(release_urls_json)
        # Prefer the 'progressive' entry; use 'standard' when it is missing
        # or empty.  NOTE(review): this is None if neither key is present.
        theplatform_url = (
            release_urls.get('progressive') or release_urls.get('standard'))

        # The og:title carries a ' (The Feed)' suffix that is not part of
        # the expected title (see _TESTS).
        title = remove_end(self._og_search_title(webpage), ' (The Feed)')
        description = self._html_search_meta('description', webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': theplatform_url,

            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister