mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[vier] Improve extraction
+ Add support for authentication * Bypass authentication when no credentials provided * Improve extraction robustness
This commit is contained in:
		| @@ -5,12 +5,17 @@ import re | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VierIE(InfoExtractor): | ||||
|     IE_NAME = 'vier' | ||||
|     IE_DESC = 'vier.be and vijf.be' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))' | ||||
|     _NETRC_MACHINE = 'vier' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', | ||||
|         'info_dict': { | ||||
| @@ -24,19 +29,47 @@ class VierIE(InfoExtractor): | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         # 'skip': 'Requires account credentials', | ||||
|     }, { | ||||
|         'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614', | ||||
|         'info_dict': { | ||||
|             'id': '2561614', | ||||
|             'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s', | ||||
|             'title': 'EXTRA: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s', | ||||
|             'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.vier.be/janigaat/videos/jani-gaat-naar-tokio-aflevering-4/2674839', | ||||
|         'info_dict': { | ||||
|             'id': '2674839', | ||||
|             'display_id': 'jani-gaat-naar-tokio-aflevering-4', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Jani gaat naar Tokio - Aflevering 4', | ||||
|             'description': 'Bekijk hier de volledige vierde aflevering van het 2de seizoen van Jani gaat...', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Requires account credentials', | ||||
|     }, { | ||||
|         'url': 'http://www.vier.be/janigaat/videos/jani-gaat-naar-tokio-aflevering-4/2674839', | ||||
|         'info_dict': { | ||||
|             'id': '2674839', | ||||
|             'display_id': 'jani-gaat-naar-tokio-aflevering-4', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'jani-gaat-naar-tokio-aflevering-4', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['Log in to extract metadata'], | ||||
|     }, { | ||||
|         'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', | ||||
|         'only_matching': True, | ||||
| @@ -45,17 +78,54 @@ class VierIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._logged_in = False | ||||
|  | ||||
|     def _login(self, site): | ||||
|         username, password = self._get_login_info() | ||||
|         if username is None or password is None: | ||||
|             return | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             'http://www.%s.be/user/login' % site, | ||||
|             None, note='Logging in', errnote='Unable to log in', | ||||
|             data=urlencode_postdata({ | ||||
|                 'form_id': 'user_login', | ||||
|                 'name': username, | ||||
|                 'pass': password, | ||||
|             }), | ||||
|             headers={'Content-Type': 'application/x-www-form-urlencoded'}) | ||||
|  | ||||
|         login_error = self._html_search_regex( | ||||
|             r'(?s)<div class="messages error">\s*<div>\s*<h2.+?</h2>(.+?)<', | ||||
|             login_page, 'login error', default=None) | ||||
|         if login_error: | ||||
|             self.report_warning('Unable to log in: %s' % login_error) | ||||
|         else: | ||||
|             self._logged_in = True | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         embed_id = mobj.group('embed_id') | ||||
|         display_id = mobj.group('display_id') or embed_id | ||||
|         video_id = mobj.group('id') or embed_id | ||||
|         site = mobj.group('site') | ||||
|  | ||||
|         if not self._logged_in: | ||||
|             self._login(site) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         if r'id="user-login"' in webpage: | ||||
|             self.report_warning( | ||||
|                 'Log in to extract metadata', video_id=display_id) | ||||
|             webpage = self._download_webpage( | ||||
|                 'http://www.%s.be/video/v3/embed/%s' % (site, video_id), | ||||
|                 display_id) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'], | ||||
|             webpage, 'video id') | ||||
|             webpage, 'video id', default=video_id) | ||||
|         application = self._search_regex( | ||||
|             [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], | ||||
|             webpage, 'application', default=site + '_vod') | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 mrBliss
					mrBliss