mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     determine_ext, | ||||
| @@ -375,6 +376,35 @@ class PBSIE(InfoExtractor): | ||||
|             }, | ||||
|             'expected_warnings': ['HTTP Error 403: Forbidden'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/', | ||||
|             'info_dict': { | ||||
|                 'id': '3007193718', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster", | ||||
|                 'description': 'md5:37efbac85e0c09b009586523ec143652', | ||||
|                 'duration': 6292, | ||||
|                 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'expected_warnings': ['HTTP Error 403: Forbidden'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/', | ||||
|             'info_dict': { | ||||
|                 'id': '3011407934', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Stories from the Stage - Road Trip', | ||||
|                 'duration': 1619, | ||||
|                 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'expected_warnings': ['HTTP Error 403: Forbidden'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', | ||||
|             'only_matching': True, | ||||
| @@ -438,6 +468,7 @@ class PBSIE(InfoExtractor): | ||||
|                 r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>',  # jwplayer | ||||
|                 r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',", | ||||
|                 r'<div[^>]+\bdata-cove-id=["\'](\d+)"',  # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/ | ||||
|                 r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)',  # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/ | ||||
|             ] | ||||
|  | ||||
|             media_id = self._search_regex( | ||||
| @@ -472,7 +503,8 @@ class PBSIE(InfoExtractor): | ||||
|             if not url: | ||||
|                 url = self._og_search_url(webpage) | ||||
|  | ||||
|             mobj = re.match(self._VALID_URL, url) | ||||
|             mobj = re.match( | ||||
|                 self._VALID_URL, self._proto_relative_url(url.strip())) | ||||
|  | ||||
|         player_id = mobj.group('player_id') | ||||
|         if not display_id: | ||||
| @@ -482,13 +514,27 @@ class PBSIE(InfoExtractor): | ||||
|                 url, display_id, note='Downloading player page', | ||||
|                 errnote='Could not download player page') | ||||
|             video_id = self._search_regex( | ||||
|                 r'<div\s+id="video_([0-9]+)"', player_page, 'video ID') | ||||
|                 r'<div\s+id=["\']video_(\d+)', player_page, 'video ID', | ||||
|                 default=None) | ||||
|             if not video_id: | ||||
|                 video_info = self._extract_video_data( | ||||
|                     player_page, 'video data', display_id) | ||||
|                 video_id = compat_str( | ||||
|                     video_info.get('id') or video_info['contentID']) | ||||
|         else: | ||||
|             video_id = mobj.group('id') | ||||
|             display_id = video_id | ||||
|  | ||||
|         return video_id, display_id, None, description | ||||
|  | ||||
|     def _extract_video_data(self, string, name, video_id, fatal=True): | ||||
|         return self._parse_json( | ||||
|             self._search_regex( | ||||
|                 [r'(?s)PBS\.videoData\s*=\s*({.+?});\n', | ||||
|                  r'window\.videoBridge\s*=\s*({.+?});'], | ||||
|                 string, name, default='{}'), | ||||
|             video_id, transform_source=js_to_json, fatal=fatal) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id, display_id, upload_date, description = self._extract_webpage(url) | ||||
|  | ||||
| @@ -519,11 +565,8 @@ class PBSIE(InfoExtractor): | ||||
|                 'http://player.pbs.org/%s/%s' % (page, video_id), | ||||
|                 display_id, 'Downloading %s page' % page, fatal=False) | ||||
|             if player: | ||||
|                 video_info = self._parse_json( | ||||
|                     self._search_regex( | ||||
|                         [r'(?s)PBS\.videoData\s*=\s*({.+?});\n', r'window\.videoBridge\s*=\s*({.+?});'], | ||||
|                         player, '%s video data' % page, default='{}'), | ||||
|                     display_id, transform_source=js_to_json, fatal=False) | ||||
|                 video_info = self._extract_video_data( | ||||
|                     player, '%s video data' % page, display_id, fatal=False) | ||||
|                 if video_info: | ||||
|                     extract_redirect_urls(video_info) | ||||
|                     if not info: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․