mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[CSpan] Add detection for Senate ISVP. Closes #5302
This commit is contained in:
		| @@ -7,7 +7,9 @@ from ..utils import ( | ||||
|     int_or_none, | ||||
|     unescapeHTML, | ||||
|     find_xpath_attr, | ||||
|     smuggle_url, | ||||
| ) | ||||
| from .senateisvp import SenateISVPIE | ||||
|  | ||||
|  | ||||
| class CSpanIE(InfoExtractor): | ||||
| @@ -40,6 +42,15 @@ class CSpanIE(InfoExtractor): | ||||
|             'title': 'General Motors Ignition Switch Recall', | ||||
|         }, | ||||
|         'playlist_duration_sum': 14855, | ||||
|     }, { | ||||
|         # Video from senate.gov | ||||
|         'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', | ||||
|         'md5': '7314c4b96dad66dd8e63dc3518ceaa6f', | ||||
|         'info_dict': { | ||||
|             'id': 'judiciary031715', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Immigration Reforms Needed to Protect Skilled American Workers', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -56,7 +67,7 @@ class CSpanIE(InfoExtractor): | ||||
|                 # present, otherwise this is a stripped version | ||||
|                 r'<p class=\'initial\'>(.*?)</p>' | ||||
|             ], | ||||
|             webpage, 'description', flags=re.DOTALL) | ||||
|             webpage, 'description', flags=re.DOTALL, default=None) | ||||
|  | ||||
|         info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id | ||||
|         data = self._download_json(info_url, video_id) | ||||
| @@ -68,6 +79,11 @@ class CSpanIE(InfoExtractor): | ||||
|         title = find_xpath_attr(doc, './/string', 'name', 'title').text | ||||
|         thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text | ||||
|  | ||||
|         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) | ||||
|         if senate_isvp_url: | ||||
|             surl = smuggle_url(senate_isvp_url, {'force_title': title}) | ||||
|             return self.url_result(surl, 'SenateISVP', video_id, title) | ||||
|  | ||||
|         files = data['video']['files'] | ||||
|  | ||||
|         entries = [{ | ||||
|   | ||||
| @@ -35,6 +35,7 @@ from .rutv import RUTVIE | ||||
| from .smotri import SmotriIE | ||||
| from .condenast import CondeNastIE | ||||
| from .udn import UDNEmbedIE | ||||
| from .senateisvp import SenateISVPIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -1365,6 +1366,11 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result( | ||||
|                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') | ||||
|  | ||||
|         # Look for Senate ISVP iframe | ||||
|         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) | ||||
|         if senate_isvp_url: | ||||
|             return self.url_result(senate_isvp_url, 'SenateISVP') | ||||
|  | ||||
|         def check_video(vurl): | ||||
|             if YoutubeIE.suitable(vurl): | ||||
|                 return True | ||||
|   | ||||
| @@ -3,7 +3,10 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unsmuggle_url, | ||||
| ) | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urlparse, | ||||
| @@ -73,12 +76,22 @@ class SenateISVPIE(InfoExtractor): | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _search_iframe_url(webpage): | ||||
|         mobj = re.search( | ||||
|             r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/\?[^'\"]+)['\"]", | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     def _get_info_for_comm(self, committee): | ||||
|         for entry in self._COMM_MAP: | ||||
|             if entry[0] == committee: | ||||
|                 return entry[1:] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|  | ||||
|         qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs')) | ||||
|         if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): | ||||
|             raise ExtractorError('Invalid URL', expected=True) | ||||
| @@ -87,7 +100,10 @@ class SenateISVPIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id) | ||||
|         if smuggled_data.get('force_title'): | ||||
|             title = smuggled_data['force_title'] | ||||
|         else: | ||||
|             title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id) | ||||
|         poster = qs.get('poster') | ||||
|         if poster: | ||||
|             thumbnail = poster[0] | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan