mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[CSpan] Add detection for Senate ISVP. Closes #5302
This commit is contained in:
		| @@ -7,7 +7,9 @@ from ..utils import ( | |||||||
|     int_or_none, |     int_or_none, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|     find_xpath_attr, |     find_xpath_attr, | ||||||
|  |     smuggle_url, | ||||||
| ) | ) | ||||||
|  | from .senateisvp import SenateISVPIE | ||||||
|  |  | ||||||
|  |  | ||||||
| class CSpanIE(InfoExtractor): | class CSpanIE(InfoExtractor): | ||||||
| @@ -40,6 +42,15 @@ class CSpanIE(InfoExtractor): | |||||||
|             'title': 'General Motors Ignition Switch Recall', |             'title': 'General Motors Ignition Switch Recall', | ||||||
|         }, |         }, | ||||||
|         'playlist_duration_sum': 14855, |         'playlist_duration_sum': 14855, | ||||||
|  |     }, { | ||||||
|  |         # Video from senate.gov | ||||||
|  |         'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', | ||||||
|  |         'md5': '7314c4b96dad66dd8e63dc3518ceaa6f', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'judiciary031715', | ||||||
|  |             'ext': 'flv', | ||||||
|  |             'title': 'Immigration Reforms Needed to Protect Skilled American Workers', | ||||||
|  |         } | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @@ -56,7 +67,7 @@ class CSpanIE(InfoExtractor): | |||||||
|                 # present, otherwise this is a stripped version |                 # present, otherwise this is a stripped version | ||||||
|                 r'<p class=\'initial\'>(.*?)</p>' |                 r'<p class=\'initial\'>(.*?)</p>' | ||||||
|             ], |             ], | ||||||
|             webpage, 'description', flags=re.DOTALL) |             webpage, 'description', flags=re.DOTALL, default=None) | ||||||
|  |  | ||||||
|         info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id |         info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id | ||||||
|         data = self._download_json(info_url, video_id) |         data = self._download_json(info_url, video_id) | ||||||
| @@ -68,6 +79,11 @@ class CSpanIE(InfoExtractor): | |||||||
|         title = find_xpath_attr(doc, './/string', 'name', 'title').text |         title = find_xpath_attr(doc, './/string', 'name', 'title').text | ||||||
|         thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text |         thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text | ||||||
|  |  | ||||||
|  |         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) | ||||||
|  |         if senate_isvp_url: | ||||||
|  |             surl = smuggle_url(senate_isvp_url, {'force_title': title}) | ||||||
|  |             return self.url_result(surl, 'SenateISVP', video_id, title) | ||||||
|  |  | ||||||
|         files = data['video']['files'] |         files = data['video']['files'] | ||||||
|  |  | ||||||
|         entries = [{ |         entries = [{ | ||||||
|   | |||||||
| @@ -35,6 +35,7 @@ from .rutv import RUTVIE | |||||||
| from .smotri import SmotriIE | from .smotri import SmotriIE | ||||||
| from .condenast import CondeNastIE | from .condenast import CondeNastIE | ||||||
| from .udn import UDNEmbedIE | from .udn import UDNEmbedIE | ||||||
|  | from .senateisvp import SenateISVPIE | ||||||
|  |  | ||||||
|  |  | ||||||
| class GenericIE(InfoExtractor): | class GenericIE(InfoExtractor): | ||||||
| @@ -1365,6 +1366,11 @@ class GenericIE(InfoExtractor): | |||||||
|             return self.url_result( |             return self.url_result( | ||||||
|                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') |                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') | ||||||
|  |  | ||||||
|  |         # Look for Senate ISVP iframe | ||||||
|  |         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) | ||||||
|  |         if senate_isvp_url: | ||||||
|  |             return self.url_result(senate_isvp_url, 'SenateISVP') | ||||||
|  |  | ||||||
|         def check_video(vurl): |         def check_video(vurl): | ||||||
|             if YoutubeIE.suitable(vurl): |             if YoutubeIE.suitable(vurl): | ||||||
|                 return True |                 return True | ||||||
|   | |||||||
| @@ -3,7 +3,10 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| import re | import re | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ExtractorError | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     unsmuggle_url, | ||||||
|  | ) | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|     compat_parse_qs, |     compat_parse_qs, | ||||||
|     compat_urlparse, |     compat_urlparse, | ||||||
| @@ -73,12 +76,22 @@ class SenateISVPIE(InfoExtractor): | |||||||
|         } |         } | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _search_iframe_url(webpage): | ||||||
|  |         mobj = re.search( | ||||||
|  |             r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/\?[^'\"]+)['\"]", | ||||||
|  |             webpage) | ||||||
|  |         if mobj: | ||||||
|  |             return mobj.group('url') | ||||||
|  |  | ||||||
|     def _get_info_for_comm(self, committee): |     def _get_info_for_comm(self, committee): | ||||||
|         for entry in self._COMM_MAP: |         for entry in self._COMM_MAP: | ||||||
|             if entry[0] == committee: |             if entry[0] == committee: | ||||||
|                 return entry[1:] |                 return entry[1:] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|  |         url, smuggled_data = unsmuggle_url(url, {}) | ||||||
|  |  | ||||||
|         qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs')) |         qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs')) | ||||||
|         if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): |         if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): | ||||||
|             raise ExtractorError('Invalid URL', expected=True) |             raise ExtractorError('Invalid URL', expected=True) | ||||||
| @@ -87,7 +100,10 @@ class SenateISVPIE(InfoExtractor): | |||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|         title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id) |         if smuggled_data.get('force_title'): | ||||||
|  |             title = smuggled_data['force_title'] | ||||||
|  |         else: | ||||||
|  |             title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id) | ||||||
|         poster = qs.get('poster') |         poster = qs.get('poster') | ||||||
|         if poster: |         if poster: | ||||||
|             thumbnail = poster[0] |             thumbnail = poster[0] | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan