mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[Yahoo/NBCSports] Generalize NBC sports info extractor
This commit is contained in:
		| @@ -310,6 +310,7 @@ from .naver import NaverIE | |||||||
| from .nba import NBAIE | from .nba import NBAIE | ||||||
| from .nbc import ( | from .nbc import ( | ||||||
|     NBCIE, |     NBCIE, | ||||||
|  |     NBCSportsVPlayerIE, | ||||||
|     NBCSportsIE, |     NBCSportsIE, | ||||||
|     NBCNewsIE, |     NBCNewsIE, | ||||||
| ) | ) | ||||||
|   | |||||||
| @@ -50,7 +50,7 @@ class NBCIE(InfoExtractor): | |||||||
|         return self.url_result(theplatform_url) |         return self.url_result(theplatform_url) | ||||||
|  |  | ||||||
|  |  | ||||||
| class NBCSportsIE(InfoExtractor): | class NBCSportsVPlayerIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' |     _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' | ||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
| @@ -64,6 +64,13 @@ class NBCSportsIE(InfoExtractor): | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _extract_url(webpage): | ||||||
|  |         iframe_m = re.search( | ||||||
|  |             r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) | ||||||
|  |         if iframe_m: | ||||||
|  |             return iframe_m.group('url') | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
| @@ -71,6 +78,28 @@ class NBCSportsIE(InfoExtractor): | |||||||
|         return self.url_result(theplatform_url, 'ThePlatform') |         return self.url_result(theplatform_url, 'ThePlatform') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class NBCSportsIE(InfoExtractor): | ||||||
|  |     # Does not include https because its certificate is invalid | ||||||
|  |     _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' | ||||||
|  |  | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', | ||||||
|  |         'md5': 'ba6c93f96b67bf05344f78bd523dac0f', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'PHJSaFWbrTY9', | ||||||
|  |             'ext': 'flv', | ||||||
|  |             'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke', | ||||||
|  |             'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113', | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         video_id = self._match_id(url) | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |         return self.url_result( | ||||||
|  |             NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer') | ||||||
|  |  | ||||||
|  |  | ||||||
| class NBCNewsIE(InfoExtractor): | class NBCNewsIE(InfoExtractor): | ||||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ |     _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ | ||||||
|         (?:video/.+?/(?P<id>\d+)| |         (?:video/.+?/(?P<id>\d+)| | ||||||
|   | |||||||
| @@ -17,6 +17,8 @@ from ..utils import ( | |||||||
|     int_or_none, |     int_or_none, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | from .nbc import NBCSportsVPlayerIE | ||||||
|  |  | ||||||
|  |  | ||||||
| class YahooIE(InfoExtractor): | class YahooIE(InfoExtractor): | ||||||
|     IE_DESC = 'Yahoo screen and movies' |     IE_DESC = 'Yahoo screen and movies' | ||||||
| @@ -132,6 +134,7 @@ class YahooIE(InfoExtractor): | |||||||
|         }, { |         }, { | ||||||
|             'note': 'NBC Sports embeds', |             'note': 'NBC Sports embeds', | ||||||
|             'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', |             'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', | ||||||
|  |             'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': '9CsDKds0kvHI', |                 'id': '9CsDKds0kvHI', | ||||||
|                 'ext': 'flv', |                 'ext': 'flv', | ||||||
| @@ -161,10 +164,9 @@ class YahooIE(InfoExtractor): | |||||||
|                 video_id = items[0]['id'] |                 video_id = items[0]['id'] | ||||||
|                 return self._get_info(video_id, display_id, webpage) |                 return self._get_info(video_id, display_id, webpage) | ||||||
|         # Look for NBCSports iframes |         # Look for NBCSports iframes | ||||||
|         iframe_m = re.search( |         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) | ||||||
|             r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) |         if nbc_sports_url: | ||||||
|         if iframe_m: |             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') | ||||||
|             return self.url_result(iframe_m.group('url'), 'NBCSports') |  | ||||||
|  |  | ||||||
|         items_json = self._search_regex( |         items_json = self._search_regex( | ||||||
|             r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, |             r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan