mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 08:35:12 +00:00 
			
		
		
		
	[Yahoo/NBCSports] Generalize NBC sports info extractor
This commit is contained in:
		@@ -310,6 +310,7 @@ from .naver import NaverIE
 | 
				
			|||||||
from .nba import NBAIE
 | 
					from .nba import NBAIE
 | 
				
			||||||
from .nbc import (
 | 
					from .nbc import (
 | 
				
			||||||
    NBCIE,
 | 
					    NBCIE,
 | 
				
			||||||
 | 
					    NBCSportsVPlayerIE,
 | 
				
			||||||
    NBCSportsIE,
 | 
					    NBCSportsIE,
 | 
				
			||||||
    NBCNewsIE,
 | 
					    NBCNewsIE,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -50,7 +50,7 @@ class NBCIE(InfoExtractor):
 | 
				
			|||||||
        return self.url_result(theplatform_url)
 | 
					        return self.url_result(theplatform_url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class NBCSportsIE(InfoExtractor):
 | 
					class NBCSportsVPlayerIE(InfoExtractor):
 | 
				
			||||||
    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
 | 
					    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    _TEST = {
 | 
					    _TEST = {
 | 
				
			||||||
@@ -64,6 +64,13 @@ class NBCSportsIE(InfoExtractor):
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @staticmethod
 | 
				
			||||||
 | 
					    def _extract_url(webpage):
 | 
				
			||||||
 | 
					        iframe_m = re.search(
 | 
				
			||||||
 | 
					            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
 | 
				
			||||||
 | 
					        if iframe_m:
 | 
				
			||||||
 | 
					            return iframe_m.group('url')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _real_extract(self, url):
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
        video_id = self._match_id(url)
 | 
					        video_id = self._match_id(url)
 | 
				
			||||||
        webpage = self._download_webpage(url, video_id)
 | 
					        webpage = self._download_webpage(url, video_id)
 | 
				
			||||||
@@ -71,6 +78,28 @@ class NBCSportsIE(InfoExtractor):
 | 
				
			|||||||
        return self.url_result(theplatform_url, 'ThePlatform')
 | 
					        return self.url_result(theplatform_url, 'ThePlatform')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class NBCSportsIE(InfoExtractor):
 | 
				
			||||||
 | 
					    # Does not include https because its certificate is invalid
 | 
				
			||||||
 | 
					    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _TEST = {
 | 
				
			||||||
 | 
					        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
 | 
				
			||||||
 | 
					        'md5': 'ba6c93f96b67bf05344f78bd523dac0f',
 | 
				
			||||||
 | 
					        'info_dict': {
 | 
				
			||||||
 | 
					            'id': 'PHJSaFWbrTY9',
 | 
				
			||||||
 | 
					            'ext': 'flv',
 | 
				
			||||||
 | 
					            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
 | 
				
			||||||
 | 
					            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
 | 
					        video_id = self._match_id(url)
 | 
				
			||||||
 | 
					        webpage = self._download_webpage(url, video_id)
 | 
				
			||||||
 | 
					        return self.url_result(
 | 
				
			||||||
 | 
					            NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class NBCNewsIE(InfoExtractor):
 | 
					class NBCNewsIE(InfoExtractor):
 | 
				
			||||||
    _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
 | 
					    _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
 | 
				
			||||||
        (?:video/.+?/(?P<id>\d+)|
 | 
					        (?:video/.+?/(?P<id>\d+)|
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -17,6 +17,8 @@ from ..utils import (
 | 
				
			|||||||
    int_or_none,
 | 
					    int_or_none,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from .nbc import NBCSportsVPlayerIE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class YahooIE(InfoExtractor):
 | 
					class YahooIE(InfoExtractor):
 | 
				
			||||||
    IE_DESC = 'Yahoo screen and movies'
 | 
					    IE_DESC = 'Yahoo screen and movies'
 | 
				
			||||||
@@ -132,6 +134,7 @@ class YahooIE(InfoExtractor):
 | 
				
			|||||||
        }, {
 | 
					        }, {
 | 
				
			||||||
            'note': 'NBC Sports embeds',
 | 
					            'note': 'NBC Sports embeds',
 | 
				
			||||||
            'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
 | 
					            'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
 | 
				
			||||||
 | 
					            'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5',
 | 
				
			||||||
            'info_dict': {
 | 
					            'info_dict': {
 | 
				
			||||||
                'id': '9CsDKds0kvHI',
 | 
					                'id': '9CsDKds0kvHI',
 | 
				
			||||||
                'ext': 'flv',
 | 
					                'ext': 'flv',
 | 
				
			||||||
@@ -161,10 +164,9 @@ class YahooIE(InfoExtractor):
 | 
				
			|||||||
                video_id = items[0]['id']
 | 
					                video_id = items[0]['id']
 | 
				
			||||||
                return self._get_info(video_id, display_id, webpage)
 | 
					                return self._get_info(video_id, display_id, webpage)
 | 
				
			||||||
        # Look for NBCSports iframes
 | 
					        # Look for NBCSports iframes
 | 
				
			||||||
        iframe_m = re.search(
 | 
					        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
 | 
				
			||||||
            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
 | 
					        if nbc_sports_url:
 | 
				
			||||||
        if iframe_m:
 | 
					            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
 | 
				
			||||||
            return self.url_result(iframe_m.group('url'), 'NBCSports')
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        items_json = self._search_regex(
 | 
					        items_json = self._search_regex(
 | 
				
			||||||
            r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
 | 
					            r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user