mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	| @@ -1,16 +1,21 @@ | ||||
| import json | ||||
| import re | ||||
| import urllib.parse | ||||
| import time | ||||
| import uuid | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..networking.exceptions import HTTPError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     join_nonempty, | ||||
|     jwt_decode_hs256, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
|     url_or_none, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
| from ..utils.traversal import traverse_obj | ||||
| 
 | ||||
| @@ -276,81 +281,213 @@ class MLBVideoIE(MLBBaseIE): | ||||
class MLBTVIE(InfoExtractor):
    # Matches mlb.com/tv game URLs; the six digits following "g" are captured as the video ID
    _VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
    # Also used as the cache section name by _set_device_id
    _NETRC_MACHINE = 'mlb'

    _TESTS = [{
        'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
        'info_dict': {
            'id': '661581',
            'ext': 'mp4',
            'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
            'release_date': '20220702',
            'release_timestamp': 1656792300,
        },
        'params': {
            'skip_download': True,
        },
    }]
    # GraphQL mutation sent by _perform_login; only `sessionId` is read from its response
    _GRAPHQL_INIT_QUERY = '''\
mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) {
    initSession(device: $device, clientType: $clientType, experience: $experience) {
        deviceId
        sessionId
        entitlements {
            code
        }
        location {
            countryCode
            regionName
            zipCode
            latitude
            longitude
        }
        clientExperience
        features
    }
  }'''
    # GraphQL mutation sent once per broadcast by _extract_formats_and_subtitles;
    # the `playback.url` and `playback.token` fields of its response are used
    _GRAPHQL_PLAYBACK_QUERY = '''\
mutation initPlaybackSession(
        $adCapabilities: [AdExperienceType]
        $mediaId: String!
        $deviceId: String!
        $sessionId: String!
        $quality: PlaybackQuality
    ) {
        initPlaybackSession(
            adCapabilities: $adCapabilities
            mediaId: $mediaId
            deviceId: $deviceId
            sessionId: $sessionId
            quality: $quality
        ) {
            playbackSessionId
            playback {
                url
                token
                expiration
                cdn
            }
        }
    }'''
    # Reported as the device `appVersion` and in the `x-client-version` request header
    _APP_VERSION = '7.8.2'
    # Class-level defaults for the login state; the methods below assign the real values on `self`
    _device_id = None
    _session_id = None
    _access_token = None
    # Compared against time.time() in _api_headers; 0 means any check treats the token as expired
    _token_expiry = 0
| 
 | ||||
|     @property | ||||
|     def _api_headers(self): | ||||
|         if (self._token_expiry - 120) <= time.time(): | ||||
|             self.write_debug('Access token has expired; re-logging in') | ||||
|             self._perform_login(*self._get_login_info()) | ||||
|         return {'Authorization': f'Bearer {self._access_token}'} | ||||
| 
 | ||||
|     def _real_initialize(self): | ||||
|         if not self._access_token: | ||||
|             self.raise_login_required( | ||||
|                 'All videos are only available to registered users', method='password') | ||||
| 
 | ||||
|     def _set_device_id(self, username): | ||||
|         if not self._device_id: | ||||
|             self._device_id = self.cache.load( | ||||
|                 self._NETRC_MACHINE, 'device_ids', default={}).get(username) | ||||
|         if self._device_id: | ||||
|             return | ||||
|         self._device_id = str(uuid.uuid4()) | ||||
|         self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id}) | ||||
| 
 | ||||
|     def _perform_login(self, username, password): | ||||
|         data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356' | ||||
|         access_token = self._download_json( | ||||
|             'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None, | ||||
|             headers={ | ||||
|                 'User-Agent': 'okhttp/3.12.1', | ||||
|                 'Content-Type': 'application/x-www-form-urlencoded', | ||||
|             }, data=data.encode())['access_token'] | ||||
|         try: | ||||
|             self._access_token = self._download_json( | ||||
|                 'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None, | ||||
|                 'Logging in', 'Unable to log in', headers={ | ||||
|                     'User-Agent': 'okhttp/3.12.1', | ||||
|                     'Content-Type': 'application/x-www-form-urlencoded', | ||||
|                 }, data=urlencode_postdata({ | ||||
|                     'grant_type': 'password', | ||||
|                     'username': username, | ||||
|                     'password': password, | ||||
|                     'scope': 'openid offline_access', | ||||
|                     'client_id': '0oa3e1nutA1HLzAKG356', | ||||
|                 }))['access_token'] | ||||
|         except ExtractorError as error: | ||||
|             if isinstance(error.cause, HTTPError) and error.cause.status == 400: | ||||
|                 raise ExtractorError('Invalid username or password', expected=True) | ||||
|             raise | ||||
| 
 | ||||
|         entitlement = self._download_webpage( | ||||
|             f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None, | ||||
|             headers={ | ||||
|                 'User-Agent': 'okhttp/3.12.1', | ||||
|                 'Authorization': f'Bearer {access_token}', | ||||
|             }) | ||||
|         self._token_expiry = traverse_obj(self._access_token, ({jwt_decode_hs256}, 'exp', {int})) or 0 | ||||
|         self._set_device_id(username) | ||||
| 
 | ||||
|         data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv' | ||||
|         self._access_token = self._download_json( | ||||
|             'https://us.edge.bamgrid.com/token', None, | ||||
|         self._session_id = self._call_api({ | ||||
|             'operationName': 'initSession', | ||||
|             'query': self._GRAPHQL_INIT_QUERY, | ||||
|             'variables': { | ||||
|                 'device': { | ||||
|                     'appVersion': self._APP_VERSION, | ||||
|                     'deviceFamily': 'desktop', | ||||
|                     'knownDeviceId': self._device_id, | ||||
|                     'languagePreference': 'ENGLISH', | ||||
|                     'manufacturer': '', | ||||
|                     'model': '', | ||||
|                     'os': '', | ||||
|                     'osVersion': '', | ||||
|                 }, | ||||
|                 'clientType': 'WEB', | ||||
|             }, | ||||
|         }, None, 'session ID')['data']['initSession']['sessionId'] | ||||
| 
 | ||||
|     def _call_api(self, data, video_id, description='GraphQL JSON', fatal=True): | ||||
|         return self._download_json( | ||||
|             'https://media-gateway.mlb.com/graphql', video_id, | ||||
|             f'Downloading {description}', f'Unable to download {description}', fatal=fatal, | ||||
|             headers={ | ||||
|                 **self._api_headers, | ||||
|                 'Accept': 'application/json', | ||||
|                 'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk', | ||||
|                 'Content-Type': 'application/x-www-form-urlencoded', | ||||
|             }, data=data.encode())['access_token'] | ||||
|                 'Content-Type': 'application/json', | ||||
|                 'x-client-name': 'WEB', | ||||
|                 'x-client-version': self._APP_VERSION, | ||||
|             }, data=json.dumps(data, separators=(',', ':')).encode()) | ||||
| 
 | ||||
|     def _extract_formats_and_subtitles(self, broadcast, video_id): | ||||
|         feed = traverse_obj(broadcast, ('homeAway', {str.title})) | ||||
|         medium = traverse_obj(broadcast, ('type', {str})) | ||||
|         language = traverse_obj(broadcast, ('language', {str.lower})) | ||||
|         format_id = join_nonempty(feed, medium, language) | ||||
| 
 | ||||
|         response = self._call_api({ | ||||
|             'operationName': 'initPlaybackSession', | ||||
|             'query': self._GRAPHQL_PLAYBACK_QUERY, | ||||
|             'variables': { | ||||
|                 'adCapabilities': ['GOOGLE_STANDALONE_AD_PODS'], | ||||
|                 'deviceId': self._device_id, | ||||
|                 'mediaId': broadcast['mediaId'], | ||||
|                 'quality': 'PLACEHOLDER', | ||||
|                 'sessionId': self._session_id, | ||||
|             }, | ||||
|         }, video_id, f'{format_id} broadcast JSON', fatal=False) | ||||
| 
 | ||||
|         playback = traverse_obj(response, ('data', 'initPlaybackSession', 'playback', {dict})) | ||||
|         m3u8_url = traverse_obj(playback, ('url', {url_or_none})) | ||||
|         token = traverse_obj(playback, ('token', {str})) | ||||
| 
 | ||||
|         if not (m3u8_url and token): | ||||
|             errors = '; '.join(traverse_obj(response, ('errors', ..., 'message', {str}))) | ||||
|             if 'not entitled' in errors: | ||||
|                 raise ExtractorError(errors, expected=True) | ||||
|             elif errors:  # Only warn when 'blacked out' since radio formats are available | ||||
|                 self.report_warning(f'API returned errors for {format_id}: {errors}') | ||||
|             else: | ||||
|                 self.report_warning(f'No formats available for {format_id} broadcast; skipping') | ||||
|             return [], {} | ||||
| 
 | ||||
|         cdn_headers = {'x-cdn-token': token} | ||||
|         fmts, subs = self._extract_m3u8_formats_and_subtitles( | ||||
|             m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4', | ||||
|             m3u8_id=format_id, fatal=False, headers=cdn_headers) | ||||
|         for fmt in fmts: | ||||
|             fmt['http_headers'] = cdn_headers | ||||
|             fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' ')) | ||||
|             fmt.setdefault('language', language) | ||||
|             if fmt.get('vcodec') == 'none' and fmt['language'] == 'en': | ||||
|                 fmt['source_preference'] = 10 | ||||
| 
 | ||||
|         return fmts, subs | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         airings = self._download_json( | ||||
|             f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D', | ||||
|             video_id)['data']['Airings'] | ||||
|         metadata = traverse_obj(self._download_json( | ||||
|             'https://statsapi.mlb.com/api/v1/schedule', video_id, query={ | ||||
|                 'gamePk': video_id, | ||||
|                 'hydrate': 'broadcasts(all),statusFlags', | ||||
|             }), ('dates', ..., 'games', lambda _, v: str(v['gamePk']) == video_id and v['broadcasts'], any)) | ||||
| 
 | ||||
|         broadcasts = traverse_obj(metadata, ( | ||||
|             'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF')) | ||||
| 
 | ||||
|         formats, subtitles = [], {} | ||||
|         for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']): | ||||
|             format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing) | ||||
|             m3u8_url = traverse_obj(self._download_json( | ||||
|                 airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id, | ||||
|                 note=f'Downloading {format_id} stream info JSON', | ||||
|                 errnote=f'Failed to download {format_id} stream info, skipping', | ||||
|                 fatal=False, headers={ | ||||
|                     'Authorization': self._access_token, | ||||
|                     'Accept': 'application/vnd.media-service+json; version=2', | ||||
|                 }), ('stream', 'complete', {url_or_none})) | ||||
|             if not m3u8_url: | ||||
|                 continue | ||||
|             f, s = self._extract_m3u8_formats_and_subtitles( | ||||
|                 m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) | ||||
|             formats.extend(f) | ||||
|             self._merge_subtitles(s, target=subtitles) | ||||
|         for broadcast in broadcasts: | ||||
|             fmts, subs = self._extract_formats_and_subtitles(broadcast, video_id) | ||||
|             formats.extend(fmts) | ||||
|             self._merge_subtitles(subs, target=subtitles) | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False), | ||||
|             'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE', | ||||
|             'title': join_nonempty( | ||||
|                 traverse_obj(metadata, ('officialDate', {str})), | ||||
|                 traverse_obj(metadata, ('teams', ('away', 'home'), 'team', 'name', {str}, all, {' @ '.join})), | ||||
|                 delim=' - '), | ||||
|             'is_live': traverse_obj(broadcasts, (..., 'mediaState', 'mediaStateCode', {str}, any)) == 'MEDIA_ON', | ||||
|             'release_timestamp': traverse_obj(metadata, ('gameDate', {parse_iso8601})), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'http_headers': {'Authorization': f'Bearer {self._access_token}'}, | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bashonly
					bashonly