mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[shahid] add support for show pages (closes #7401)
This commit is contained in:
		
							
								
								
									
										78
									
								
								youtube_dl/extractor/aws.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								youtube_dl/extractor/aws.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import hashlib | ||||
| import hmac | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
|  | ||||
|  | ||||
class AWSIE(InfoExtractor):
    """Base extractor that issues AWS Signature Version 4 signed GET requests.

    Requests are signed for the ``execute-api`` service. Subclasses must
    define ``_AWS_PROXY_HOST`` (host the request is sent to and signed for)
    and ``_AWS_API_KEY`` (sent as the ``X-Api-Key`` header); both are read by
    ``_aws_execute_api`` but are not defined on this class.
    """
    # Algorithm identifier used in the string to sign and the Authorization header.
    _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
    # Region placed in the credential scope; subclasses may override it.
    _AWS_REGION = 'us-east-1'

    def _aws_execute_api(self, aws_dict, video_id, query=None):
        """Perform a SigV4-signed GET against ``_AWS_PROXY_HOST`` and return the parsed JSON.

        aws_dict -- mapping with the required keys 'uri' (request path),
                    'access_key' and 'secret_key', and the optional key
                    'session_token' (sent as X-Amz-Security-Token when truthy)
        video_id -- identifier forwarded to ``_download_json``
        query    -- optional mapping of query-string parameters
        """
        query = query or {}
        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        date = amz_date[:8]
        headers = {
            'Accept': 'application/json',
            'Host': self._AWS_PROXY_HOST,
            'X-Amz-Date': amz_date,
        }
        session_token = aws_dict.get('session_token')
        if session_token:
            headers['X-Amz-Security-Token'] = session_token
        headers['X-Api-Key'] = self._AWS_API_KEY

        def aws_hash(s):
            return hashlib.sha256(s.encode('utf-8')).hexdigest()

        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
        # NOTE: parameters are encoded in the mapping's own iteration order.
        canonical_querystring = compat_urllib_parse_urlencode(query)
        # SigV4 requires the canonical headers and the signed-headers list to
        # be sorted by lowercase header name; plain dict order is not
        # guaranteed on every supported Python version.
        sorted_headers = sorted(
            (name.lower(), value) for name, value in headers.items())
        canonical_headers = ''.join(
            '%s:%s\n' % header for header in sorted_headers)
        signed_headers = ';'.join(name for name, _ in sorted_headers)
        canonical_request = '\n'.join([
            'GET',
            aws_dict['uri'],
            canonical_querystring,
            canonical_headers,
            signed_headers,
            aws_hash('')  # hash of the empty GET payload
        ])

        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
        credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
        credential_scope = '/'.join(credential_scope_list)
        string_to_sign = '\n'.join([
            self._AWS_ALGORITHM, amz_date, credential_scope,
            aws_hash(canonical_request)])

        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
        def aws_hmac(key, msg):
            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)

        def aws_hmac_digest(key, msg):
            return aws_hmac(key, msg).digest()

        def aws_hmac_hexdigest(key, msg):
            return aws_hmac(key, msg).hexdigest()

        # Derive the signing key by chaining HMACs over each scope component.
        k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
        for value in credential_scope_list:
            k_signing = aws_hmac_digest(k_signing, value)

        signature = aws_hmac_hexdigest(k_signing, string_to_sign)

        # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
        # Authorization is added after signing, so it is not itself signed.
        headers['Authorization'] = ', '.join([
            '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
            'SignedHeaders=%s' % signed_headers,
            'Signature=%s' % signature,
        ])

        request_url = 'https://%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'])
        if canonical_querystring:
            request_url += '?' + canonical_querystring
        return self._download_json(request_url, video_id, headers=headers)
| @@ -927,7 +927,10 @@ from .sendtonews import SendtoNewsIE | ||||
| from .servingsys import ServingSysIE | ||||
| from .servus import ServusIE | ||||
| from .sexu import SexuIE | ||||
| from .shahid import ShahidIE | ||||
| from .shahid import ( | ||||
|     ShahidIE, | ||||
|     ShahidShowIE, | ||||
| ) | ||||
| from .shared import ( | ||||
|     SharedIE, | ||||
|     VivoIE, | ||||
|   | ||||
| @@ -1,13 +1,11 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import json | ||||
| import hashlib | ||||
| import hmac | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .aws import AWSIE | ||||
| from .anvato import AnvatoIE | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
| @@ -16,7 +14,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ScrippsNetworksWatchIE(InfoExtractor): | ||||
| class ScrippsNetworksWatchIE(AWSIE): | ||||
|     IE_NAME = 'scrippsnetworks:watch' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
| @@ -64,44 +62,27 @@ class ScrippsNetworksWatchIE(InfoExtractor): | ||||
|         'travelchannel': 'trav', | ||||
|         'geniuskitchen': 'genius', | ||||
|     } | ||||
|     _SNI_HOST = 'web.api.video.snidigital.com' | ||||
|  | ||||
|     _AWS_REGION = 'us-east-1' | ||||
|     _AWS_IDENTITY_ID_JSON = json.dumps({ | ||||
|         'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION | ||||
|     }) | ||||
|     _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' | ||||
|     _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1' | ||||
|     _AWS_SERVICE = 'execute-api' | ||||
|     _AWS_REQUEST = 'aws4_request' | ||||
|     _AWS_SIGNED_HEADERS = ';'.join([ | ||||
|         'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key']) | ||||
|     _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET | ||||
| %(uri)s | ||||
|     _AWS_PROXY_HOST = 'web.api.video.snidigital.com' | ||||
|  | ||||
| host:%(host)s | ||||
| x-amz-date:%(date)s | ||||
| x-amz-security-token:%(token)s | ||||
| x-api-key:%(key)s | ||||
|  | ||||
| %(signed_headers)s | ||||
| %(payload_hash)s''' | ||||
|     _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         site_id, video_id = mobj.group('site', 'id') | ||||
|  | ||||
|         def aws_hash(s): | ||||
|             return hashlib.sha256(s.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         aws_identity_id_json = json.dumps({ | ||||
|             'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION | ||||
|         }).encode('utf-8') | ||||
|         token = self._download_json( | ||||
|             'https://cognito-identity.us-east-1.amazonaws.com/', video_id, | ||||
|             data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'), | ||||
|             'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id, | ||||
|             data=aws_identity_id_json, | ||||
|             headers={ | ||||
|                 'Accept': '*/*', | ||||
|                 'Content-Type': 'application/x-amz-json-1.1', | ||||
|                 'Referer': url, | ||||
|                 'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON), | ||||
|                 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(), | ||||
|                 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken', | ||||
|                 'X-Amz-User-Agent': self._AWS_USER_AGENT, | ||||
|             })['Token'] | ||||
| @@ -124,64 +105,12 @@ x-api-key:%(key)s | ||||
|                 sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key, | ||||
|                 fatal=True) | ||||
|  | ||||
|         access_key_id = get('AccessKeyId') | ||||
|         secret_access_key = get('SecretAccessKey') | ||||
|         session_token = get('SessionToken') | ||||
|  | ||||
|         # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html | ||||
|         uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id) | ||||
|         datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') | ||||
|         date = datetime_now[:8] | ||||
|         canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % { | ||||
|             'uri': uri, | ||||
|             'host': self._SNI_HOST, | ||||
|             'date': datetime_now, | ||||
|             'token': session_token, | ||||
|             'key': self._AWS_API_KEY, | ||||
|             'signed_headers': self._AWS_SIGNED_HEADERS, | ||||
|             'payload_hash': aws_hash(''), | ||||
|         } | ||||
|  | ||||
|         # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html | ||||
|         credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]) | ||||
|         string_to_sign = '\n'.join([ | ||||
|             'AWS4-HMAC-SHA256', datetime_now, credential_string, | ||||
|             aws_hash(canonical_string)]) | ||||
|  | ||||
|         # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html | ||||
|         def aws_hmac(key, msg): | ||||
|             return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) | ||||
|  | ||||
|         def aws_hmac_digest(key, msg): | ||||
|             return aws_hmac(key, msg).digest() | ||||
|  | ||||
|         def aws_hmac_hexdigest(key, msg): | ||||
|             return aws_hmac(key, msg).hexdigest() | ||||
|  | ||||
|         k_secret = 'AWS4' + secret_access_key | ||||
|         k_date = aws_hmac_digest(k_secret.encode('utf-8'), date) | ||||
|         k_region = aws_hmac_digest(k_date, self._AWS_REGION) | ||||
|         k_service = aws_hmac_digest(k_region, self._AWS_SERVICE) | ||||
|         k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST) | ||||
|  | ||||
|         signature = aws_hmac_hexdigest(k_signing, string_to_sign) | ||||
|  | ||||
|         auth_header = ', '.join([ | ||||
|             'AWS4-HMAC-SHA256 Credential=%s' % '/'.join( | ||||
|                 [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]), | ||||
|             'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS, | ||||
|             'Signature=%s' % signature, | ||||
|         ]) | ||||
|  | ||||
|         mcp_id = self._download_json( | ||||
|             'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={ | ||||
|                 'Accept': '*/*', | ||||
|                 'Referer': url, | ||||
|                 'Authorization': auth_header, | ||||
|                 'X-Amz-Date': datetime_now, | ||||
|                 'X-Amz-Security-Token': session_token, | ||||
|                 'X-Api-Key': self._AWS_API_KEY, | ||||
|             })['results'][0]['mcpId'] | ||||
|         mcp_id = self._aws_execute_api({ | ||||
|             'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id), | ||||
|             'access_key': get('AccessKeyId'), | ||||
|             'secret_key': get('SecretAccessKey'), | ||||
|             'session_token': get('SessionToken'), | ||||
|         }, video_id)['results'][0]['mcpId'] | ||||
|  | ||||
|         return self.url_result( | ||||
|             smuggle_url( | ||||
|   | ||||
| @@ -1,22 +1,53 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import math | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .aws import AWSIE | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     InAdvancePagedList, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     str_or_none, | ||||
|     urlencode_postdata, | ||||
|     clean_html, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ShahidIE(InfoExtractor): | ||||
class ShahidBaseIE(AWSIE):
    """Shared plumbing for Shahid extractors: signed API calls and error reporting."""
    _AWS_PROXY_HOST = 'api2.shahid.net'
    _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'

    def _handle_error(self, e):
        """Raise an expected ExtractorError built from the fault messages in an HTTP error body.

        Returns normally (raising nothing) when the body is not parseable
        JSON or carries no non-empty 'userMessage'; callers then re-raise
        the original error themselves.
        """
        payload = self._parse_json(
            e.cause.read().decode('utf-8'), None, fatal=False)
        if not payload:
            return
        messages = []
        for fault in payload.get('faults', []):
            if fault.get('userMessage'):
                messages.append(clean_html(fault['userMessage']))
        combined = ', '.join(messages)
        if combined:
            raise ExtractorError(combined, expected=True)

    def _call_api(self, path, video_id, request=None):
        """Call '/proxy/v2/<path>' through the signed AWS helper and return its JSON.

        When `request` is truthy it is JSON-encoded and sent as the
        'request' query parameter. HTTP errors are passed to
        `_handle_error` first; if that does not raise, the original
        ExtractorError propagates.
        """
        query = {'request': json.dumps(request)} if request else {}
        aws_dict = {
            'uri': '/proxy/v2/' + path,
            'access_key': 'AKIAI6X4TYCIXM2B7MUQ',
            'secret_key': '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',
        }
        try:
            return self._aws_execute_api(aws_dict, video_id, query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError):
                self._handle_error(e)
            raise
|  | ||||
|  | ||||
| class ShahidIE(ShahidBaseIE): | ||||
|     _NETRC_MACHINE = 'shahid' | ||||
|     _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
| @@ -41,26 +72,13 @@ class ShahidIE(InfoExtractor): | ||||
|         'only_matching': True | ||||
|     }] | ||||
|  | ||||
|     def _api2_request(self, *args, **kwargs): | ||||
|         try: | ||||
|             return self._download_json(*args, **kwargs) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
|                 fail_data = self._parse_json( | ||||
|                     e.cause.read().decode('utf-8'), None, fatal=False) | ||||
|                 if fail_data: | ||||
|                     faults = fail_data.get('faults', []) | ||||
|                     faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) | ||||
|                     if faults_message: | ||||
|                         raise ExtractorError(faults_message, expected=True) | ||||
|             raise | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         email, password = self._get_login_info() | ||||
|         if email is None: | ||||
|             return | ||||
|  | ||||
|         user_data = self._api2_request( | ||||
|         try: | ||||
|             user_data = self._download_json( | ||||
|                 'https://shahid.mbc.net/wd/service/users/login', | ||||
|                 None, 'Logging in', data=json.dumps({ | ||||
|                     'email': email, | ||||
| @@ -69,6 +87,10 @@ class ShahidIE(InfoExtractor): | ||||
|                 }).encode('utf-8'), headers={ | ||||
|                     'Content-Type': 'application/json; charset=UTF-8', | ||||
|                 })['user'] | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
|                 self._handle_error(e) | ||||
|             raise | ||||
|  | ||||
|         self._download_webpage( | ||||
|             'https://shahid.mbc.net/populateContext', | ||||
| @@ -81,25 +103,13 @@ class ShahidIE(InfoExtractor): | ||||
|                 'sessionId': user_data['sessionId'], | ||||
|             })) | ||||
|  | ||||
|     def _get_api_data(self, response): | ||||
|         data = response.get('data', {}) | ||||
|  | ||||
|         error = data.get('error') | ||||
|         if error: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), | ||||
|                 expected=True) | ||||
|  | ||||
|         return data | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         page_type, video_id = re.match(self._VALID_URL, url).groups() | ||||
|         if page_type == 'clip': | ||||
|             page_type = 'episode' | ||||
|  | ||||
|         playout = self._api2_request( | ||||
|             'https://api2.shahid.net/proxy/v2/playout/url/' + video_id, | ||||
|             video_id, 'Downloading player JSON')['playout'] | ||||
|         playout = self._call_api( | ||||
|             'playout/url/' + video_id, video_id)['playout'] | ||||
|  | ||||
|         if playout.get('drm'): | ||||
|             raise ExtractorError('This video is DRM protected.', expected=True) | ||||
| @@ -107,13 +117,27 @@ class ShahidIE(InfoExtractor): | ||||
|         formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video = self._get_api_data(self._download_json( | ||||
|         # video = self._call_api( | ||||
|         #     'product/id', video_id, { | ||||
|         #         'id': video_id, | ||||
|         #         'productType': 'ASSET', | ||||
|         #         'productSubType': page_type.upper() | ||||
|         #     })['productModel'] | ||||
|  | ||||
|         response = self._download_json( | ||||
|             'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id), | ||||
|             video_id, 'Downloading video JSON', query={ | ||||
|                 'apiKey': 'sh@hid0nlin3', | ||||
|                 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', | ||||
|             }))[page_type] | ||||
|             }) | ||||
|         data = response.get('data', {}) | ||||
|         error = data.get('error') | ||||
|         if error: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), | ||||
|                 expected=True) | ||||
|  | ||||
|         video = data[page_type] | ||||
|         title = video['title'] | ||||
|         categories = [ | ||||
|             category['name'] | ||||
| @@ -135,3 +159,57 @@ class ShahidIE(InfoExtractor): | ||||
|             'episode_id': video_id, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
class ShahidShowIE(ShahidBaseIE):
    """Extract a Shahid show/series page as a paged playlist of its entries."""
    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
        'info_dict': {
            'id': '79187',
            'title': 'رامز قرش البحر',
            'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff',
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861',
        'only_matching': True
    }]
    # Number of products requested per playlist page.
    _PAGE_SIZE = 30

    def _real_extract(self, url):
        """Return a playlist result whose entries are fetched page by page."""
        show_id = self._match_id(url)

        product = self._call_api(
            'playableAsset', show_id, {'showId': show_id})['productModel']
        playlist = product['playlist']
        playlist_id = playlist['id']
        # Tolerate a missing or null 'show' object.
        show = product.get('show') or {}

        def page_func(page_num):
            page = self._call_api(
                'product/playlist', show_id, {
                    'playListId': playlist_id,
                    'pageNumber': page_num,
                    # Keep the requested size in sync with the page count below.
                    'pageSize': self._PAGE_SIZE,
                    'sorts': [{
                        'order': 'DESC',
                        'type': 'SORTDATE'
                    }],
                })
            products = (page.get('productList') or {}).get('products') or []
            for entry in products:
                # A dict fallback is required here: a list has no .get().
                product_url = (entry.get('productUrl') or {}).get('url')
                if not product_url:
                    continue
                yield self.url_result(
                    product_url, 'Shahid',
                    str_or_none(entry.get('id')),
                    entry.get('title'))

        # Integer ceiling division: unlike math.ceil(a / b), this does not
        # drop the last partial page under Python 2's floor division.
        page_count = (playlist['count'] + self._PAGE_SIZE - 1) // self._PAGE_SIZE

        entries = InAdvancePagedList(page_func, page_count, self._PAGE_SIZE)

        return self.playlist_result(
            entries, show_id, show.get('title'), show.get('description'))
|   | ||||
		Reference in New Issue
	
	Block a user
	 Remita Amine
					Remita Amine