From abd475ecaafd9c4ae507aea88d8e37dc8f82278d Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Tue, 20 May 2025 09:57:35 +0900 Subject: [PATCH 1/3] [ie/appleconnect] Rework extractor --- yt_dlp/extractor/appleconnect.py | 95 +++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 25 deletions(-) diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py index 433eb4ed8..084e700ea 100644 --- a/yt_dlp/extractor/appleconnect.py +++ b/yt_dlp/extractor/appleconnect.py @@ -1,47 +1,92 @@ from .common import InfoExtractor -from ..utils import ExtractorError, str_to_int +from ..utils import ( + ExtractorError, + float_or_none, + parse_resolution, + qualities, + url_or_none, +) +from ..utils.traversal import traverse_obj class AppleConnectIE(InfoExtractor): - _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)' + IE_NAME = 'apple:music:connect' + IE_DESC = 'Apple Music Connect' + + _HEADERS = { + 'Authorization': 'Bearer eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IldlYlBsYXlLaWQifQ.eyJpc3MiOiJBTVBXZWJQbGF5IiwiaWF0IjoxNzQ2NjM3MTY2LCJleHAiOjE3NTM4OTQ3NjYsInJvb3RfaHR0cHNfb3JpZ2luIjpbImFwcGxlLmNvbSJdfQ.ONPUnh6UMOJ1VWujIxxWuTdi2ueBAM01B8xMg4NkNy9mdE_C1Y15-xKGoZ6Qg6mgC-ZMdfFHt5Xf4hL4X4-lMw', + 'Origin': 'https://music.apple.com', + } + _QUALITIES = { + 'provisionalUploadVideo': (None, None), + 'sdVideo': (640, 480), + 'sdVideoWithPlusAudio': (640, 480), + 'sd480pVideo': (720, 480), + '720pHdVideo': (1280, 720), + '1080pHdVideo': (1440, 1080), + } + _VALID_URL = r'https?://music\.apple\.com/\w{0,2}/post/(?P<id>\d+)' _TESTS = [{ - 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', - 'md5': 'c1d41f72c8bcaf222e089434619316e4', + 'url': 'https://music.apple.com/us/post/1018290019', 'info_dict': { - 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3', + 'id': '1018290019', 'ext': 'm4v', 'title': 'Energy', - 'uploader': 'Drake', - 'thumbnail': 
r're:^https?://.*\.jpg$', + 'duration': 177.911, + 'thumbnail': r're:https?://.+\.png', 'upload_date': '20150710', - 'timestamp': 1436545535, + 'uploader': 'Drake', }, }, { - 'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9', - 'only_matching': True, + 'url': 'https://music.apple.com/us/post/1016746627', + 'info_dict': { + 'id': '1016746627', + 'ext': 'm4v', + 'title': 'Body Shop (Madonna) - Chellous Lima (Acoustic Cover)', + 'duration': 210.278, + 'thumbnail': r're:https?://.+\.png', + 'upload_date': '20150706', + 'uploader': 'Chellous Lima', + }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - try: - video_json = self._html_search_regex( - r'class="auc-video-data">(\{.*?\})', webpage, 'json') - except ExtractorError: - raise ExtractorError('This post doesn\'t contain a video', expected=True) + if not (videos := traverse_obj(self._download_json( + 'https://amp-api.music.apple.com/v1/catalog/us/uploaded-videos', + video_id, headers=self._HEADERS, query={'ids': video_id, 'l': 'en-US'}, + ), ('data', ..., 'attributes', any), default={})): + raise ExtractorError('Failed to fetch video information') - video_data = self._parse_json(video_json, video_id) - timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp')) - like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None)) + formats = [] + quality = qualities(list(self._QUALITIES.keys())) + for format_id, src_url in traverse_obj(videos, ( + 'assetTokens', {dict.items}, lambda _, v: url_or_none(v[1]), + )): + formats.append({ + 'ext': 'm4v', + 'format_id': format_id, + 'quality': quality(format_id), + 'url': src_url, + **parse_resolution(src_url), + **traverse_obj(self._QUALITIES, (format_id, { + 'height': 1, + 'width': 0, + })), + }) return { 'id': video_id, - 'url': video_data['sslSrc'], - 'title': video_data['title'], - 'description': 
video_data['description'], - 'uploader': video_data['artistName'], - 'thumbnail': video_data['artworkUrl'], - 'timestamp': timestamp, - 'like_count': like_count, + 'formats': formats, + 'thumbnail': self._html_search_meta( + ('og:image', 'og:image:secure_url', 'twitter:image'), webpage), + **traverse_obj(videos, { + 'title': ('name', {str}), + 'duration': ('durationInMilliseconds', {float_or_none(scale=1000)}), + 'upload_date': ('uploadDate', {str}, {lambda x: x.replace('-', '')}), + 'uploader': (('artistName', 'uploadingArtistName'), {str}, any), + 'webpage_url': ('postUrl', {url_or_none}), + }), } From 04d37451864e33efbc0ea3730a66195151a0cdce Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Tue, 27 May 2025 22:14:35 +0900 Subject: [PATCH 2/3] Apply suggestions --- yt_dlp/extractor/appleconnect.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py index 084e700ea..d1cc17a81 100644 --- a/yt_dlp/extractor/appleconnect.py +++ b/yt_dlp/extractor/appleconnect.py @@ -1,12 +1,11 @@ from .common import InfoExtractor from ..utils import ( - ExtractorError, float_or_none, parse_resolution, qualities, url_or_none, ) -from ..utils.traversal import traverse_obj +from ..utils.traversal import require, traverse_obj class AppleConnectIE(InfoExtractor): @@ -54,15 +53,15 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - if not (videos := traverse_obj(self._download_json( + videos = self._download_json( 'https://amp-api.music.apple.com/v1/catalog/us/uploaded-videos', - video_id, headers=self._HEADERS, query={'ids': video_id, 'l': 'en-US'}, - ), ('data', ..., 'attributes', any), default={})): - raise ExtractorError('Failed to fetch video information') + video_id, headers=self._HEADERS, query={'ids': video_id, 'l': 'en-US'}) + attributes = traverse_obj(videos, ( + 'data', ..., 
'attributes', any, {require('video information')})) formats = [] quality = qualities(list(self._QUALITIES.keys())) - for format_id, src_url in traverse_obj(videos, ( + for format_id, src_url in traverse_obj(attributes, ( 'assetTokens', {dict.items}, lambda _, v: url_or_none(v[1]), )): formats.append({ @@ -82,7 +81,7 @@ def _real_extract(self, url): 'formats': formats, 'thumbnail': self._html_search_meta( ('og:image', 'og:image:secure_url', 'twitter:image'), webpage), - **traverse_obj(videos, { + **traverse_obj(attributes, { 'title': ('name', {str}), 'duration': ('durationInMilliseconds', {float_or_none(scale=1000)}), 'upload_date': ('uploadDate', {str}, {lambda x: x.replace('-', '')}), From ad44657a7a92b9037cfe691f473f6109b15d6b8b Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Fri, 6 Jun 2025 14:20:41 +0900 Subject: [PATCH 3/3] fix jwt extraction --- yt_dlp/extractor/appleconnect.py | 36 ++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py index d1cc17a81..780bce632 100644 --- a/yt_dlp/extractor/appleconnect.py +++ b/yt_dlp/extractor/appleconnect.py @@ -1,21 +1,27 @@ +import base64 +import json + from .common import InfoExtractor from ..utils import ( + extract_attributes, float_or_none, parse_resolution, qualities, url_or_none, + urljoin, +) +from ..utils.traversal import ( + find_element, + require, + traverse_obj, ) -from ..utils.traversal import require, traverse_obj class AppleConnectIE(InfoExtractor): IE_NAME = 'apple:music:connect' IE_DESC = 'Apple Music Connect' - _HEADERS = { - 'Authorization': 'Bearer eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IldlYlBsYXlLaWQifQ.eyJpc3MiOiJBTVBXZWJQbGF5IiwiaWF0IjoxNzQ2NjM3MTY2LCJleHAiOjE3NTM4OTQ3NjYsInJvb3RfaHR0cHNfb3JpZ2luIjpbImFwcGxlLmNvbSJdfQ.ONPUnh6UMOJ1VWujIxxWuTdi2ueBAM01B8xMg4NkNy9mdE_C1Y15-xKGoZ6Qg6mgC-ZMdfFHt5Xf4hL4X4-lMw', - 'Origin': 'https://music.apple.com', 
- } + _BASE_URL = 'https://music.apple.com' _QUALITIES = { 'provisionalUploadVideo': (None, None), 'sdVideo': (640, 480), @@ -53,9 +59,27 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + js_url = traverse_obj(webpage, ( + {find_element(tag='script', attr='crossorigin', value='', html=True)}, + {extract_attributes}, 'src', {urljoin(self._BASE_URL)}, {require('JS URL')})) + js = self._download_webpage(js_url, video_id) + + header = base64.urlsafe_b64encode( + json.dumps({ + 'alg': 'ES256', + 'typ': 'JWT', + 'kid': 'WebPlayKid', + }, separators=(',', ':')).encode(), + ).decode().rstrip('=') + jwt = self._search_regex( + fr'(["\'])(?P<jwt>{header}(?:\.[\w-]+){{2}})\1', js, 'JSON Web Token', group='jwt') + videos = self._download_json( 'https://amp-api.music.apple.com/v1/catalog/us/uploaded-videos', - video_id, headers=self._HEADERS, query={'ids': video_id, 'l': 'en-US'}) + video_id, headers={ + 'Authorization': f'Bearer {jwt}', + 'Origin': self._BASE_URL, + }, query={'ids': video_id, 'l': 'en-US'}) attributes = traverse_obj(videos, ( 'data', ..., 'attributes', any, {require('video information')}))