1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-10-24 03:08:34 +00:00

[ie/appleconnect] Rework extractor (#13229)

Authored by: doe1080
This commit is contained in:
doe1080 2025-10-16 03:42:15 +09:00 committed by GitHub
parent c7bda2192a
commit 78748b506f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,47 +1,125 @@
import time
from .common import InfoExtractor
from ..utils import ExtractorError, str_to_int
from ..utils import (
ExtractorError,
extract_attributes,
float_or_none,
jwt_decode_hs256,
jwt_encode,
parse_resolution,
qualities,
unified_strdate,
update_url,
url_or_none,
urljoin,
)
from ..utils.traversal import (
find_element,
require,
traverse_obj,
)
class AppleConnectIE(InfoExtractor):
    """Extractor for video posts at ``music.apple.com/.../post/<numeric id>``.

    Metadata and media URLs are read from the ``uploaded-videos`` endpoint of
    ``amp-api.music.apple.com``. The bearer token that endpoint requires is
    taken from a JavaScript file referenced by the post's web page.
    """
    IE_NAME = 'apple:music:connect'
    IE_DESC = 'Apple Music Connect'

    _BASE_URL = 'https://music.apple.com'
    # Known ``assetTokens`` keys mapped to the height reported for each format.
    # The key order is also the list passed to qualities() in _real_extract
    # (presumably worst to best -- confirm against the qualities() helper).
    _QUALITIES = {
        'provisionalUploadVideo': None,
        'sdVideo': 480,
        'sdVideoWithPlusAudio': 480,
        'sd480pVideo': 480,
        '720pHdVideo': 720,
        '1080pHdVideo': 1080,
    }
    _VALID_URL = r'https?://music\.apple\.com/[\w-]+/post/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://music.apple.com/us/post/1018290019',
        'info_dict': {
            'id': '1018290019',
            'ext': 'm4v',
            'title': 'Energy',
            'duration': 177.911,
            'thumbnail': r're:https?://.+\.png',
            'upload_date': '20150710',
            'uploader': 'Drake',
        },
    }, {
        'url': 'https://music.apple.com/us/post/1016746627',
        'info_dict': {
            'id': '1016746627',
            'ext': 'm4v',
            'title': 'Body Shop (Madonna) - Chellous Lima (Acoustic Cover)',
            'duration': 210.278,
            'thumbnail': r're:https?://.+\.png',
            'upload_date': '20150706',
            'uploader': 'Chellous Lima',
        },
    }]

    # Last token obtained by _get_token(); reused while it is not near expiry.
    _jwt = None

    @staticmethod
    def _jwt_is_expired(token):
        """Return True when fewer than 120 seconds remain before ``exp``.

        ``token`` is decoded with jwt_decode_hs256() and its ``exp`` claim is
        compared against the current wall-clock time.
        """
        return jwt_decode_hs256(token)['exp'] - time.time() < 120

    def _get_token(self, webpage, video_id):
        """Return a bearer token for the API, fetching a new one if needed.

        A previously stored token is returned as long as it is not considered
        expired. Otherwise the ``src`` of the page's ``<script crossorigin="">``
        element is resolved against ``_BASE_URL``, that script is downloaded,
        and a quoted JWT is searched for inside it.

        Raises ExtractorError when the token found in the script is itself
        already (nearly) expired; failures to locate the script URL or the
        token are raised by the helpers used below.
        """
        if self._jwt and not self._jwt_is_expired(self._jwt):
            return self._jwt

        js_url = traverse_obj(webpage, (
            {find_element(tag='script', attr='crossorigin', value='', html=True)},
            {extract_attributes}, 'src', {urljoin(self._BASE_URL)}, {require('JS URL')}))
        js = self._download_webpage(
            js_url, video_id, 'Downloading token JS', 'Unable to download token JS')

        # First dot-separated segment of a JWT built with these headers; it is
        # used as a fixed prefix so only a token with this header is matched.
        header = jwt_encode({}, '', headers={'alg': 'ES256', 'kid': 'WebPlayKid'}).split('.')[0]
        self._jwt = self._search_regex(
            fr'(["\'])(?P<jwt>{header}(?:\.[\w-]+){{2}})\1', js, 'JSON Web Token', group='jwt')
        if self._jwt_is_expired(self._jwt):
            raise ExtractorError('The fetched token is already expired')

        return self._jwt

    def _real_extract(self, url):
        """Build the info dict for the post at ``url``.

        The web page is downloaded for the token lookup and the thumbnail meta
        tags; everything else comes from the ``attributes`` object of the API
        response, which is required to be present.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        videos = self._download_json(
            'https://amp-api.music.apple.com/v1/catalog/us/uploaded-videos',
            video_id, headers={
                'Authorization': f'Bearer {self._get_token(webpage, video_id)}',
                'Origin': self._BASE_URL,
            }, query={'ids': video_id, 'l': 'en-US'})
        attributes = traverse_obj(videos, (
            'data', ..., 'attributes', any, {require('video information')}))

        formats = []
        quality = qualities(list(self._QUALITIES.keys()))
        # Only ``assetTokens`` entries whose value is a usable URL become formats.
        for format_id, src_url in traverse_obj(attributes, (
            'assetTokens', {dict.items}, lambda _, v: url_or_none(v[1]),
        )):
            formats.append({
                'ext': 'm4v',
                'format_id': format_id,
                'height': self._QUALITIES.get(format_id),
                'quality': quality(format_id),
                'url': src_url,
                # Unpacked last, so any keys parse_resolution() returns for the
                # query-less URL take precedence over the values above.
                **parse_resolution(update_url(src_url, query=None), lenient=True),
            })

        return {
            'id': video_id,
            'formats': formats,
            'thumbnail': self._html_search_meta(
                ['og:image', 'og:image:secure_url', 'twitter:image'], webpage),
            **traverse_obj(attributes, {
                'title': ('name', {str}),
                'duration': ('durationInMilliseconds', {float_or_none(scale=1000)}),
                'upload_date': ('uploadDate', {unified_strdate}),
                'uploader': (('artistName', 'uploadingArtistName'), {str}, any),
                'webpage_url': ('postUrl', {url_or_none}),
            }),
        }