1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-10 15:28:33 +00:00

[ie/appleconnect] Rework extractor

This commit is contained in:
doe1080 2025-05-20 09:57:35 +09:00
parent 2685654a37
commit abd475ecaa

View File

@ -1,47 +1,92 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError, str_to_int from ..utils import (
ExtractorError,
float_or_none,
parse_resolution,
qualities,
url_or_none,
)
from ..utils.traversal import traverse_obj
class AppleConnectIE(InfoExtractor): class AppleConnectIE(InfoExtractor):
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)' IE_NAME = 'apple:music:connect'
IE_DESC = 'Apple Music Connect'
_HEADERS = {
'Authorization': 'Bearer eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IldlYlBsYXlLaWQifQ.eyJpc3MiOiJBTVBXZWJQbGF5IiwiaWF0IjoxNzQ2NjM3MTY2LCJleHAiOjE3NTM4OTQ3NjYsInJvb3RfaHR0cHNfb3JpZ2luIjpbImFwcGxlLmNvbSJdfQ.ONPUnh6UMOJ1VWujIxxWuTdi2ueBAM01B8xMg4NkNy9mdE_C1Y15-xKGoZ6Qg6mgC-ZMdfFHt5Xf4hL4X4-lMw',
'Origin': 'https://music.apple.com',
}
_QUALITIES = {
'provisionalUploadVideo': (None, None),
'sdVideo': (640, 480),
'sdVideoWithPlusAudio': (640, 480),
'sd480pVideo': (720, 480),
'720pHdVideo': (1280, 720),
'1080pHdVideo': (1440, 1080),
}
_VALID_URL = r'https?://music\.apple\.com/\w{0,2}/post/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', 'url': 'https://music.apple.com/us/post/1018290019',
'md5': 'c1d41f72c8bcaf222e089434619316e4',
'info_dict': { 'info_dict': {
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3', 'id': '1018290019',
'ext': 'm4v', 'ext': 'm4v',
'title': 'Energy', 'title': 'Energy',
'uploader': 'Drake', 'duration': 177.911,
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://.+\.png',
'upload_date': '20150710', 'upload_date': '20150710',
'timestamp': 1436545535, 'uploader': 'Drake',
}, },
}, { }, {
'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9', 'url': 'https://music.apple.com/us/post/1016746627',
'only_matching': True, 'info_dict': {
'id': '1016746627',
'ext': 'm4v',
'title': 'Body Shop (Madonna) - Chellous Lima (Acoustic Cover)',
'duration': 210.278,
'thumbnail': r're:https?://.+\.png',
'upload_date': '20150706',
'uploader': 'Chellous Lima',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
try: if not (videos := traverse_obj(self._download_json(
video_json = self._html_search_regex( 'https://amp-api.music.apple.com/v1/catalog/us/uploaded-videos',
r'class="auc-video-data">(\{.*?\})', webpage, 'json') video_id, headers=self._HEADERS, query={'ids': video_id, 'l': 'en-US'},
except ExtractorError: ), ('data', ..., 'attributes', any), default={})):
raise ExtractorError('This post doesn\'t contain a video', expected=True) raise ExtractorError('Failed to fetch video information')
video_data = self._parse_json(video_json, video_id) formats = []
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp')) quality = qualities(list(self._QUALITIES.keys()))
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None)) for format_id, src_url in traverse_obj(videos, (
'assetTokens', {dict.items}, lambda _, v: url_or_none(v[1]),
)):
formats.append({
'ext': 'm4v',
'format_id': format_id,
'quality': quality(format_id),
'url': src_url,
**parse_resolution(src_url),
**traverse_obj(self._QUALITIES, (format_id, {
'height': 1,
'width': 0,
})),
})
return { return {
'id': video_id, 'id': video_id,
'url': video_data['sslSrc'], 'formats': formats,
'title': video_data['title'], 'thumbnail': self._html_search_meta(
'description': video_data['description'], ('og:image', 'og:image:secure_url', 'twitter:image'), webpage),
'uploader': video_data['artistName'], **traverse_obj(videos, {
'thumbnail': video_data['artworkUrl'], 'title': ('name', {str}),
'timestamp': timestamp, 'duration': ('durationInMilliseconds', {float_or_none(scale=1000)}),
'like_count': like_count, 'upload_date': ('uploadDate', {str}, {lambda x: x.replace('-', '')}),
'uploader': (('artistName', 'uploadingArtistName'), {str}, any),
'webpage_url': ('postUrl', {url_or_none}),
}),
} }