diff --git a/yt_dlp/extractor/udio.py b/yt_dlp/extractor/udio.py index 294132ecc..e4fb4bd4a 100644 --- a/yt_dlp/extractor/udio.py +++ b/yt_dlp/extractor/udio.py @@ -1,4 +1,7 @@ +import re + from .common import InfoExtractor +from ..utils import determine_ext, urljoin class UdioIE(InfoExtractor): @@ -6,74 +9,84 @@ class UdioIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.udio.com/songs/ehJuLz9DuCtVapQMVMcA7N', 'info_dict': - {'id': 'ehJuLz9DuCtVapQMVMcA7N', - 'title': 'Lost Love | Udio', - 'description': "Listen to Lost Love by The I Don't Knows on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.", - 'uploader': "The I Don't Knows", - 'uploader_url': "https://udio.com/artist/The I Don't Knows"}, + { + 'id': 'ehJuLz9DuCtVapQMVMcA7N', + 'title': 'Lost Love', + 'description': "Listen to Lost Love by The I Don't Knows on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.", + 'uploader': "The I Don't Knows", + 'uploader_url': "https://udio.com/artist/The I Don't Knows", + }, 'playlist_count': 1, }, { 'url': 'https://www.udio.com/songs/hGZqWy3bPMrN3tosf5QZqt', 'info_dict': - {'id': 'hGZqWy3bPMrN3tosf5QZqt', - 'title': 'Batou - What is this love thing? (feat. EchoShadow) | Udio', - 'description': 'Listen to Batou - What is this love thing? (feat. EchoShadow) by DigitalScribe on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', - 'uploader': 'DigitalScribe', - 'uploader_url': 'https://udio.com/artist/DigitalScribe'}, + { + 'id': 'hGZqWy3bPMrN3tosf5QZqt', + 'title': 'Batou - What is this love thing? (feat. EchoShadow)', + 'description': 'Listen to Batou - What is this love thing? (feat. EchoShadow) by DigitalScribe on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', + 'uploader': 'DigitalScribe', + 'uploader_url': 'https://udio.com/artist/DigitalScribe', + }, 'playlist_count': 1, }, { 'url': 'https://www.udio.com/songs/dRFAMqCzqkTAX13F4MTKCb', - 'info_dict': {'id': 'dRFAMqCzqkTAX13F4MTKCb', - 'title': 'Évasion en Route | Udio', - 'description': 'Listen to Évasion en Route by aveiro on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', - 'uploader': 'aveiro', - 'uploader_url': 'https://udio.com/artist/aveiro'}, + 'info_dict': { + 'id': 'dRFAMqCzqkTAX13F4MTKCb', + 'title': 'Évasion en Route', + 'description': 'Listen to Évasion en Route by aveiro on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', + 'uploader': 'aveiro', + 'uploader_url': 'https://udio.com/artist/aveiro', + }, 'playlist_count': 1, }, { 'url': 'https://www.udio.com/songs/edMsDRvAiFosixHHTVbJ1L', - 'info_dict': {'id': 'edMsDRvAiFosixHHTVbJ1L', - 'title': 'Charlie e la Felicità ext v2.2 | Udio', - 'description': 'Listen to Charlie e la Felicità ext v2.2 by GIANI_curzioilGRANDE on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', - 'uploader': 'GIANI_curzioilGRANDE', - 'uploader_url': 'https://udio.com/artist/GIANI_curzioilGRANDE'}, + 'info_dict': { + 'id': 'edMsDRvAiFosixHHTVbJ1L', + 'title': 'Charlie e la Felicità ext v2.2', + 'description': 'Listen to Charlie e la Felicità ext v2.2 by GIANI_curzioilGRANDE on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', + 'uploader': 'GIANI_curzioilGRANDE', + 'uploader_url': 'https://udio.com/artist/GIANI_curzioilGRANDE', + }, 'playlist_count': 1, }, { 'url': 'https://www.udio.com/songs/fPoZ7yLUv8orY2sNzeYNFp', - 'info_dict': {'id': 'fPoZ7yLUv8orY2sNzeYNFp', - 'title': 'Nocturnal Vibes | Udio', - 'description': 'Listen to Nocturnal Vibes by RaulKong898 on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', - 'uploader': 'RaulKong898', - 'uploader_url': 'https://udio.com/artist/RaulKong898'}, + 'info_dict': { + 'id': 'fPoZ7yLUv8orY2sNzeYNFp', + 'title': 'Nocturnal Vibes', + 'description': 'Listen to Nocturnal Vibes by RaulKong898 on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', + 'uploader': 'RaulKong898', + 'uploader_url': 'https://udio.com/artist/RaulKong898', + }, 'playlist_count': 1, }, { 'url': 'https://www.udio.com/songs/pzGGivV6oAR76ZxsnkbVw2', - 'info_dict': {'id': 'pzGGivV6oAR76ZxsnkbVw2', - 'title': 'Eternal Darkness | Udio', - 'description': 'Listen to Eternal Darkness by Para$Graf0815 on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', - 'uploader': 'Para$Graf0815', - 'uploader_url': 'https://udio.com/artist/Para$Graf0815'}, + 'info_dict': { + 'id': 'pzGGivV6oAR76ZxsnkbVw2', + 'title': 'Eternal Darkness', + 'description': 'Listen to Eternal Darkness by Para$Graf0815 on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', + 'uploader': 'Para$Graf0815', + 'uploader_url': 'https://udio.com/artist/Para$Graf0815', + }, 'playlist_count': 1, }, { 'url': 'https://www.udio.com/songs/hSzvdEyBjBXF2CdsJP4zYr', - 'info_dict': {'id': 'hSzvdEyBjBXF2CdsJP4zYr', - 'title': 'Revenge of the Dreamer | Udio', - 'description': 'Listen to Revenge of the Dreamer by Doc Immortal on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', - 'uploader': 'Doc Immortal', - 'uploader_url': 'https://udio.com/artist/Doc Immortal'}, + 'info_dict': { + 'id': 'hSzvdEyBjBXF2CdsJP4zYr', + 'title': 'Revenge of the Dreamer', + 'description': 'Listen to Revenge of the Dreamer by Doc Immortal on Udio. Discover, create, and share music with the world. Use the latest technology to create AI music in seconds.', + 'uploader': 'Doc Immortal', + 'uploader_url': 'https://udio.com/artist/Doc Immortal', + }, 'playlist_count': 1, }, ] def _real_extract(self, url): - artist_id = self._match_id(url) - webpage = self._download_webpage(url, artist_id) - - # Extract the audio URL from the tag - audio_src = self._html_search_meta('og:audio', webpage, 'audio src', fatal=True) + song_id = self._match_id(url) + webpage = self._download_webpage(url, song_id) + media_url = self._og_search_video_url(webpage, default=None) artist_name, title = self._html_extract_title(webpage, default=None).split('-', 1) - description = self._og_search_description(webpage, default=None) - # Create a single entry for the playlist - # Extract lyrics from the pre tag with specific classes + title = title.split('|', 1) lyrics = self._search_regex( r'
\s*(.*?)\s*
', webpage, @@ -82,12 +95,15 @@ def _real_extract(self, url): ) return { - 'id': artist_id, - 'title': title.strip(), + 'id': song_id, + 'title': title[0].strip(), 'description': lyrics, 'uploader': artist_name.strip(), - 'uploader_url': f'https://udio.com/artist/{artist_name.strip()}', - 'url': audio_src, + 'uploader_url': urljoin('https://udio.com/artist/', artist_name.strip()), + 'formats': [{ + 'url': media_url, + 'ext': determine_ext(media_url), + }], } @@ -135,32 +151,17 @@ class UdioListIE(InfoExtractor): }] def _find_links(self, webpage, base_url=None): - """ - Find all song links in the webpage. - Returns a list of absolute URLs. - Args: - webpage: The HTML content of the webpage - base_url: Base URL to prepend to relative links (optional) - Returns: - List of absolute song URLs - """ - import re - - # Look for href attributes with relative song links relative_links = re.findall(r'href="(/songs/[^"?&/]+)"', webpage) - # Convert relative URLs to absolute URLs if not base_url: base_url = 'https://www.udio.com' return [f'{base_url}{relative_link}' for relative_link in relative_links] def _real_extract(self, url): - import re list_type, list_id = self._match_valid_url(url).group('list_type', 'id') webpage = self._download_webpage(url, list_id) - # Look for song cards in the webpage song_cards = re.findall(r']*class="[^"]*song-card[^"]*"[^>]*>(.*?)\s*', webpage, re.DOTALL) @@ -168,31 +169,26 @@ def _real_extract(self, url): entries = [] for card in song_cards: - # Extract song URL song_url_match = re.search(r'href="(/songs/([^"?&/]+))"', card) if not song_url_match: continue song_path = song_url_match.group(1) song_id = song_url_match.group(2) - song_url = f'https://www.udio.com{song_path}' + song_url = urljoin('https://www.udio.com/', song_path) - # Extract song title song_title = self._search_regex( r']*class="[^"]*song-title[^"]*"[^>]*>(.*?)', card, 'song title', default=None) - # Extract artist name artist_name = self._search_regex( r']*class="[^"]*artist-name[^"]*"[^>]*>(.*?)', card, 'artist name', default=None) - # Extract thumbnail thumbnail = self._search_regex( r']*src="([^"]+)"[^>]*class="[^"]*song-cover[^"]*"', card, 'thumbnail', default=None) - # If we found detailed metadata, create a more detailed entry if song_title or artist_name or thumbnail: self.to_screen(f'Found metadata for song {song_id}: {song_title} by {artist_name}') entry = { @@ -211,10 +207,8 @@ def _real_extract(self, url): entries.append(entry) else: - # Fall back to simple URL result if no metadata found entries.append(self.url_result(song_url, 'Udio', song_id)) - # If no entries found using song cards, fall back to our existing method if not entries: self.to_screen('No song cards found, trying alternative methods') song_urls = self._find_links(webpage)