diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 0a8f88fa8..4c9c9c148 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -440,39 +440,83 @@ def _real_extract(self, url): blob = self._extract_data_attr(webpage, show_id, 'blob') - show = blob['bcw_data'][show_id] + # Updated to correctly navigate the new data structure + # The data is now in a list under appData['shows'] + shows_list = try_get(blob, lambda x: x['appData']['shows'], list) + show = None + if shows_list: + for s in shows_list: + if str(s.get('showId')) == show_id: + show = s + break + + if not show: + # Fallback to the original logic if the new path fails + show = try_get(blob, lambda x: x['bcw_data'][show_id], dict) + + if not show: + raise ExtractorError('Bandcamp Weekly data not found. This extractor is outdated. Please report this issue.') formats = [] - for format_id, format_url in show['audio_stream'].items(): - if not url_or_none(format_url): - continue - for known_ext in KNOWN_EXTENSIONS: - if known_ext in format_id: - ext = known_ext - break - else: - ext = None - formats.append({ - 'format_id': format_id, - 'url': format_url, - 'ext': ext, - 'vcodec': 'none', - }) + # The audio track ID is now in the 'audioTrackId' key + audio_track_id = str_or_none(show.get('audioTrackId')) - title = show.get('audio_title') or 'Bandcamp Weekly' - subtitle = show.get('subtitle') + # If audio track ID is found, download the audio page to get formats + if audio_track_id: + track_url = f'https://bandcamp.com/download?id={audio_track_id}' + audio_page = self._download_webpage( + track_url, show_id, 'Downloading audio download page') + + # The download links are on the new page, so we need a new way to parse + audio_blob = self._extract_data_attr(audio_page, show_id, 'blob', fatal=False) + if audio_blob: + # The formats are now in the 'downloads' list within the audio_blob + downloads = try_get(audio_blob, lambda x: x['digital_items'][0]['downloads'], dict) + if downloads: + for format_id, f in downloads.items(): + formats.append({ + 'url': f.get('url'), + 'format_id': format_id, + 'ext': f.get('encoding_name'), + 'vcodec': 'none', + }) + + # Fallback to the old logic if new parsing fails + if not formats and show.get('audio_stream'): + for format_id, format_url in show['audio_stream'].items(): + if not url_or_none(format_url): + continue + for known_ext in KNOWN_EXTENSIONS: + if known_ext in format_id: + ext = known_ext + break + else: + ext = None + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'ext': ext, + 'vcodec': 'none', + }) + + # If no formats were found after all attempts, raise an error + if not formats: + raise ExtractorError('Could not find any audio formats for this episode.') + + title = show.get('audio_title') or show.get('title') or 'Bandcamp Weekly' + subtitle = show.get('shortDesc') if subtitle: title += f' - {subtitle}' - + return { 'id': show_id, 'title': title, - 'description': show.get('desc') or show.get('short_desc'), + 'description': show.get('desc') or show.get('shortDesc'), 'duration': float_or_none(show.get('audio_duration')), 'is_live': False, - 'release_date': unified_strdate(show.get('published_date')), + 'release_date': unified_strdate(show.get('date')), 'series': 'Bandcamp Weekly', - 'episode': show.get('subtitle'), + 'episode': show.get('shortDesc'), 'episode_id': show_id, 'formats': formats, }