Update _real_extract in BandcampWeeklyIE class to handel the keyerror

2025-08-17 18:08:30 +00:00 · 2025-08-09 23:29:50 +05:30 · 2025-08-09 23:29:50 +05:30 · 05285b7e0d
commit 05285b7e0d
parent e8d49b1c7f
1 changed files with 66 additions and 22 deletions
--- a/yt_dlp/extractor/bandcamp.py
+++ b/yt_dlp/extractor/bandcamp.py
@ -440,39 +440,83 @@ def _real_extract(self, url):

        blob = self._extract_data_attr(webpage, show_id, 'blob')

-        show = blob['bcw_data'][show_id]
+        # Updated to correctly navigate the new data structure
+        # The data is now in a list under appData['shows']
+        shows_list = try_get(blob, lambda x: x['appData']['shows'], list)
+        show = None
+        if shows_list:
+            for s in shows_list:
+                if str(s.get('showId')) == show_id:
+                    show = s
+                    break
+
+        if not show:
+            # Fallback to the original logic if the new path fails
+            show = try_get(blob, lambda x: x['bcw_data'][show_id], dict)
+
+        if not show:
+            raise ExtractorError('Bandcamp Weekly data not found. This extractor is outdated. Please report this issue.')

        formats = []
-        for format_id, format_url in show['audio_stream'].items():
-            if not url_or_none(format_url):
-                continue
-            for known_ext in KNOWN_EXTENSIONS:
-                if known_ext in format_id:
-                    ext = known_ext
-                    break
-            else:
-                ext = None
-            formats.append({
-                'format_id': format_id,
-                'url': format_url,
-                'ext': ext,
-                'vcodec': 'none',
-            })
+        # The audio track ID is now in the 'audioTrackId' key
+        audio_track_id = str_or_none(show.get('audioTrackId'))

-        title = show.get('audio_title') or 'Bandcamp Weekly'
-        subtitle = show.get('subtitle')
+        # If audio track ID is found, download the audio page to get formats
+        if audio_track_id:
+            track_url = f'https://bandcamp.com/download?id={audio_track_id}'
+            audio_page = self._download_webpage(
+                track_url, show_id, 'Downloading audio download page')
+            
+            # The download links are on the new page, so we need a new way to parse
+            audio_blob = self._extract_data_attr(audio_page, show_id, 'blob', fatal=False)
+            if audio_blob:
+                # The formats are now in the 'downloads' list within the audio_blob
+                downloads = try_get(audio_blob, lambda x: x['digital_items'][0]['downloads'], dict)
+                if downloads:
+                    for format_id, f in downloads.items():
+                        formats.append({
+                            'url': f.get('url'),
+                            'format_id': format_id,
+                            'ext': f.get('encoding_name'),
+                            'vcodec': 'none',
+                        })
+        
+        # Fallback to the old logic if new parsing fails
+        if not formats and show.get('audio_stream'):
+            for format_id, format_url in show['audio_stream'].items():
+                if not url_or_none(format_url):
+                    continue
+                for known_ext in KNOWN_EXTENSIONS:
+                    if known_ext in format_id:
+                        ext = known_ext
+                        break
+                else:
+                    ext = None
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'ext': ext,
+                    'vcodec': 'none',
+                })
+        
+        # If no formats were found after all attempts, raise an error
+        if not formats:
+            raise ExtractorError('Could not find any audio formats for this episode.')
+
+        title = show.get('audio_title') or show.get('title') or 'Bandcamp Weekly'
+        subtitle = show.get('shortDesc')
        if subtitle:
            title += f' - {subtitle}'
            
        return {
            'id': show_id,
            'title': title,
-            'description': show.get('desc') or show.get('short_desc'),
+            'description': show.get('desc') or show.get('shortDesc'),
            'duration': float_or_none(show.get('audio_duration')),
            'is_live': False,
-            'release_date': unified_strdate(show.get('published_date')),
+            'release_date': unified_strdate(show.get('date')),
            'series': 'Bandcamp Weekly',
-            'episode': show.get('subtitle'),
+            'episode': show.get('shortDesc'),
            'episode_id': show_id,
            'formats': formats,
        }