Merge 364ef4b90b into e8d49b1c7f

2025-08-17 18:08:30 +00:00 · 2025-08-09 18:36:33 +00:00 · 2025-08-09 18:36:33 +00:00 · 499dd819d2
commit 499dd819d2
parent e8d49b1c7f 364ef4b90b
1 changed files with 58 additions and 24 deletions
--- a/yt_dlp/extractor/bandcamp.py
+++ b/yt_dlp/extractor/bandcamp.py
@ -71,7 +71,7 @@ class BandcampIE(InfoExtractor):
            'album_artists': ['Ben Prunty'],
        },
    }, {
-        # no free download, mp3 128
+        # track from compilation album (artist/album_artist difference)
        'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
        'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
        'info_dict': {
@ -96,7 +96,7 @@ class BandcampIE(InfoExtractor):
            'album_artists': ['Mastodon'],
        },
    }, {
-        # track from compilation album (artist/album_artist difference)
+        # FIXME: Embed detection
        'url': 'https://diskotopia.bandcamp.com/track/safehouse',
        'md5': '19c5337bca1428afa54129f86a2f6a69',
        'info_dict': {
@ -440,9 +440,42 @@ def _real_extract(self, url):

        blob = self._extract_data_attr(webpage, show_id, 'blob')

-        show = blob['bcw_data'][show_id]
+        shows_list = try_get(blob, lambda x: x['appData']['shows'], list)
+        show = None
+        if shows_list:
+            for s in shows_list:
+                if str(s.get('showId')) == show_id:
+                    show = s
+                    break
+
+        if not show:
+           
+            show = try_get(blob, lambda x: x['bcw_data'][show_id], dict)
+
+        if not show:
+            raise ExtractorError('Bandcamp Weekly data not found. This extractor is outdated. Please report this issue.')

        formats = []
+        audio_track_id = str_or_none(show.get('audioTrackId'))
+
+        # If audio track ID is found, download the audio page to get formats
+        if audio_track_id:
+            track_url = f'https://bandcamp.com/download?id={audio_track_id}'
+            audio_page = self._download_webpage(
+                track_url, show_id, 'Downloading audio download page')
+            audio_blob = self._extract_data_attr(audio_page, show_id, 'blob', fatal=False)
+            if audio_blob:
+                # Formats live in the 'downloads' dict (format_id -> format info) of the first digital item
+                downloads = try_get(audio_blob, lambda x: x['digital_items'][0]['downloads'], dict)
+                if downloads:
+                    for format_id, f in downloads.items():
+                        formats.append({
+                            'url': f.get('url'),
+                            'format_id': format_id,
+                            'ext': f.get('encoding_name'),
+                            'vcodec': 'none',
+                        })
+        if not formats and show.get('audio_stream'):
            for format_id, format_url in show['audio_stream'].items():
                if not url_or_none(format_url):
                    continue
@ -458,21 +491,22 @@ def _real_extract(self, url):
                    'ext': ext,
                    'vcodec': 'none',
                })
+        if not formats:
+            raise ExtractorError('Could not find any audio formats for this episode.')

-        title = show.get('audio_title') or 'Bandcamp Weekly'
-        subtitle = show.get('subtitle')
+        title = show.get('audio_title') or show.get('title') or 'Bandcamp Weekly'
+        subtitle = show.get('shortDesc')
        if subtitle:
            title += f' - {subtitle}'
-
        return {
            'id': show_id,
            'title': title,
-            'description': show.get('desc') or show.get('short_desc'),
+            'description': show.get('desc') or show.get('shortDesc'),
            'duration': float_or_none(show.get('audio_duration')),
            'is_live': False,
-            'release_date': unified_strdate(show.get('published_date')),
+            'release_date': unified_strdate(show.get('date')),
            'series': 'Bandcamp Weekly',
-            'episode': show.get('subtitle'),
+            'episode': show.get('shortDesc'),
            'episode_id': show_id,
            'formats': formats,
        }