mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[bandcamp] Fix free downloads extraction and extract all formats (closes #11067)
This commit is contained in:
		| @@ -1,7 +1,9 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import random | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
| @@ -12,6 +14,9 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     parse_filesize, | ||||
|     unescapeHTML, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -81,35 +86,68 @@ class BandcampIE(InfoExtractor): | ||||
|             r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') | ||||
|         # We get the dictionary of the track from some javascript code | ||||
|         all_info = self._parse_json(self._search_regex( | ||||
|             r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id) | ||||
|         info = all_info[0] | ||||
|         # We pick mp3-320 for now, until format selection can be easily implemented. | ||||
|         mp3_info = info['downloads']['mp3-320'] | ||||
|         # If we try to use this url it says the link has expired | ||||
|         initial_url = mp3_info['url'] | ||||
|         m_url = re.match( | ||||
|             r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$', | ||||
|             initial_url) | ||||
|         # We build the url we will use to get the final track url | ||||
|         # This URL is built by Bandcamp in the script download_bundle_*.js | ||||
|         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) | ||||
|         final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') | ||||
|         # If we could correctly generate the .rand field the url would be | ||||
|         # in the "download_url" key | ||||
|         final_url = self._proto_relative_url(self._search_regex( | ||||
|             r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:') | ||||
|         download_webpage = self._download_webpage( | ||||
|             download_link, video_id, 'Downloading free downloads page') | ||||
|  | ||||
|         blob = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage, | ||||
|                 'blob', group='blob'), | ||||
|             video_id, transform_source=unescapeHTML) | ||||
|  | ||||
|         info = blob['digital_items'][0] | ||||
|  | ||||
|         downloads = info['downloads'] | ||||
|         track = info['title'] | ||||
|  | ||||
|         artist = info.get('artist') | ||||
|         title = '%s - %s' % (artist, track) if artist else track | ||||
|  | ||||
|         download_formats = {} | ||||
|         for f in blob['download_formats']: | ||||
|             name, ext = f.get('name'), f.get('file_extension') | ||||
|             if all(isinstance(x, compat_str) for x in (name, ext)): | ||||
|                 download_formats[name] = ext.strip('.') | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id, f in downloads.items(): | ||||
|             format_url = f.get('url') | ||||
|             if not format_url: | ||||
|                 continue | ||||
|             # Stat URL generation algorithm is reverse engineered from | ||||
|             # download_*_bundle_*.js | ||||
|             stat_url = update_url_query( | ||||
|                 format_url.replace('/download/', '/statdownload/'), { | ||||
|                     '.rand': int(time.time() * 1000 * random.random()), | ||||
|                 }) | ||||
|             format_id = f.get('encoding_name') or format_id | ||||
|             stat = self._download_json( | ||||
|                 stat_url, video_id, 'Downloading %s JSON' % format_id, | ||||
|                 transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], | ||||
|                 fatal=False) | ||||
|             if not stat: | ||||
|                 continue | ||||
|             retry_url = stat.get('retry_url') | ||||
|             if not isinstance(retry_url, compat_str): | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': self._proto_relative_url(retry_url, 'http:'), | ||||
|                 'ext': download_formats.get(format_id), | ||||
|                 'format_id': format_id, | ||||
|                 'format_note': f.get('description'), | ||||
|                 'filesize': parse_filesize(f.get('size_mb')), | ||||
|                 'vcodec': 'none', | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info['title'], | ||||
|             'ext': 'mp3', | ||||
|             'vcodec': 'none', | ||||
|             'url': final_url, | ||||
|             'title': title, | ||||
|             'thumbnail': info.get('thumb_url'), | ||||
|             'uploader': info.get('artist'), | ||||
|             'artist': artist, | ||||
|             'track': track, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․