mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[mixcloud] Simplify url extraction
On the tracks I tested, the server number in the URL from the webpage is valid for either the mp3 or the m4a file, and any other number is invalid, so checking the other server numbers is a waste of time.
This commit is contained in:
		| @@ -1,7 +1,6 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
| @@ -46,20 +45,16 @@ class MixcloudIE(InfoExtractor): | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _get_url(self, track_id, template_url, server_number): | ||||
|         boundaries = (1, 30) | ||||
|         for nr in server_numbers(server_number, boundaries): | ||||
|             url = template_url % nr | ||||
|             try: | ||||
|                 # We only want to know if the request succeed | ||||
|                 # don't download the whole file | ||||
|                 self._request_webpage( | ||||
|                     HEADRequest(url), track_id, | ||||
|                     'Checking URL %d/%d ...' % (nr, boundaries[-1])) | ||||
|                 return url | ||||
|             except ExtractorError: | ||||
|                 pass | ||||
|         return None | ||||
|     def _check_url(self, url, track_id, ext): | ||||
|         try: | ||||
|             # We only want to know if the request succeed | ||||
|             # don't download the whole file | ||||
|             self._request_webpage( | ||||
|                 HEADRequest(url), track_id, | ||||
|                 'Trying %s URL' % ext) | ||||
|             return True | ||||
|         except ExtractorError: | ||||
|             return False | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -72,15 +67,10 @@ class MixcloudIE(InfoExtractor): | ||||
|         preview_url = self._search_regex( | ||||
|             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') | ||||
|         song_url = preview_url.replace('/previews/', '/c/originals/') | ||||
|         server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number')) | ||||
|         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) | ||||
|         final_song_url = self._get_url(track_id, template_url, server_number) | ||||
|         if final_song_url is None: | ||||
|             self.to_screen('Trying with m4a extension') | ||||
|             template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') | ||||
|             final_song_url = self._get_url(track_id, template_url, server_number) | ||||
|         if final_song_url is None: | ||||
|             raise ExtractorError('Unable to extract track url') | ||||
|         if not self._check_url(song_url, track_id, 'mp3'): | ||||
|             song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') | ||||
|             if not self._check_url(song_url, track_id, 'm4a'): | ||||
|                 raise ExtractorError('Unable to extract track url') | ||||
|  | ||||
|         PREFIX = ( | ||||
|             r'm-play-on-spacebar[^>]+' | ||||
| @@ -107,7 +97,7 @@ class MixcloudIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': track_id, | ||||
|             'title': title, | ||||
|             'url': final_song_url, | ||||
|             'url': song_url, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
| @@ -115,35 +105,3 @@ class MixcloudIE(InfoExtractor): | ||||
|             'view_count': view_count, | ||||
|             'like_count': like_count, | ||||
|         } | ||||
|  | ||||
|  | ||||
| def server_numbers(first, boundaries): | ||||
|     """ Server numbers to try in descending order of probable availability. | ||||
|     Starting from first (i.e. the number of the server hosting the preview file) | ||||
|     and going further and further up to the higher boundary and down to the | ||||
|     lower one in an alternating fashion. Namely: | ||||
|  | ||||
|         server_numbers(2, (1, 5)) | ||||
|  | ||||
|         # Where the preview server is 2, min number is 1 and max is 5. | ||||
|         # Yields: 2, 3, 1, 4, 5 | ||||
|  | ||||
|     Why not random numbers or increasing sequences? Since from what I've seen, | ||||
|     full length files seem to be hosted on servers whose number is closer to | ||||
|     that of the preview; to be confirmed. | ||||
|     """ | ||||
|     zip_longest = getattr(itertools, 'zip_longest', None) | ||||
|     if zip_longest is None: | ||||
|         # python 2.x | ||||
|         zip_longest = itertools.izip_longest | ||||
|  | ||||
|     if len(boundaries) != 2: | ||||
|         raise ValueError("boundaries should be a two-element tuple") | ||||
|     min, max = boundaries | ||||
|     highs = range(first + 1, max + 1) | ||||
|     lows = range(first - 1, min - 1, -1) | ||||
|     rest = filter( | ||||
|         None, itertools.chain.from_iterable(zip_longest(highs, lows))) | ||||
|     yield first | ||||
|     for n in rest: | ||||
|         yield n | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz