mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 08:35:12 +00:00 
			
		
		
		
	[soundcloud:user] Rework extractor (Closes #6399)
This commit is contained in:
		@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
 | 
				
			|||||||
    _VALID_URL = r'''(?x)^(?:https?://)?
 | 
					    _VALID_URL = r'''(?x)^(?:https?://)?
 | 
				
			||||||
                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
 | 
					                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
 | 
				
			||||||
                            (?P<uploader>[\w\d-]+)/
 | 
					                            (?P<uploader>[\w\d-]+)/
 | 
				
			||||||
                            (?!sets/|(?:likes|tracks)/?(?:$|[?#]))
 | 
					                            (?!(?:tracks|sets|reposts|likes|spotlight)/?(?:$|[?#]))
 | 
				
			||||||
                            (?P<title>[\w\d-]+)/?
 | 
					                            (?P<title>[\w\d-]+)/?
 | 
				
			||||||
                            (?P<token>[^?]+?)?(?:[?].*)?$)
 | 
					                            (?P<token>[^?]+?)?(?:[?].*)?$)
 | 
				
			||||||
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
 | 
					                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
 | 
				
			||||||
@@ -293,60 +293,131 @@ class SoundcloudSetIE(SoundcloudIE):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class SoundcloudUserIE(SoundcloudIE):
 | 
					class SoundcloudUserIE(SoundcloudIE):
 | 
				
			||||||
    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
 | 
					    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|sets|reposts|likes|spotlight)/?)?(\?.*)?$'
 | 
				
			||||||
    IE_NAME = 'soundcloud:user'
 | 
					    IE_NAME = 'soundcloud:user'
 | 
				
			||||||
    _TESTS = [{
 | 
					    _TESTS = [{
 | 
				
			||||||
        'url': 'https://soundcloud.com/the-concept-band',
 | 
					        'url': 'https://soundcloud.com/the-akashic-chronicler',
 | 
				
			||||||
        'info_dict': {
 | 
					        'info_dict': {
 | 
				
			||||||
            'id': '9615865',
 | 
					            'id': '114582580',
 | 
				
			||||||
            'title': 'The Royal Concept',
 | 
					            'title': 'The Akashic Chronicler (All)',
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        'playlist_mincount': 12
 | 
					        'playlist_mincount': 112,
 | 
				
			||||||
    }, {
 | 
					 | 
				
			||||||
        'url': 'https://soundcloud.com/the-concept-band/likes',
 | 
					 | 
				
			||||||
        'info_dict': {
 | 
					 | 
				
			||||||
            'id': '9615865',
 | 
					 | 
				
			||||||
            'title': 'The Royal Concept',
 | 
					 | 
				
			||||||
        },
 | 
					 | 
				
			||||||
        'playlist_mincount': 1,
 | 
					 | 
				
			||||||
    }, {
 | 
					    }, {
 | 
				
			||||||
        'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
 | 
					        'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
 | 
				
			||||||
        'only_matching': True,
 | 
					        'info_dict': {
 | 
				
			||||||
 | 
					            'id': '114582580',
 | 
				
			||||||
 | 
					            'title': 'The Akashic Chronicler (Tracks)',
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        'playlist_mincount': 50,
 | 
				
			||||||
 | 
					    }, {
 | 
				
			||||||
 | 
					        'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
 | 
				
			||||||
 | 
					        'info_dict': {
 | 
				
			||||||
 | 
					            'id': '114582580',
 | 
				
			||||||
 | 
					            'title': 'The Akashic Chronicler (Playlists)',
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        'playlist_mincount': 3,
 | 
				
			||||||
 | 
					    }, {
 | 
				
			||||||
 | 
					        'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
 | 
				
			||||||
 | 
					        'info_dict': {
 | 
				
			||||||
 | 
					            'id': '114582580',
 | 
				
			||||||
 | 
					            'title': 'The Akashic Chronicler (Reposts)',
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        'playlist_mincount': 9,
 | 
				
			||||||
 | 
					    }, {
 | 
				
			||||||
 | 
					        'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
 | 
				
			||||||
 | 
					        'info_dict': {
 | 
				
			||||||
 | 
					            'id': '114582580',
 | 
				
			||||||
 | 
					            'title': 'The Akashic Chronicler (Likes)',
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        'playlist_mincount': 333,
 | 
				
			||||||
 | 
					    }, {
 | 
				
			||||||
 | 
					        'url': 'https://soundcloud.com/grynpyret/spotlight',
 | 
				
			||||||
 | 
					        'info_dict': {
 | 
				
			||||||
 | 
					            'id': '7098329',
 | 
				
			||||||
 | 
					            'title': 'Grynpyret (Spotlight)',
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        'playlist_mincount': 1,
 | 
				
			||||||
    }]
 | 
					    }]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _API_BASE = 'https://api.soundcloud.com'
 | 
				
			||||||
 | 
					    _API_V2_BASE = 'https://api-v2.soundcloud.com'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _BASE_URL_MAP = {
 | 
				
			||||||
 | 
					        'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE,
 | 
				
			||||||
 | 
					        'tracks': '%s/users/%%s/tracks' % _API_BASE,
 | 
				
			||||||
 | 
					        'sets': '%s/users/%%s/playlists' % _API_V2_BASE,
 | 
				
			||||||
 | 
					        'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE,
 | 
				
			||||||
 | 
					        'likes': '%s/users/%%s/likes' % _API_V2_BASE,
 | 
				
			||||||
 | 
					        'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE,
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _TITLE_MAP = {
 | 
				
			||||||
 | 
					        'all': 'All',
 | 
				
			||||||
 | 
					        'tracks': 'Tracks',
 | 
				
			||||||
 | 
					        'sets': 'Playlists',
 | 
				
			||||||
 | 
					        'reposts': 'Reposts',
 | 
				
			||||||
 | 
					        'likes': 'Likes',
 | 
				
			||||||
 | 
					        'spotlight': 'Spotlight',
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _real_extract(self, url):
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
        mobj = re.match(self._VALID_URL, url)
 | 
					        mobj = re.match(self._VALID_URL, url)
 | 
				
			||||||
        uploader = mobj.group('user')
 | 
					        uploader = mobj.group('user')
 | 
				
			||||||
        resource = mobj.group('rsrc')
 | 
					 | 
				
			||||||
        if resource is None:
 | 
					 | 
				
			||||||
            resource = 'tracks'
 | 
					 | 
				
			||||||
        elif resource == 'likes':
 | 
					 | 
				
			||||||
            resource = 'favorites'
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        url = 'http://soundcloud.com/%s/' % uploader
 | 
					        url = 'http://soundcloud.com/%s/' % uploader
 | 
				
			||||||
        resolv_url = self._resolv_url(url)
 | 
					        resolv_url = self._resolv_url(url)
 | 
				
			||||||
        user = self._download_json(
 | 
					        user = self._download_json(
 | 
				
			||||||
            resolv_url, uploader, 'Downloading user info')
 | 
					            resolv_url, uploader, 'Downloading user info')
 | 
				
			||||||
        base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource)
 | 
					
 | 
				
			||||||
 | 
					        resource = mobj.group('rsrc') or 'all'
 | 
				
			||||||
 | 
					        base_url = self._BASE_URL_MAP[resource] % user['id']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        next_href = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        entries = []
 | 
					        entries = []
 | 
				
			||||||
        for i in itertools.count():
 | 
					        for i in itertools.count():
 | 
				
			||||||
 | 
					            if not next_href:
 | 
				
			||||||
                data = compat_urllib_parse.urlencode({
 | 
					                data = compat_urllib_parse.urlencode({
 | 
				
			||||||
                    'offset': i * 50,
 | 
					                    'offset': i * 50,
 | 
				
			||||||
                    'limit': 50,
 | 
					                    'limit': 50,
 | 
				
			||||||
                    'client_id': self._CLIENT_ID,
 | 
					                    'client_id': self._CLIENT_ID,
 | 
				
			||||||
 | 
					                    'linked_partitioning': '1',
 | 
				
			||||||
 | 
					                    'representation': 'speedy',
 | 
				
			||||||
                })
 | 
					                })
 | 
				
			||||||
            new_entries = self._download_json(
 | 
					                next_href = base_url + '?' + data
 | 
				
			||||||
                base_url + data, uploader, 'Downloading track page %s' % (i + 1))
 | 
					
 | 
				
			||||||
            if len(new_entries) == 0:
 | 
					            response = self._download_json(
 | 
				
			||||||
 | 
					                next_href, uploader, 'Downloading track page %s' % (i + 1))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            collection = response['collection']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if not collection:
 | 
				
			||||||
                self.to_screen('%s: End page received' % uploader)
 | 
					                self.to_screen('%s: End page received' % uploader)
 | 
				
			||||||
                break
 | 
					                break
 | 
				
			||||||
            entries.extend(self.url_result(e['permalink_url'], 'Soundcloud') for e in new_entries)
 | 
					
 | 
				
			||||||
 | 
					            def resolve_permalink_url(candidates):
 | 
				
			||||||
 | 
					                for cand in candidates:
 | 
				
			||||||
 | 
					                    if isinstance(cand, dict):
 | 
				
			||||||
 | 
					                        permalink_url = cand.get('permalink_url')
 | 
				
			||||||
 | 
					                        if permalink_url and permalink_url.startswith('http'):
 | 
				
			||||||
 | 
					                            return permalink_url
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            for e in collection:
 | 
				
			||||||
 | 
					                permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
 | 
				
			||||||
 | 
					                if permalink_url:
 | 
				
			||||||
 | 
					                    entries.append(self.url_result(permalink_url))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if 'next_href' in response:
 | 
				
			||||||
 | 
					                next_href = response['next_href']
 | 
				
			||||||
 | 
					                if not next_href:
 | 
				
			||||||
 | 
					                    break
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                next_href = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return {
 | 
					        return {
 | 
				
			||||||
            '_type': 'playlist',
 | 
					            '_type': 'playlist',
 | 
				
			||||||
            'id': compat_str(user['id']),
 | 
					            'id': compat_str(user['id']),
 | 
				
			||||||
            'title': user['username'],
 | 
					            'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]),
 | 
				
			||||||
            'entries': entries,
 | 
					            'entries': entries,
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user