mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 08:35:12 +00:00 
			
		
		
		
	[soundcloud:user] Rework extractor (Closes #6399)
This commit is contained in:
		@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'''(?x)^(?:https?://)?
 | 
			
		||||
                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
 | 
			
		||||
                            (?P<uploader>[\w\d-]+)/
 | 
			
		||||
                            (?!sets/|(?:likes|tracks)/?(?:$|[?#]))
 | 
			
		||||
                            (?!(?:tracks|sets|reposts|likes|spotlight)/?(?:$|[?#]))
 | 
			
		||||
                            (?P<title>[\w\d-]+)/?
 | 
			
		||||
                            (?P<token>[^?]+?)?(?:[?].*)?$)
 | 
			
		||||
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
 | 
			
		||||
@@ -293,60 +293,131 @@ class SoundcloudSetIE(SoundcloudIE):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SoundcloudUserIE(SoundcloudIE):
 | 
			
		||||
    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
 | 
			
		||||
    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|sets|reposts|likes|spotlight)/?)?(\?.*)?$'
 | 
			
		||||
    IE_NAME = 'soundcloud:user'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://soundcloud.com/the-concept-band',
 | 
			
		||||
        'url': 'https://soundcloud.com/the-akashic-chronicler',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '9615865',
 | 
			
		||||
            'title': 'The Royal Concept',
 | 
			
		||||
            'id': '114582580',
 | 
			
		||||
            'title': 'The Akashic Chronicler (All)',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 12
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://soundcloud.com/the-concept-band/likes',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '9615865',
 | 
			
		||||
            'title': 'The Royal Concept',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 1,
 | 
			
		||||
        'playlist_mincount': 112,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '114582580',
 | 
			
		||||
            'title': 'The Akashic Chronicler (Tracks)',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 50,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '114582580',
 | 
			
		||||
            'title': 'The Akashic Chronicler (Playlists)',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 3,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '114582580',
 | 
			
		||||
            'title': 'The Akashic Chronicler (Reposts)',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 9,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '114582580',
 | 
			
		||||
            'title': 'The Akashic Chronicler (Likes)',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 333,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://soundcloud.com/grynpyret/spotlight',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '7098329',
 | 
			
		||||
            'title': 'Grynpyret (Spotlight)',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 1,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    _API_BASE = 'https://api.soundcloud.com'
 | 
			
		||||
    _API_V2_BASE = 'https://api-v2.soundcloud.com'
 | 
			
		||||
 | 
			
		||||
    _BASE_URL_MAP = {
 | 
			
		||||
        'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE,
 | 
			
		||||
        'tracks': '%s/users/%%s/tracks' % _API_BASE,
 | 
			
		||||
        'sets': '%s/users/%%s/playlists' % _API_V2_BASE,
 | 
			
		||||
        'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE,
 | 
			
		||||
        'likes': '%s/users/%%s/likes' % _API_V2_BASE,
 | 
			
		||||
        'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    _TITLE_MAP = {
 | 
			
		||||
        'all': 'All',
 | 
			
		||||
        'tracks': 'Tracks',
 | 
			
		||||
        'sets': 'Playlists',
 | 
			
		||||
        'reposts': 'Reposts',
 | 
			
		||||
        'likes': 'Likes',
 | 
			
		||||
        'spotlight': 'Spotlight',
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        uploader = mobj.group('user')
 | 
			
		||||
        resource = mobj.group('rsrc')
 | 
			
		||||
        if resource is None:
 | 
			
		||||
            resource = 'tracks'
 | 
			
		||||
        elif resource == 'likes':
 | 
			
		||||
            resource = 'favorites'
 | 
			
		||||
 | 
			
		||||
        url = 'http://soundcloud.com/%s/' % uploader
 | 
			
		||||
        resolv_url = self._resolv_url(url)
 | 
			
		||||
        user = self._download_json(
 | 
			
		||||
            resolv_url, uploader, 'Downloading user info')
 | 
			
		||||
        base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource)
 | 
			
		||||
 | 
			
		||||
        resource = mobj.group('rsrc') or 'all'
 | 
			
		||||
        base_url = self._BASE_URL_MAP[resource] % user['id']
 | 
			
		||||
 | 
			
		||||
        next_href = None
 | 
			
		||||
 | 
			
		||||
        entries = []
 | 
			
		||||
        for i in itertools.count():
 | 
			
		||||
            data = compat_urllib_parse.urlencode({
 | 
			
		||||
                'offset': i * 50,
 | 
			
		||||
                'limit': 50,
 | 
			
		||||
                'client_id': self._CLIENT_ID,
 | 
			
		||||
            })
 | 
			
		||||
            new_entries = self._download_json(
 | 
			
		||||
                base_url + data, uploader, 'Downloading track page %s' % (i + 1))
 | 
			
		||||
            if len(new_entries) == 0:
 | 
			
		||||
            if not next_href:
 | 
			
		||||
                data = compat_urllib_parse.urlencode({
 | 
			
		||||
                    'offset': i * 50,
 | 
			
		||||
                    'limit': 50,
 | 
			
		||||
                    'client_id': self._CLIENT_ID,
 | 
			
		||||
                    'linked_partitioning': '1',
 | 
			
		||||
                    'representation': 'speedy',
 | 
			
		||||
                })
 | 
			
		||||
                next_href = base_url + '?' + data
 | 
			
		||||
 | 
			
		||||
            response = self._download_json(
 | 
			
		||||
                next_href, uploader, 'Downloading track page %s' % (i + 1))
 | 
			
		||||
 | 
			
		||||
            collection = response['collection']
 | 
			
		||||
 | 
			
		||||
            if not collection:
 | 
			
		||||
                self.to_screen('%s: End page received' % uploader)
 | 
			
		||||
                break
 | 
			
		||||
            entries.extend(self.url_result(e['permalink_url'], 'Soundcloud') for e in new_entries)
 | 
			
		||||
 | 
			
		||||
            def resolve_permalink_url(candidates):
 | 
			
		||||
                for cand in candidates:
 | 
			
		||||
                    if isinstance(cand, dict):
 | 
			
		||||
                        permalink_url = cand.get('permalink_url')
 | 
			
		||||
                        if permalink_url and permalink_url.startswith('http'):
 | 
			
		||||
                            return permalink_url
 | 
			
		||||
 | 
			
		||||
            for e in collection:
 | 
			
		||||
                permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
 | 
			
		||||
                if permalink_url:
 | 
			
		||||
                    entries.append(self.url_result(permalink_url))
 | 
			
		||||
 | 
			
		||||
            if 'next_href' in response:
 | 
			
		||||
                next_href = response['next_href']
 | 
			
		||||
                if not next_href:
 | 
			
		||||
                    break
 | 
			
		||||
            else:
 | 
			
		||||
                next_href = None
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'playlist',
 | 
			
		||||
            'id': compat_str(user['id']),
 | 
			
		||||
            'title': user['username'],
 | 
			
		||||
            'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]),
 | 
			
		||||
            'entries': entries,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user