mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 00:25:15 +00:00 
			
		
		
		
	[youtube] Fix history, trending and mix playlists (#136)
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com> Co-authored-by: Matthew <colethedj@protonmail.com>
This commit is contained in:
		@@ -59,9 +59,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 | 
			
		||||
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
 | 
			
		||||
 | 
			
		||||
    _RESERVED_NAMES = (
 | 
			
		||||
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|hashtag|'
 | 
			
		||||
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
 | 
			
		||||
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')
 | 
			
		||||
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
 | 
			
		||||
        r'movies|results|shared|hashtag|trending|feed|feeds|'
 | 
			
		||||
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
 | 
			
		||||
 | 
			
		||||
    _NETRC_MACHINE = 'youtube'
 | 
			
		||||
    # If True it will raise an error if no login info is provided
 | 
			
		||||
@@ -2520,17 +2520,22 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
            channel_url, 'channel id')
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _extract_grid_item_renderer(item):
 | 
			
		||||
        for item_kind in ('Playlist', 'Video', 'Channel'):
 | 
			
		||||
            renderer = item.get('grid%sRenderer' % item_kind)
 | 
			
		||||
            if renderer:
 | 
			
		||||
                return renderer
 | 
			
		||||
    def _extract_basic_item_renderer(item):
 | 
			
		||||
        # Modified from _extract_grid_item_renderer
 | 
			
		||||
        known_renderers = (
 | 
			
		||||
            'playlistRenderer', 'videoRenderer', 'channelRenderer'
 | 
			
		||||
            'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
 | 
			
		||||
        )
 | 
			
		||||
        for key, renderer in item.items():
 | 
			
		||||
            if key not in known_renderers:
 | 
			
		||||
                continue
 | 
			
		||||
            return renderer
 | 
			
		||||
 | 
			
		||||
    def _grid_entries(self, grid_renderer):
 | 
			
		||||
        for item in grid_renderer['items']:
 | 
			
		||||
            if not isinstance(item, dict):
 | 
			
		||||
                continue
 | 
			
		||||
            renderer = self._extract_grid_item_renderer(item)
 | 
			
		||||
            renderer = self._extract_basic_item_renderer(item)
 | 
			
		||||
            if not isinstance(renderer, dict):
 | 
			
		||||
                continue
 | 
			
		||||
            title = try_get(
 | 
			
		||||
@@ -2559,7 +2564,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
        content = shelf_renderer.get('content')
 | 
			
		||||
        if not isinstance(content, dict):
 | 
			
		||||
            return
 | 
			
		||||
        renderer = content.get('gridRenderer')
 | 
			
		||||
        renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
 | 
			
		||||
        if renderer:
 | 
			
		||||
            # TODO: add support for nested playlists so each shelf is processed
 | 
			
		||||
            # as separate playlist
 | 
			
		||||
@@ -2601,20 +2606,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                continue
 | 
			
		||||
            yield self._extract_video(renderer)
 | 
			
		||||
 | 
			
		||||
    r""" # Not needed in the new implementation
 | 
			
		||||
    def _itemSection_entries(self, item_sect_renderer):
 | 
			
		||||
        for content in item_sect_renderer['contents']:
 | 
			
		||||
            if not isinstance(content, dict):
 | 
			
		||||
                continue
 | 
			
		||||
            renderer = content.get('videoRenderer', {})
 | 
			
		||||
            if not isinstance(renderer, dict):
 | 
			
		||||
                continue
 | 
			
		||||
            video_id = renderer.get('videoId')
 | 
			
		||||
            if not video_id:
 | 
			
		||||
                continue
 | 
			
		||||
            yield self._extract_video(renderer)
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def _rich_entries(self, rich_grid_renderer):
 | 
			
		||||
        renderer = try_get(
 | 
			
		||||
            rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
 | 
			
		||||
@@ -2837,7 +2828,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
 | 
			
		||||
                'gridVideoRenderer': (self._grid_entries, 'items'),
 | 
			
		||||
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
 | 
			
		||||
                'itemSectionRenderer': (self._playlist_entries, 'contents'),
 | 
			
		||||
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
 | 
			
		||||
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
 | 
			
		||||
            }
 | 
			
		||||
            continuation_items = try_get(
 | 
			
		||||
@@ -2955,13 +2946,30 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
            self._entries(selected_tab, identity_token, playlist_id),
 | 
			
		||||
            **metadata)
 | 
			
		||||
 | 
			
		||||
    def _extract_mix_playlist(self, playlist, playlist_id):
 | 
			
		||||
        page_num = 0
 | 
			
		||||
        while True:
 | 
			
		||||
            videos = list(self._playlist_entries(playlist))
 | 
			
		||||
            if not videos:
 | 
			
		||||
                return
 | 
			
		||||
            video_count = len(videos)
 | 
			
		||||
            start = min(video_count - 24, 26) if video_count > 25 else 0
 | 
			
		||||
            for item in videos[start:]:
 | 
			
		||||
                yield item
 | 
			
		||||
 | 
			
		||||
            page_num += 1
 | 
			
		||||
            _, data = self._extract_webpage(
 | 
			
		||||
                'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, videos[-1]['id']),
 | 
			
		||||
                '%s page %d' % (playlist_id, page_num))
 | 
			
		||||
            playlist = try_get(
 | 
			
		||||
                data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
 | 
			
		||||
 | 
			
		||||
    def _extract_from_playlist(self, item_id, url, data, playlist):
 | 
			
		||||
        title = playlist.get('title') or try_get(
 | 
			
		||||
            data, lambda x: x['titleText']['simpleText'], compat_str)
 | 
			
		||||
        playlist_id = playlist.get('playlistId') or item_id
 | 
			
		||||
        # Inline playlist rendition continuation does not always work
 | 
			
		||||
        # at Youtube side, so delegating regular tab-based playlist URL
 | 
			
		||||
        # processing whenever possible.
 | 
			
		||||
 | 
			
		||||
        # Delegating everything except mix playlists to regular tab-based playlist URL
 | 
			
		||||
        playlist_url = urljoin(url, try_get(
 | 
			
		||||
            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
 | 
			
		||||
            compat_str))
 | 
			
		||||
@@ -2969,9 +2977,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
            return self.url_result(
 | 
			
		||||
                playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
 | 
			
		||||
                video_title=title)
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(
 | 
			
		||||
            self._playlist_entries(playlist), playlist_id=playlist_id,
 | 
			
		||||
            playlist_title=title)
 | 
			
		||||
            self._extract_mix_playlist(playlist, playlist_id),
 | 
			
		||||
            playlist_id=playlist_id, playlist_title=title)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _extract_alerts(data):
 | 
			
		||||
@@ -3001,35 +3010,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 | 
			
		||||
            'identity token', default=None)
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        item_id = self._match_id(url)
 | 
			
		||||
        url = compat_urlparse.urlunparse(
 | 
			
		||||
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
 | 
			
		||||
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
 | 
			
		||||
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
 | 
			
		||||
            self._downloader.report_warning(
 | 
			
		||||
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
 | 
			
		||||
                'To download only the videos in the home page, add a "/featured" to the URL')
 | 
			
		||||
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')
 | 
			
		||||
 | 
			
		||||
        # Handle both video/playlist URLs
 | 
			
		||||
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
 | 
			
		||||
        video_id = qs.get('v', [None])[0]
 | 
			
		||||
        playlist_id = qs.get('list', [None])[0]
 | 
			
		||||
 | 
			
		||||
        if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
 | 
			
		||||
            if playlist_id:
 | 
			
		||||
                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
 | 
			
		||||
                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
 | 
			
		||||
                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
 | 
			
		||||
            else:
 | 
			
		||||
                raise ExtractorError('Unable to recognize tab page')
 | 
			
		||||
        if video_id and playlist_id:
 | 
			
		||||
            if self._downloader.params.get('noplaylist'):
 | 
			
		||||
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
 | 
			
		||||
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
 | 
			
		||||
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 | 
			
		||||
 | 
			
		||||
    def _extract_webpage(self, url, item_id):
 | 
			
		||||
        retries = self._downloader.params.get('extractor_retries', 3)
 | 
			
		||||
        count = -1
 | 
			
		||||
        last_error = 'Incomplete yt initial data recieved'
 | 
			
		||||
@@ -3041,8 +3022,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                self.report_warning('%s. Retrying ...' % last_error)
 | 
			
		||||
            webpage = self._download_webpage(
 | 
			
		||||
                url, item_id,
 | 
			
		||||
                'Downloading webpage%s' % ' (retry #%d)' % count if count else '')
 | 
			
		||||
            identity_token = self._extract_identity_token(webpage, item_id)
 | 
			
		||||
                'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
 | 
			
		||||
            data = self._extract_yt_initial_data(item_id, webpage)
 | 
			
		||||
            err_msg = None
 | 
			
		||||
            for alert_type, alert_message in self._extract_alerts(data):
 | 
			
		||||
@@ -3058,23 +3038,61 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                break
 | 
			
		||||
            if count >= retries:
 | 
			
		||||
                self._downloader.report_error(last_error)
 | 
			
		||||
        return webpage, data
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        item_id = self._match_id(url)
 | 
			
		||||
        url = compat_urlparse.urlunparse(
 | 
			
		||||
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
 | 
			
		||||
 | 
			
		||||
        # This is not matched in a channel page with a tab selected
 | 
			
		||||
        mobj = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
 | 
			
		||||
        mobj = mobj.groupdict() if mobj else {}
 | 
			
		||||
        if mobj and not mobj.get('not_channel'):
 | 
			
		||||
            self._downloader.report_warning(
 | 
			
		||||
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
 | 
			
		||||
                'To download only the videos in the home page, add a "/featured" to the URL')
 | 
			
		||||
            url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '')
 | 
			
		||||
 | 
			
		||||
        # Handle both video/playlist URLs
 | 
			
		||||
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
 | 
			
		||||
        video_id = qs.get('v', [None])[0]
 | 
			
		||||
        playlist_id = qs.get('list', [None])[0]
 | 
			
		||||
 | 
			
		||||
        if not video_id and (mobj.get('not_channel') or '').startswith('watch'):
 | 
			
		||||
            if not playlist_id:
 | 
			
		||||
                # If there is neither video or playlist ids,
 | 
			
		||||
                # youtube redirects to home page, which is undesirable
 | 
			
		||||
                raise ExtractorError('Unable to recognize tab page')
 | 
			
		||||
            self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
 | 
			
		||||
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
 | 
			
		||||
 | 
			
		||||
        if video_id and playlist_id:
 | 
			
		||||
            if self._downloader.params.get('noplaylist'):
 | 
			
		||||
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
 | 
			
		||||
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
 | 
			
		||||
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
 | 
			
		||||
 | 
			
		||||
        webpage, data = self._extract_webpage(url, item_id)
 | 
			
		||||
 | 
			
		||||
        tabs = try_get(
 | 
			
		||||
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
 | 
			
		||||
        if tabs:
 | 
			
		||||
            identity_token = self._extract_identity_token(webpage, item_id)
 | 
			
		||||
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
 | 
			
		||||
 | 
			
		||||
        playlist = try_get(
 | 
			
		||||
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
 | 
			
		||||
        if playlist:
 | 
			
		||||
            return self._extract_from_playlist(item_id, url, data, playlist)
 | 
			
		||||
        # Fallback to video extraction if no playlist alike page is recognized.
 | 
			
		||||
        # First check for the current video then try the v attribute of URL query.
 | 
			
		||||
 | 
			
		||||
        video_id = try_get(
 | 
			
		||||
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
 | 
			
		||||
            compat_str) or video_id
 | 
			
		||||
        if video_id:
 | 
			
		||||
            self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
 | 
			
		||||
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
 | 
			
		||||
        # Failed to recognize
 | 
			
		||||
 | 
			
		||||
        raise ExtractorError('Unable to recognize tab page')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -3338,7 +3356,6 @@ class YoutubeFeedsInfoExtractor(YoutubeTabIE):
 | 
			
		||||
    Subclasses must define the _FEED_NAME property.
 | 
			
		||||
    """
 | 
			
		||||
    _LOGIN_REQUIRED = True
 | 
			
		||||
    # _MAX_PAGES = 5
 | 
			
		||||
    _TESTS = []
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user