Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 06:35:12 +00:00)
			
		
		
		
	[extractor/youtube] Misc cleanup
Authored by: coletdjnz
This commit is contained in:
		| @@ -292,6 +292,7 @@ class BadgeType(enum.Enum): | ||||
|     AVAILABILITY_PREMIUM = enum.auto() | ||||
|     AVAILABILITY_SUBSCRIPTION = enum.auto() | ||||
|     LIVE_NOW = enum.auto() | ||||
|     VERIFIED = enum.auto() | ||||
| 
 | ||||
| 
 | ||||
| class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
| @@ -791,17 +792,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|     def _extract_and_report_alerts(self, data, *args, **kwargs): | ||||
|         return self._report_alerts(self._extract_alerts(data), *args, **kwargs) | ||||
| 
 | ||||
|     def _extract_badges(self, renderer: dict): | ||||
|         privacy_icon_map = { | ||||
|     def _extract_badges(self, badge_list: list): | ||||
|         """ | ||||
|         Extract known BadgeType's from a list of badge renderers. | ||||
|         @returns [{'type': BadgeType}] | ||||
|         """ | ||||
|         icon_type_map = { | ||||
|             'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED, | ||||
|             'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE, | ||||
|             'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC | ||||
|             'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC, | ||||
|             'CHECK_CIRCLE_THICK': BadgeType.VERIFIED, | ||||
|             'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED, | ||||
|         } | ||||
| 
 | ||||
|         badge_style_map = { | ||||
|             'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION, | ||||
|             'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM, | ||||
|             'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW | ||||
|             'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW, | ||||
|         } | ||||
| 
 | ||||
|         label_map = { | ||||
| @@ -809,13 +816,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             'private': BadgeType.AVAILABILITY_PRIVATE, | ||||
|             'members only': BadgeType.AVAILABILITY_SUBSCRIPTION, | ||||
|             'live': BadgeType.LIVE_NOW, | ||||
|             'premium': BadgeType.AVAILABILITY_PREMIUM | ||||
|             'premium': BadgeType.AVAILABILITY_PREMIUM, | ||||
|         } | ||||
| 
 | ||||
|         badges = [] | ||||
|         for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')): | ||||
|         for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))): | ||||
|             badge_type = ( | ||||
|                 privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str)) | ||||
|                 icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str)) | ||||
|                 or badge_style_map.get(traverse_obj(badge, 'style')) | ||||
|             ) | ||||
|             if badge_type: | ||||
| @@ -823,11 +830,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|                 continue | ||||
| 
 | ||||
|             # fallback, won't work in some languages | ||||
|             label = traverse_obj(badge, 'label', expected_type=str, default='') | ||||
|             label = traverse_obj( | ||||
|                 badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='') | ||||
|             for match, label_badge_type in label_map.items(): | ||||
|                 if match in label.lower(): | ||||
|                     badges.append({'type': badge_type}) | ||||
|                     continue | ||||
|                     badges.append({'type': label_badge_type}) | ||||
|                     break | ||||
| 
 | ||||
|         return badges | ||||
| 
 | ||||
| @@ -1020,8 +1028,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|         overlay_style = traverse_obj( | ||||
|             renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), | ||||
|             get_all=False, expected_type=str) | ||||
|         badges = self._extract_badges(renderer) | ||||
| 
 | ||||
|         badges = self._extract_badges(traverse_obj(renderer, 'badges')) | ||||
|         navigation_url = urljoin('https://www.youtube.com/', traverse_obj( | ||||
|             renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), | ||||
|             expected_type=str)) or '' | ||||
| @@ -1079,7 +1086,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|                     needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None, | ||||
|                     is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None), | ||||
|             view_count_field: view_count, | ||||
|             'live_status': live_status | ||||
|             'live_status': live_status, | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
| @@ -1332,6 +1339,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'Philipp Hagemeister', | ||||
|                 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', | ||||
|                 'uploader_id': '@PhilippHagemeister', | ||||
|                 'heatmap': 'count:100', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
| @@ -1415,6 +1423,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'The Witcher', | ||||
|                 'uploader_url': 'https://www.youtube.com/@thewitcher', | ||||
|                 'uploader_id': '@thewitcher', | ||||
|                 'comment_count': int, | ||||
|                 'heatmap': 'count:100', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -1894,6 +1904,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'Bernie Sanders', | ||||
|                 'uploader_url': 'https://www.youtube.com/@BernieSanders', | ||||
|                 'uploader_id': '@BernieSanders', | ||||
|                 'heatmap': 'count:100', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
| @@ -1955,6 +1966,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'Vsauce', | ||||
|                 'uploader_url': 'https://www.youtube.com/@Vsauce', | ||||
|                 'uploader_id': '@Vsauce', | ||||
|                 'comment_count': int, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
| @@ -2147,6 +2159,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'kudvenkat', | ||||
|                 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot', | ||||
|                 'uploader_id': '@Csharp-video-tutorialsBlogspot', | ||||
|                 'heatmap': 'count:100', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
| @@ -2227,6 +2240,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'CBS Mornings', | ||||
|                 'uploader_url': 'https://www.youtube.com/@CBSMornings', | ||||
|                 'uploader_id': '@CBSMornings', | ||||
|                 'comment_count': int, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
| @@ -2297,6 +2311,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'colinfurze', | ||||
|                 'uploader_url': 'https://www.youtube.com/@colinfurze', | ||||
|                 'uploader_id': '@colinfurze', | ||||
|                 'comment_count': int, | ||||
|                 'heatmap': 'count:100', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'format': '17',  # 3gp format available on android | ||||
| @@ -2342,6 +2358,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'SciShow', | ||||
|                 'uploader_url': 'https://www.youtube.com/@SciShow', | ||||
|                 'uploader_id': '@SciShow', | ||||
|                 'comment_count': int, | ||||
|                 'heatmap': 'count:100', | ||||
|             }, 'params': {'format': 'mhtml', 'skip_download': True} | ||||
|         }, { | ||||
|             # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) | ||||
| @@ -2370,6 +2388,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'Leon Nguyen', | ||||
|                 'uploader_url': 'https://www.youtube.com/@LeonNguyen', | ||||
|                 'uploader_id': '@LeonNguyen', | ||||
|                 'heatmap': 'count:100', | ||||
|             } | ||||
|         }, { | ||||
|             # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date | ||||
| @@ -2398,6 +2417,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'Leon Nguyen', | ||||
|                 'uploader_url': 'https://www.youtube.com/@LeonNguyen', | ||||
|                 'uploader_id': '@LeonNguyen', | ||||
|                 'heatmap': 'count:100', | ||||
|             }, | ||||
|             'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']} | ||||
|         }, { | ||||
| @@ -2428,6 +2448,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'Quackity', | ||||
|                 'uploader_id': '@Quackity', | ||||
|                 'uploader_url': 'https://www.youtube.com/@Quackity', | ||||
|                 'comment_count': int, | ||||
|                 'heatmap': 'count:100', | ||||
|             } | ||||
|         }, | ||||
|         {   # continuous livestream. Microformat upload date should be preferred. | ||||
| @@ -2594,6 +2616,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'MrBeast', | ||||
|                 'uploader_url': 'https://www.youtube.com/@MrBeast', | ||||
|                 'uploader_id': '@MrBeast', | ||||
|                 'comment_count': int, | ||||
|                 'heatmap': 'count:100', | ||||
|             }, | ||||
|             'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'}, | ||||
|         }, { | ||||
| @@ -2655,6 +2679,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'さなちゃんねる', | ||||
|                 'uploader_url': 'https://www.youtube.com/@sana_natori', | ||||
|                 'uploader_id': '@sana_natori', | ||||
|                 'heatmap': 'count:100', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -2684,6 +2709,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'thumbnail': r're:^https?://.*\.webp', | ||||
|                 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A', | ||||
|                 'playable_in_embed': True, | ||||
|                 'comment_count': int, | ||||
|                 'heatmap': 'count:100', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}}, | ||||
| @@ -2720,6 +2747,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader': 'Christopher Sykes', | ||||
|                 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries', | ||||
|                 'uploader_id': '@ChristopherSykesDocumentaries', | ||||
|                 'heatmap': 'count:100', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
| @@ -3312,10 +3340,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         if comment_abr is not None: | ||||
|             info['is_favorited'] = 'creatorHeart' in comment_abr | ||||
| 
 | ||||
|         comment_ab_icontype = traverse_obj( | ||||
|             comment_renderer, ('authorCommentBadge', 'authorCommentBadgeRenderer', 'icon', 'iconType')) | ||||
|         if comment_ab_icontype is not None: | ||||
|             info['author_is_verified'] = comment_ab_icontype in ('CHECK_CIRCLE_THICK', 'OFFICIAL_ARTIST_BADGE') | ||||
|         badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')]) | ||||
|         if self._has_badge(badges, BadgeType.VERIFIED): | ||||
|             info['author_is_verified'] = True | ||||
| 
 | ||||
|         is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge') | ||||
|         if is_pinned: | ||||
| @@ -4481,7 +4508,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             if v: | ||||
|                 info[d_k] = v | ||||
| 
 | ||||
|         badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False)) | ||||
|         badges = self._extract_badges(traverse_obj(vpir, 'badges')) | ||||
| 
 | ||||
|         is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) | ||||
|                       or get_first(video_details, 'isPrivate', expected_type=bool)) | ||||
| @@ -4554,13 +4581,14 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): | ||||
|         channel_id = self.ucid_or_none(renderer['channelId']) | ||||
|         title = self._get_text(renderer, 'title') | ||||
|         channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None) | ||||
|         # As of 2023-03-01 YouTube doesn't use the channel handles on these renderers yet. | ||||
|         # However we can expect them to change that in the future. | ||||
|         channel_handle = self.handle_from_url( | ||||
|             traverse_obj(renderer, ( | ||||
|                 'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'), | ||||
|                                        ('browseEndpoint', 'canonicalBaseUrl')), | ||||
|                 {str}), get_all=False)) | ||||
|         if not channel_handle: | ||||
|             # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search | ||||
|             channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText')) | ||||
|         return { | ||||
|             '_type': 'url', | ||||
|             'url': channel_url, | ||||
| @@ -4573,9 +4601,15 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): | ||||
|             'title': title, | ||||
|             'uploader_id': channel_handle, | ||||
|             'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None), | ||||
|             'channel_follower_count': self._get_count(renderer, 'subscriberCountText'), | ||||
|             # See above. YouTube sets videoCountText to the subscriber text in search channel renderers. | ||||
|             # However, in feed/channels this is set correctly to the subscriber count | ||||
|             'channel_follower_count': traverse_obj( | ||||
|                 renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count), | ||||
|             'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'), | ||||
|             'playlist_count': self._get_count(renderer, 'videoCountText'), | ||||
|             'playlist_count': ( | ||||
|                 # videoCountText may be the subscriber count | ||||
|                 self._get_count(renderer, 'videoCountText') | ||||
|                 if self._get_count(renderer, 'subscriberCountText') is not None else None), | ||||
|             'description': self._get_text(renderer, 'descriptionSnippet'), | ||||
|         } | ||||
| 
 | ||||
| @@ -5100,7 +5134,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): | ||||
|         playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {} | ||||
|         player_header_privacy = playlist_header_renderer.get('privacy') | ||||
| 
 | ||||
|         badges = self._extract_badges(sidebar_renderer) | ||||
|         badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges')) | ||||
| 
 | ||||
|         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge | ||||
|         privacy_setting_icon = get_first( | ||||
| @@ -5350,7 +5384,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): | ||||
|             'uploader_url': 'https://www.youtube.com/@3blue1brown', | ||||
|             'uploader': '3Blue1Brown', | ||||
|             'tags': ['Mathematics'], | ||||
|             'channel_follower_count': int | ||||
|             'channel_follower_count': int, | ||||
|         }, | ||||
|     }, { | ||||
|         'note': 'playlists, singlepage', | ||||
| @@ -5690,7 +5724,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', | ||||
|         'info_dict': { | ||||
|             'id': 'AlTsmyW4auo',  # This will keep changing | ||||
|             'id': 'hGkQjiJLjWQ',  # This will keep changing | ||||
|             'ext': 'mp4', | ||||
|             'title': str, | ||||
|             'upload_date': r're:\d{8}', | ||||
| @@ -6202,7 +6236,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): | ||||
|                 'channel': str, | ||||
|                 'uploader': str, | ||||
|                 'uploader_url': str, | ||||
|                 'uploader_id': str | ||||
|                 'uploader_id': str, | ||||
|             } | ||||
|         }], | ||||
|         'params': {'extract_flat': True, 'playlist_items': '1'}, | ||||
| @@ -6865,12 +6899,14 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): | ||||
|                 'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc', | ||||
|                 'title': 'Kurzgesagt – In a Nutshell', | ||||
|                 'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q', | ||||
|                 'playlist_count': int,  # XXX: should have a way of saying > 1 | ||||
|                 # No longer available for search as it is set to the handle. | ||||
|                 # 'playlist_count': int, | ||||
|                 'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q', | ||||
|                 'thumbnails': list, | ||||
|                 'uploader_id': '@kurzgesagt', | ||||
|                 'uploader_url': 'https://www.youtube.com/@kurzgesagt', | ||||
|                 'uploader': 'Kurzgesagt – In a Nutshell', | ||||
|                 'channel_follower_count': int, | ||||
|             } | ||||
|         }], | ||||
|         'params': {'extract_flat': True, 'playlist_items': '1'}, | ||||
| @@ -7134,6 +7170,8 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor): | ||||
|             'live_status': 'not_live', | ||||
|             'channel_follower_count': int, | ||||
|             'chapters': 'count:20', | ||||
|             'comment_count': int, | ||||
|             'heatmap': 'count:100', | ||||
|         } | ||||
|     }] | ||||
| 
 | ||||
| @@ -7194,6 +7232,7 @@ class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor): | ||||
|             'channel': 'さなちゃんねる', | ||||
|             'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d', | ||||
|             'uploader': 'さなちゃんねる', | ||||
|             'heatmap': 'count:100', | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|         'params': {'skip_download': 'Youtube'}, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan