mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-11-02 07:35:13 +00:00
[cleanup] Misc fixes
Closes https://github.com/yt-dlp/yt-dlp/pull/3213, Closes https://github.com/yt-dlp/yt-dlp/pull/3117. Related: https://github.com/yt-dlp/yt-dlp/issues/3146#issuecomment-1077323114, https://github.com/yt-dlp/yt-dlp/pull/3277#discussion_r841019671, a825ffbffa (commitcomment-68538986), https://github.com/yt-dlp/yt-dlp/issues/2360, 5fa3c9a88f (r70393519), 5fa3c9a88f (r70393254)
This commit is contained in:
@@ -926,9 +926,9 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
if season_id and not video_data:
|
||||
# Non-Bstation layout, read through episode list
|
||||
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
|
||||
video_data = next(
|
||||
episode for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict)
|
||||
if str(episode.get('episode_id')) == ep_id)
|
||||
video_data = traverse_obj(season_json,
|
||||
('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id),
|
||||
expected_type=dict, get_all=False)
|
||||
return self._extract_video_info(video_data, ep_id=ep_id, aid=aid)
|
||||
|
||||
|
||||
|
||||
@@ -245,10 +245,6 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'upload_date': '20200727',
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'params': {
|
||||
'username': '<snip>',
|
||||
'password': '<snip>',
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
# Only available via new API endpoint
|
||||
@@ -264,10 +260,6 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'episode_number': 5,
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'params': {
|
||||
'username': '<snip>',
|
||||
'password': '<snip>',
|
||||
},
|
||||
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
|
||||
}]
|
||||
_NETRC_MACHINE = 'vrtnu'
|
||||
|
||||
@@ -139,6 +139,8 @@ class InfoExtractor(object):
|
||||
for HDS - URL of the F4M manifest,
|
||||
for DASH - URL of the MPD manifest,
|
||||
for MSS - URL of the ISM manifest.
|
||||
* manifest_stream_number (For internal use only)
|
||||
The index of the stream in the manifest file
|
||||
* ext Will be calculated from URL if missing
|
||||
* format A human-readable description of the format
|
||||
("mp4 container with h264/opus").
|
||||
@@ -215,7 +217,7 @@ class InfoExtractor(object):
|
||||
(HTTP or RTMP) download. Boolean.
|
||||
* has_drm The format has DRM and cannot be downloaded. Boolean
|
||||
* downloader_options A dictionary of downloader options as
|
||||
described in FileDownloader
|
||||
described in FileDownloader (For internal use only)
|
||||
RTMP formats can also have the additional fields: page_url,
|
||||
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
|
||||
rtmp_protocol, rtmp_real_time
|
||||
@@ -3684,9 +3686,9 @@ class InfoExtractor(object):
|
||||
def _merge_subtitle_items(subtitle_list1, subtitle_list2):
|
||||
""" Merge subtitle items for one language. Items with duplicated URLs/data
|
||||
will be dropped. """
|
||||
list1_data = set([item.get('url') or item['data'] for item in subtitle_list1])
|
||||
list1_data = set((item.get('url'), item.get('data')) for item in subtitle_list1)
|
||||
ret = list(subtitle_list1)
|
||||
ret.extend([item for item in subtitle_list2 if (item.get('url') or item['data']) not in list1_data])
|
||||
ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data)
|
||||
return ret
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -123,7 +123,7 @@ class DropoutIE(InfoExtractor):
|
||||
self._login(display_id)
|
||||
webpage = self._download_webpage(url, display_id, note='Downloading video webpage')
|
||||
finally:
|
||||
self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out')
|
||||
self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out', fatal=False)
|
||||
|
||||
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
@@ -139,7 +139,7 @@ class DropoutIE(InfoExtractor):
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': VHXEmbedIE.ie_key(),
|
||||
'url': embed_url,
|
||||
'id': self._search_regex(r'embed.vhx.tv/videos/(.+?)\?', embed_url, 'id'),
|
||||
'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'),
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta('description', webpage, fatal=False),
|
||||
|
||||
@@ -397,8 +397,10 @@ class FacebookIE(InfoExtractor):
|
||||
r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
|
||||
post = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||
media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or []
|
||||
if str(m.get('id')) == video_id and m.get('__typename') == 'Video']
|
||||
media = traverse_obj(
|
||||
post,
|
||||
(..., 'attachments', ..., 'media', lambda _, m: str(m['id']) == video_id and m['__typename'] == 'Video'),
|
||||
expected_type=dict)
|
||||
title = get_first(media, ('title', 'text'))
|
||||
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
|
||||
uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {}
|
||||
|
||||
@@ -2523,7 +2523,7 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Riku ja Tunna lähtevät peurajahtiin tv:stä tutun biologin kanssa – metsästysreissu huipentuu kasvissyöjän painajaiseen!',
|
||||
'thumbnail': r're:^https?://.+\.jpg$',
|
||||
'duration': 108,
|
||||
'series' : 'Madventures Suomi',
|
||||
'series': 'Madventures Suomi',
|
||||
'description': 'md5:aa55b44bd06a1e337a6f1d0b46507381',
|
||||
'categories': ['Matkailu', 'Elämäntyyli'],
|
||||
'age_limit': 0,
|
||||
@@ -3886,8 +3886,8 @@ class GenericIE(InfoExtractor):
|
||||
if RtmpIE.suitable(vurl):
|
||||
return True
|
||||
vpath = compat_urlparse.urlparse(vurl).path
|
||||
vext = determine_ext(vpath)
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
|
||||
vext = determine_ext(vpath, None)
|
||||
return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
|
||||
|
||||
def filter_video(urls):
|
||||
return list(filter(check_video, urls))
|
||||
|
||||
@@ -194,7 +194,7 @@ class LimelightBaseIE(InfoExtractor):
|
||||
cc_url = cc.get('webvttFileUrl')
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = cc.get('languageCode') or self._search_regex(r'/[a-z]{2}\.vtt', cc_url, 'lang', default='en')
|
||||
lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en')
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': cc_url,
|
||||
})
|
||||
|
||||
@@ -469,7 +469,7 @@ class NiconicoIE(InfoExtractor):
|
||||
comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey'))
|
||||
user_id_str = session_api_data.get('serviceUserId')
|
||||
|
||||
thread_ids = [x for x in traverse_obj(api_data, ('comment', 'threads')) or [] if x['isActive']]
|
||||
thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
|
||||
raw_danmaku = self._extract_all_comments(video_id, thread_ids, user_id_str, comment_user_key)
|
||||
if not raw_danmaku:
|
||||
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
||||
|
||||
@@ -264,7 +264,7 @@ class TikTokBaseIE(InfoExtractor):
|
||||
return {
|
||||
'id': aweme_id,
|
||||
'title': aweme_detail.get('desc'),
|
||||
'description': aweme_detail['desc'],
|
||||
'description': aweme_detail.get('desc'),
|
||||
'view_count': int_or_none(stats_info.get('play_count')),
|
||||
'like_count': int_or_none(stats_info.get('digg_count')),
|
||||
'repost_count': int_or_none(stats_info.get('share_count')),
|
||||
@@ -387,6 +387,9 @@ class TikTokIE(TikTokBaseIE):
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'artist': 'Ysrbeats',
|
||||
'album': 'Lehanga',
|
||||
'track': 'Lehanga',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
|
||||
@@ -410,6 +413,8 @@ class TikTokIE(TikTokBaseIE):
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'artist': 'Evan Todd, Jessica Keenan Wynn, Alice Lee, Barrett Wilbert Weed & Jon Eidson',
|
||||
'track': 'Big Fun',
|
||||
}
|
||||
}, {
|
||||
# Banned audio, only available on the app
|
||||
@@ -463,7 +468,7 @@ class TikTokIE(TikTokBaseIE):
|
||||
'info_dict': {
|
||||
'id': '7059698374567611694',
|
||||
'ext': 'mp4',
|
||||
'title': 'N/A',
|
||||
'title': 'tiktok video #7059698374567611694',
|
||||
'description': '',
|
||||
'uploader': 'pokemonlife22',
|
||||
'creator': 'Pokemon',
|
||||
@@ -480,7 +485,7 @@ class TikTokIE(TikTokBaseIE):
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'expected_warnings': ['Video not available']
|
||||
'expected_warnings': ['Video not available', 'Creating a generic title']
|
||||
}, {
|
||||
# Auto-captions available
|
||||
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
|
||||
|
||||
@@ -163,7 +163,6 @@ class YandexVideoPreviewIE(InfoExtractor):
|
||||
'thumbnail': 'https://i.mycdn.me/videoPreview?id=544866765315&type=37&idx=13&tkn=TY5qjLYZHxpmcnK8U2LgzYkgmaU&fn=external_8',
|
||||
'uploader_id': '481054701571',
|
||||
'title': 'LOFT - summer, summer, summer HD',
|
||||
'manifest_stream_number': 0,
|
||||
'uploader': 'АРТЁМ КУДРОВ',
|
||||
},
|
||||
}, { # youtube
|
||||
|
||||
@@ -837,17 +837,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
|
||||
channel_id = traverse_obj(
|
||||
renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
|
||||
renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
|
||||
expected_type=str, get_all=False)
|
||||
timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
|
||||
scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
|
||||
overlay_style = traverse_obj(
|
||||
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
|
||||
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
|
||||
get_all=False, expected_type=str)
|
||||
badges = self._extract_badges(renderer)
|
||||
thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
|
||||
navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
|
||||
renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), expected_type=str))
|
||||
renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
|
||||
expected_type=str)) or ''
|
||||
url = f'https://www.youtube.com/watch?v={video_id}'
|
||||
if overlay_style == 'SHORTS' or (navigation_url and '/shorts/' in navigation_url):
|
||||
if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
|
||||
url = f'https://www.youtube.com/shorts/{video_id}'
|
||||
|
||||
return {
|
||||
@@ -862,7 +865,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'channel_id': channel_id,
|
||||
'thumbnails': thumbnails,
|
||||
'upload_date': strftime_or_none(timestamp, '%Y%m%d') if self._configuration_arg('approximate_date', ie_key='youtubetab') else None,
|
||||
'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
|
||||
if self._configuration_arg('approximate_date', ie_key='youtubetab')
|
||||
else None),
|
||||
'live_status': ('is_upcoming' if scheduled_timestamp is not None
|
||||
else 'was_live' if 'streamed' in time_text.lower()
|
||||
else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
|
||||
|
||||
Reference in New Issue
Block a user