]+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1',
+ webpage, 'login wrapper', default=None):
+ self.raise_login_required('This video is only available for members')
+ return self.url_result(url, 'Generic') # Fall back to generic to extract audio
+
+ real_url = param_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/')
+ return self.url_result(VimeoIE._smuggle_referrer(real_url, url), VimeoIE, video_id)
From ad0b857f459a6d390fbf124183916218c52f223a Mon Sep 17 00:00:00 2001
From: tony-hn <25278435+tony-hn@users.noreply.github.com>
Date: Thu, 26 Sep 2024 17:53:52 +0100
Subject: [PATCH 012/261] [ie/RumbleChannel] Fix extractor (#11049)
Closes #10833
Authored by: tony-hn
---
yt_dlp/extractor/rumble.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index db780a2cf4..74c7e4f176 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -8,14 +8,17 @@
UnsupportedError,
clean_html,
determine_ext,
+ extract_attributes,
format_field,
get_element_by_class,
+ get_elements_html_by_class,
int_or_none,
join_nonempty,
parse_count,
parse_iso8601,
traverse_obj,
unescapeHTML,
+ urljoin,
)
@@ -382,8 +385,10 @@ def entries(self, url, playlist_id):
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
break
raise
- for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage):
- yield self.url_result('https://rumble.com' + video_url)
+ for video_url in traverse_obj(
+ get_elements_html_by_class('videostream__link', webpage), (..., {extract_attributes}, 'href'),
+ ):
+ yield self.url_result(urljoin('https://rumble.com', video_url))
def _real_extract(self, url):
url, playlist_id = self._match_valid_url(url).groups()
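
The rewritten loop above leans on three yt-dlp utility helpers. A minimal sketch of how they compose, using an invented HTML snippet rather than a real Rumble page:

    from yt_dlp.utils import extract_attributes, get_elements_html_by_class, traverse_obj, urljoin

    webpage = (
        '<a class="videostream__link link" href="/v12345-example.html">one</a>'
        '<a class="videostream__link link" href="/v67890-example.html">two</a>'
    )

    # get_elements_html_by_class returns the raw HTML of every matching element,
    # extract_attributes turns each element into a dict of its attributes,
    # and traverse_obj maps the whole list down to the 'href' values.
    hrefs = traverse_obj(
        get_elements_html_by_class('videostream__link', webpage),
        (..., {extract_attributes}, 'href'))
    print([urljoin('https://rumble.com', href) for href in hrefs])
    # ['https://rumble.com/v12345-example.html', 'https://rumble.com/v67890-example.html']
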
From 5a8a05aebb49693e78e1123015837ed5e961ff76 Mon Sep 17 00:00:00 2001
From: diman8
Date: Thu, 26 Sep 2024 18:57:00 +0200
Subject: [PATCH 013/261] [ie/SVTPage] Fix extractor (#11010)
Authored by: diman8
---
yt_dlp/extractor/svt.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py
index 38782abac7..b5df2e1a18 100644
--- a/yt_dlp/extractor/svt.py
+++ b/yt_dlp/extractor/svt.py
@@ -472,7 +472,7 @@ def _real_extract(self, url):
title = self._og_search_title(webpage)
urql_state = self._search_json(
- r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)
+ r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
From a2000bc85730c950351d78bb818493dc39dca3cb Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 26 Sep 2024 18:20:14 -0500
Subject: [PATCH 014/261] [ie/bilibili] Fix chapters and subtitles extraction
(#11099)
Closes #11089
Authored by: bashonly
---
yt_dlp/extractor/bilibili.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 2fe1103cb9..62f68fbc6d 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -46,6 +46,7 @@
class BilibiliBaseIE(InfoExtractor):
+ _HEADERS = {'Referer': 'https://www.bilibili.com/'}
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
_wbi_key_cache = {}
@@ -192,7 +193,7 @@ def _get_subtitles(self, video_id, cid, aid=None):
video_info = self._download_json(
'https://api.bilibili.com/x/player/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
- note=f'Extracting subtitle info {cid}')
+ note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
self.report_warning(
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
@@ -207,7 +208,7 @@ def _get_subtitles(self, video_id, cid, aid=None):
def _get_chapters(self, aid, cid):
chapters = aid and cid and self._download_json(
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
- note='Extracting chapters', fatal=False)
+ note='Extracting chapters', fatal=False, headers=self._HEADERS)
return traverse_obj(chapters, ('data', 'view_points', ..., {
'title': 'content',
'start_time': 'from',
@@ -1021,8 +1022,6 @@ def _real_extract(self, url):
class BilibiliCheeseBaseIE(BilibiliBaseIE):
- _HEADERS = {'Referer': 'https://www.bilibili.com/'}
-
def _extract_episode(self, season_info, ep_id):
episode_info = traverse_obj(season_info, (
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
From 9f5c9a90898c5a1e672922d9cd799716c73cee34 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 26 Sep 2024 18:21:03 -0500
Subject: [PATCH 015/261] [ie/wistia] Support password-protected videos
(#11100)
Closes #10914
Authored by: bashonly
---
yt_dlp/extractor/wistia.py | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py
index fb2a8648fd..df7ecb3cdc 100644
--- a/yt_dlp/extractor/wistia.py
+++ b/yt_dlp/extractor/wistia.py
@@ -8,6 +8,7 @@
from ..utils import (
ExtractorError,
determine_ext,
+ filter_dict,
float_or_none,
int_or_none,
parse_qs,
@@ -25,16 +26,25 @@ class WistiaBaseIE(InfoExtractor):
def _download_embed_config(self, config_type, config_id, referer):
base_url = self._EMBED_BASE_URL + f'{config_type}/{config_id}'
+ video_password = self.get_param('videopassword')
embed_config = self._download_json(
base_url + '.json', config_id, headers={
'Referer': referer if referer.startswith('http') else base_url, # Some videos require this.
- })
+ }, query=filter_dict({'password': video_password}))
error = traverse_obj(embed_config, 'error')
if error:
raise ExtractorError(
f'Error while getting the playlist: {error}', expected=True)
+ if traverse_obj(embed_config, (
+ 'media', ('embed_options', 'embedOptions'), 'plugin',
+ 'passwordProtectedVideo', 'on', any)) == 'true':
+ if video_password:
+ raise ExtractorError('Invalid video password', expected=True)
+ raise ExtractorError(
+ 'This content is password-protected. Use the --video-password option', expected=True)
+
return embed_config
def _get_real_ext(self, url):
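
A quick note on the query construction above: filter_dict drops None-valued entries, so the password parameter is only sent when --video-password was actually supplied. A small illustration (the password value is made up):

    from yt_dlp.utils import filter_dict

    print(filter_dict({'password': None}))       # {} -> no query parameter is added
    print(filter_dict({'password': 'hunter2'}))  # {'password': 'hunter2'}
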
From 1d84b780cf33a1d84756825ac23f990a905703df Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 26 Sep 2024 18:26:10 -0500
Subject: [PATCH 016/261] [ie/youtube:clip] Prioritize `https` formats (#11102)
Closes #10856
Authored by: bashonly
---
yt_dlp/extractor/youtube.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index fc50dbc05f..1382c01b60 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -7655,6 +7655,8 @@ def _real_extract(self, url):
'id': clip_id,
'section_start': int(clip_data['startTimeMs']) / 1000,
'section_end': int(clip_data['endTimeMs']) / 1000,
+ '_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility
+ 'proto:https', 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang'),
}
From eabb4680fdb09ba1f48d174a700a2e3b43f82add Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 26 Sep 2024 18:27:16 -0500
Subject: [PATCH 017/261] [ie/niconico] Fix m3u8 formats extraction (#11103)
Closes #10724
Authored by: bashonly
---
yt_dlp/extractor/niconico.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 179e7a9b16..e06740d62e 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -420,7 +420,7 @@ def _yield_dms_formats(self, api_data, video_id):
'x-request-with': 'https://www.nicovideo.jp',
})['data']['contentUrl']
# Getting all audio formats results in duplicate video formats which we filter out later
- dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)
+ dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
@@ -432,7 +432,6 @@ def _yield_dms_formats(self, api_data, video_id):
'asr': ('samplingRate', {int_or_none}),
}), get_all=False),
'acodec': 'aac',
- 'ext': 'm4a',
}
# Sort before removing dupes to keep the format dicts with the lowest tbr
From 7f909046f4dc0fba472b4963145aef6e0d42491b Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 27 Sep 2024 11:37:16 -0500
Subject: [PATCH 018/261] [ie/abc.net.au:iview:showseries] Fix extraction
(#11101)
Closes #10475
Authored by: bashonly
---
yt_dlp/extractor/abc.py | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py
index 7518ba6f0d..7296be73b3 100644
--- a/yt_dlp/extractor/abc.py
+++ b/yt_dlp/extractor/abc.py
@@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
},
'playlist_count': 15,
+ 'skip': 'This program is not currently available in ABC iview',
+ }, {
+ 'url': 'https://iview.abc.net.au/show/inbestigators',
+ 'info_dict': {
+ 'id': '175343-1',
+ 'title': 'Series 1',
+ 'description': 'md5:b9976935a6450e5b78ce2a940a755685',
+ 'series': 'The Inbestigators',
+ 'season': 'Series 1',
+ 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
+ },
+ 'playlist_count': 17,
}]
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id)
- webpage_data = self._search_regex(
- r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
- webpage, 'initial state')
- video_data = self._parse_json(
- unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
- video_data = video_data['route']['pageData']['_embedded']
+ video_data = self._search_json(
+ r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
+ transform_source=lambda x: x.encode().decode('unicode_escape'),
+ end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
From 48d629d461e05b1b19f5e53dc959bb9ebe95da42 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 27 Sep 2024 11:38:08 -0500
Subject: [PATCH 019/261] [ie/YleAreena] Support podcasts (#11104)
Closes #10840
Authored by: bashonly
---
yt_dlp/extractor/yle_areena.py | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/yt_dlp/extractor/yle_areena.py b/yt_dlp/extractor/yle_areena.py
index ef9e96804c..c0a218e2fc 100644
--- a/yt_dlp/extractor/yle_areena.py
+++ b/yt_dlp/extractor/yle_areena.py
@@ -10,7 +10,7 @@
class YleAreenaIE(InfoExtractor):
- _VALID_URL = r'https?://areena\.yle\.fi/(?P<id>[\d-]+)'
+ _VALID_URL = r'https?://areena\.yle\.fi/(?P<podcast>podcastit/)?(?P<id>[\d-]+)'

_GEO_COUNTRIES = ['FI']
_TESTS = [
{
@@ -77,7 +77,7 @@ class YleAreenaIE(InfoExtractor):
]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
video_data = self._download_json(
f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
@@ -103,8 +103,11 @@ def _real_extract(self, url):
'name': sub.get('kind'),
})
- kaltura_id = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id'), expected_type=str)
- if kaltura_id:
+ if is_podcast:
+ info_dict = {
+ 'url': video_data['data']['ongoing_ondemand']['media_url'],
+ }
+ elif kaltura_id := traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id', {str})):
info_dict = {
'_type': 'url_transparent',
'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
@@ -114,13 +117,11 @@ def _real_extract(self, url):
formats, subs = self._extract_m3u8_formats_and_subtitles(
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
self._merge_subtitles(subs, target=subtitles)
- info_dict = {
- 'id': video_id,
- 'formats': formats,
- }
+ info_dict = {'formats': formats}
return {
**info_dict,
+ 'id': video_id,
'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
or episode or info.get('title')),
'description': description,
From 0aa4426e9a35f7f8e184f1f2082b3b313c1448f7 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 27 Sep 2024 11:38:40 -0500
Subject: [PATCH 020/261] [ie/kick:clips] Support new URL format (#11107)
Closes #11105
Authored by: bashonly
---
yt_dlp/extractor/kick.py | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py
index abea5280ba..bd21e59501 100644
--- a/yt_dlp/extractor/kick.py
+++ b/yt_dlp/extractor/kick.py
@@ -148,7 +148,7 @@ def _real_extract(self, url):
class KickClipIE(KickBaseIE):
IE_NAME = 'kick:clips'
- _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/?\?(?:[^#]+&)?clip=(?P<id>clip_[\w-]+)'
+ _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?P<id>clip_[\w-]+)'
_TESTS = [{
'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
'info_dict': {
@@ -189,6 +189,26 @@ class KickClipIE(KickBaseIE):
'age_limit': 0,
},
'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5',
+ 'info_dict': {
+ 'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5',
+ 'ext': 'mp4',
+ 'title': 'KLJASLDJKLJKASDLJKDAS',
+ 'channel': 'spreen',
+ 'channel_id': '5312671',
+ 'uploader': 'AnormalBarraBaja',
+ 'uploader_id': '26518262',
+ 'duration': 43.0,
+ 'upload_date': '20240927',
+ 'timestamp': 1727399987,
+ 'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp',
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': ['Minecraft'],
+ 'age_limit': 0,
+ },
+ 'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
From c08e0b20b5edd8957b8318716bc14e896d1b96f4 Mon Sep 17 00:00:00 2001
From: Kieran
Date: Fri, 27 Sep 2024 13:52:41 -0700
Subject: [PATCH 021/261] Allow `none` arg to negate `--convert-subs` and
`--convert-thumbnails` (#11066)
Authored by: kieraneglin
---
README.md | 10 +++++++---
yt_dlp/__init__.py | 5 +++++
yt_dlp/options.py | 8 +++++---
3 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index 428eb9f478..1d6a4a86d5 100644
--- a/README.md
+++ b/README.md
@@ -999,12 +999,16 @@ ## Post-Processing Options:
be used multiple times
--no-exec Remove any previously defined --exec
--convert-subs FORMAT Convert the subtitles to another format
- (currently supported: ass, lrc, srt, vtt)
- (Alias: --convert-subtitles)
+ (currently supported: ass, lrc, srt, vtt).
+ Use "--convert-subs none" to disable
+ conversion (default) (Alias: --convert-
+ subtitles)
--convert-thumbnails FORMAT Convert the thumbnails to another format
(currently supported: jpg, png, webp). You
can specify multiple rules using similar
- syntax as --remux-video
+ syntax as "--remux-video". Use "--convert-
+ thumbnails none" to disable conversion
+ (default)
--split-chapters Split video into multiple files based on
internal chapters. The "chapter:" prefix can
be used with "--paths" and "--output" to set
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index c0b8e3b507..c2d19f94a0 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -235,6 +235,11 @@ def validate_minmax(min_val, max_val, min_name, max_name=None):
validate_regex('format sorting', f, FormatSorter.regex)
# Postprocessor formats
+ if opts.convertsubtitles == 'none':
+ opts.convertsubtitles = None
+ if opts.convertthumbnails == 'none':
+ opts.convertthumbnails = None
+
validate_regex('merge output format', opts.merge_output_format,
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index ffe2463fe2..8077d5d88f 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1725,15 +1725,17 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'--convert-subs', '--convert-sub', '--convert-subtitles',
metavar='FORMAT', dest='convertsubtitles', default=None,
help=(
- 'Convert the subtitles to another format (currently supported: {}) '
- '(Alias: --convert-subtitles)'.format(', '.join(sorted(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)))))
+ 'Convert the subtitles to another format '
+ f'(currently supported: {", ".join(sorted(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS))}). '
+ 'Use "--convert-subs none" to disable conversion (default) (Alias: --convert-subtitles)'))
postproc.add_option(
'--convert-thumbnails',
metavar='FORMAT', dest='convertthumbnails', default=None,
help=(
'Convert the thumbnails to another format '
f'(currently supported: {", ".join(sorted(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS))}). '
- 'You can specify multiple rules using similar syntax as --remux-video'))
+ 'You can specify multiple rules using similar syntax as "--remux-video". '
+ 'Use "--convert-thumbnails none" to disable conversion (default)'))
postproc.add_option(
'--split-chapters', '--split-tracks',
dest='split_chapters', action='store_true', default=False,
From a1b4ac2b8ed8e6eaa56044d439f1e0d00c2ba218 Mon Sep 17 00:00:00 2001
From: fireattack
Date: Sat, 28 Sep 2024 04:57:57 +0800
Subject: [PATCH 022/261] [ie/vimeo] Fix HLS audio format sorting (#11082)
Closes #10854
Authored by: fireattack
---
yt_dlp/extractor/vimeo.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 9a03948cd9..367d5e5835 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -21,6 +21,7 @@
parse_filesize,
parse_iso8601,
parse_qs,
+ qualities,
smuggle_url,
str_or_none,
traverse_obj,
@@ -146,6 +147,8 @@ def _parse_config(self, config, video_id):
})
# TODO: fix handling of 308 status code returned for live archive manifest requests
+ QUALITIES = ('low', 'medium', 'high')
+ quality = qualities(QUALITIES)
sep_pattern = r'/sep/video/'
for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
@@ -166,6 +169,11 @@ def _parse_config(self, config, video_id):
m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id,
note=f'Downloading {cdn_name} m3u8 information',
fatal=False)
+ # m3u8 doesn't give audio bitrates; need to prioritize based on GROUP-ID
+ # See: https://github.com/yt-dlp/yt-dlp/issues/10854
+ for f in fmts:
+ if mobj := re.search(rf'audio-({"|".join(QUALITIES)})', f['format_id']):
+ f['quality'] = quality(mobj.group(1))
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif files_type == 'dash':
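
For context, qualities() simply builds a ranking function over the given tuple, returning the index of a known name and -1 for anything else; the loop above feeds it the GROUP-ID suffix found in the format_id. A small sketch with invented format IDs:

    from yt_dlp.utils import qualities

    QUALITIES = ('low', 'medium', 'high')
    quality = qualities(QUALITIES)

    # Mirrors the re.search(rf'audio-({"|".join(QUALITIES)})', ...) lookup above;
    # the format IDs here are made up for illustration.
    for format_id in ('hls-cdn-audio-low', 'hls-cdn-audio-medium', 'hls-cdn-audio-high'):
        suffix = format_id.rsplit('-', 1)[-1]
        print(format_id, quality(suffix))
    # hls-cdn-audio-low 0, hls-cdn-audio-medium 1, hls-cdn-audio-high 2 (higher is preferred)
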
From 8f4ea14680c7865d8ffac10a9174205d1d84ada7 Mon Sep 17 00:00:00 2001
From: rakslice
Date: Fri, 27 Sep 2024 14:32:39 -0700
Subject: [PATCH 023/261] Fix format sorting bug with vp9.2 vcodec (#10884)
Authored by: rakslice
---
test/test_YoutubeDL.py | 29 +++++++++++++++++++++++++++++
test/test_utils.py | 5 +++++
yt_dlp/utils/_utils.py | 2 +-
3 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 1847c4ffd8..a99e624080 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -236,6 +236,35 @@ def test_format_selection_video(self):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
+ def test_format_selection_by_vcodec_sort(self):
+ formats = [
+ {'format_id': 'av1-format', 'ext': 'mp4', 'vcodec': 'av1', 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'vp9-hdr-format', 'ext': 'mp4', 'vcodec': 'vp09.02.50.10.01.09.18.09.00', 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'vp9-sdr-format', 'ext': 'mp4', 'vcodec': 'vp09.00.50.08', 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'h265-format', 'ext': 'mp4', 'vcodec': 'h265', 'acodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'bestvideo', 'format_sort': ['vcodec:vp9.2']})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'vp9-hdr-format')
+
+ ydl = YDL({'format': 'bestvideo', 'format_sort': ['vcodec:vp9']})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'vp9-sdr-format')
+
+ ydl = YDL({'format': 'bestvideo', 'format_sort': ['+vcodec:vp9.2']})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'vp9-hdr-format')
+
+ ydl = YDL({'format': 'bestvideo', 'format_sort': ['+vcodec:vp9']})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'vp9-sdr-format')
+
def test_format_selection_string_ops(self):
formats = [
{'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
diff --git a/test/test_utils.py b/test/test_utils.py
index a2b4593527..4f5fa1e100 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -921,6 +921,11 @@ def test_parse_codecs(self):
'acodec': 'none',
'dynamic_range': 'HDR10',
})
+ self.assertEqual(parse_codecs('vp09.02.50.10.01.09.18.09.00'), {
+ 'vcodec': 'vp09.02.50.10.01.09.18.09.00',
+ 'acodec': 'none',
+ 'dynamic_range': 'HDR10',
+ })
self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), {
'vcodec': 'av01.0.12M.10.0.110.09.16.09.0',
'acodec': 'none',
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 04dd0f8d2c..184794f95a 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -5281,7 +5281,7 @@ class FormatSorter:
settings = {
'vcodec': {'type': 'ordered', 'regex': True,
- 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
+ 'order': ['av0?1', 'vp0?9.0?2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
'acodec': {'type': 'ordered', 'regex': True,
'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
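
The one-character change above matters because the 'order' entries are regexes with unescaped dots: the old vp0?9.2 never matched real VP9 Profile 2 (HDR) codec strings, so -S vcodec:vp9.2 fell through to the plain vp9 slot. A quick check of the regex behaviour, using the codec string from the new test:

    import re

    hdr_vcodec = 'vp09.02.50.10.01.09.18.09.00'
    print(bool(re.match(r'vp0?9.2', hdr_vcodec)))    # False - old pattern misses HDR codec strings
    print(bool(re.match(r'vp0?9.0?2', hdr_vcodec)))  # True  - fixed pattern matches them
    print(bool(re.match(r'vp0?9.0?2', 'vp9.2')))     # True  - the user-facing 'vp9.2' alias still matches
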
From 63da31b3b29af90062d8a72a905ffe4b5e499042 Mon Sep 17 00:00:00 2001
From: ndyanx <114776171+ndyanx@users.noreply.github.com>
Date: Fri, 27 Sep 2024 17:05:22 -0500
Subject: [PATCH 024/261] [ie/dropbox] Fix password-protected video support
(#10735)
Also adds thumbnail extraction
Closes #9864
Authored by: ndyanx
---
yt_dlp/extractor/dropbox.py | 52 +++++++++++++++++++++++++------------
1 file changed, 35 insertions(+), 17 deletions(-)
diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py
index 51b40df428..c122096230 100644
--- a/yt_dlp/extractor/dropbox.py
+++ b/yt_dlp/extractor/dropbox.py
@@ -6,8 +6,10 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
+ update_url,
update_url_query,
url_basename,
+ urlencode_postdata,
)
@@ -36,43 +38,58 @@ class DropboxIE(InfoExtractor):
},
]
+ def _yield_decoded_parts(self, webpage):
+ for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
+ yield base64.b64decode(encoded).decode('utf-8', 'ignore')
+
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
fn = urllib.parse.unquote(url_basename(url))
title = os.path.splitext(fn)[0]
-
password = self.get_param('videopassword')
- if (self._og_search_title(webpage) == 'Dropbox - Password Required'
- or 'Enter the password for this link' in webpage):
+ for part in self._yield_decoded_parts(webpage):
+ if '/sm/password' in part:
+ webpage = self._download_webpage(
+ update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
+ break
+
+ if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required'
+ or 'Enter the password for this link' in webpage):
if password:
- content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
- payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
response = self._download_json(
- 'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(),
- headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
+ 'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
+ headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
+ data=urlencode_postdata({
+ 'is_xhr': 'true',
+ 't': self._get_cookies('https://www.dropbox.com')['t'].value,
+ 'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
+ 'password': password,
+ 'url': url,
+ }))
if response.get('status') != 'authed':
- raise ExtractorError('Authentication failed!', expected=True)
- webpage = self._download_webpage(url, video_id)
- elif self._get_cookies('https://dropbox.com').get('sm_auth'):
- webpage = self._download_webpage(url, video_id)
- else:
+ raise ExtractorError('Invalid password', expected=True)
+ elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
raise ExtractorError('Password protected video, use --video-password ', expected=True)
+ webpage = self._download_webpage(url, video_id)
- formats, subtitles, has_anonymous_download = [], {}, False
- for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
- decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
+ formats, subtitles = [], {}
+ has_anonymous_download = False
+ thumbnail = None
+ for part in self._yield_decoded_parts(webpage):
if not has_anonymous_download:
has_anonymous_download = self._search_regex(
- r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
+ r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
transcode_url = self._search_regex(
- r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
+ r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
if not transcode_url:
continue
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
+ thumbnail = self._search_regex(
+ r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
break
# downloads enabled we can get the original file
@@ -89,4 +106,5 @@ def _real_extract(self, url):
'title': title,
'formats': formats,
'subtitles': subtitles,
+ 'thumbnail': thumbnail,
}
From 7509d692b37a7ec6230ea75bfe1e44a8de5eefce Mon Sep 17 00:00:00 2001
From: kclauhk <78251477+kclauhk@users.noreply.github.com>
Date: Sat, 28 Sep 2024 06:28:22 +0800
Subject: [PATCH 025/261] [ie/loom] Fix m3u8 formats extraction (#10760)
Closes #10737
Authored by: kclauhk
---
yt_dlp/extractor/loom.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/yt_dlp/extractor/loom.py b/yt_dlp/extractor/loom.py
index 1191aa17ea..b0878c33e2 100644
--- a/yt_dlp/extractor/loom.py
+++ b/yt_dlp/extractor/loom.py
@@ -92,9 +92,9 @@ class LoomIE(InfoExtractor):
},
'params': {'videopassword': 'seniorinfants2'},
}, {
- # embed, transcoded-url endpoint sends empty JSON response
+ # embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
- 'md5': '8488817242a0db1cb2ad0ea522553cf6',
+ 'md5': 'b321d261656848c184a94e3b93eae28d',
'info_dict': {
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
'ext': 'mp4',
@@ -104,6 +104,7 @@ class LoomIE(InfoExtractor):
'timestamp': 1657216459,
'duration': 181,
},
+ 'params': {'format': 'bestvideo'}, # Test video-only fixup
'expected_warnings': ['Failed to parse JSON'],
}]
_WEBPAGE_TESTS = [{
@@ -293,7 +294,11 @@ def get_formats(format_url, format_id, quality):
format_url = format_url.replace('-split.m3u8', '.m3u8')
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
+ # Sometimes only split video/audio formats are available, need to fixup video-only formats
+ is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec'))
for fmt in m3u8_formats:
+ if is_not_premerged and fmt.get('vcodec') != 'none':
+ fmt['acodec'] = 'none'
yield {
**fmt,
'url': update_url(fmt['url'], query=query),
From cca534cd9e6850c70244f225a4a1895ef4bcdbec Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 27 Sep 2024 17:30:31 -0500
Subject: [PATCH 026/261] Raise minimum recommended Python version to 3.9
(#11098)
Authored by: bashonly
---
devscripts/changelog_override.json | 5 ++++
yt_dlp/update.py | 38 +++++++++++++++++++++++-------
2 files changed, 35 insertions(+), 8 deletions(-)
diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json
index 5189de2d77..7be750cfbe 100644
--- a/devscripts/changelog_override.json
+++ b/devscripts/changelog_override.json
@@ -185,5 +185,10 @@
"action": "add",
"when": "6075a029dba70a89675ae1250e7cdfd91f0eba41",
"short": "[priority] Security: [[ie/douyutv] Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3v33-3wmw-3785)\n - A dependency on potentially malicious third-party JavaScript code has been removed from the Douyu extractors"
+ },
+ {
+ "action": "add",
+ "when": "fb8b7f226d251e521a89b23c415e249e5b788e5c",
+ "short": "[priority] **The minimum *recommended* Python version has been raised to 3.9**\nSince Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
}
]
diff --git a/yt_dlp/update.py b/yt_dlp/update.py
index 72ae290844..4cf3bdc320 100644
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@@ -135,20 +135,42 @@ def _get_binary_name():
def _get_system_deprecation():
- MIN_SUPPORTED, MIN_RECOMMENDED = (3, 8), (3, 8)
+ MIN_SUPPORTED, MIN_RECOMMENDED = (3, 8), (3, 9)
if sys.version_info > MIN_RECOMMENDED:
return None
major, minor = sys.version_info[:2]
- if sys.version_info < MIN_SUPPORTED:
- msg = f'Python version {major}.{minor} is no longer supported'
- else:
- msg = (f'Support for Python version {major}.{minor} has been deprecated. '
- '\nYou may stop receiving updates on this version at any time')
+ PYTHON_MSG = f'Please update to Python {".".join(map(str, MIN_RECOMMENDED))} or above'
- major, minor = MIN_RECOMMENDED
- return f'{msg}! Please update to Python {major}.{minor} or above'
+ if sys.version_info < MIN_SUPPORTED:
+ return f'Python version {major}.{minor} is no longer supported! {PYTHON_MSG}'
+
+ EXE_MSG_TMPL = ('Support for {} has been deprecated. '
+ 'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}')
+ STOP_MSG = 'You may stop receiving updates on this version at any time!'
+ variant = detect_variant()
+
+ # Temporary until Windows builds use 3.9, which will drop support for Win7 and 2008ServerR2
+ if variant in ('win_exe', 'win_x86_exe', 'py2exe'):
+ platform_name = platform.platform()
+ if any(platform_name.startswith(f'Windows-{name}') for name in ('7', '2008ServerR2')):
+ return EXE_MSG_TMPL.format('Windows 7/Server 2008 R2', 'issues/10086', STOP_MSG)
+ elif variant == 'py2exe':
+ return EXE_MSG_TMPL.format(
+ 'py2exe builds (yt-dlp_min.exe)', 'issues/10087',
+ 'In a future update you will be migrated to the PyInstaller-bundled executable. '
+ 'This will be done automatically; no action is required on your part')
+ return None
+
+ # Temporary until aarch64/armv7l build flow is bumped to Ubuntu 20.04 and Python 3.9
+ elif variant in ('linux_aarch64_exe', 'linux_armv7l_exe'):
+ libc_ver = version_tuple(os.confstr('CS_GNU_LIBC_VERSION').partition(' ')[2])
+ if libc_ver < (2, 31):
+ return EXE_MSG_TMPL.format('system glibc version < 2.31', 'pull/8638', STOP_MSG)
+ return None
+
+ return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}'
def _sha256_file(path):
From c6387abc1af9842bb0541288a5610abba9b1ab51 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 27 Sep 2024 17:46:22 -0500
Subject: [PATCH 027/261] [cleanup] Misc (#10807)
Closes #10751, Closes #10769, Closes #10791
Authored by: bashonly, Codenade, pzhlkj6612, seproDev, coletdjnz, grqz, Grub4K
Co-authored-by: Codenade
Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
Co-authored-by: coletdjnz
Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com>
Co-authored-by: Simon Sawicki
---
.github/workflows/quick-test.yml | 2 +-
.github/workflows/release.yml | 4 ++--
README.md | 30 ++++++++++++++--------------
pyproject.toml | 2 +-
yt_dlp/downloader/external.py | 4 ++--
yt_dlp/extractor/academicearth.py | 2 +-
yt_dlp/extractor/ard.py | 4 ++--
yt_dlp/extractor/callin.py | 2 +-
yt_dlp/extractor/common.py | 8 ++++----
yt_dlp/extractor/fc2.py | 2 +-
yt_dlp/extractor/generic.py | 4 ++--
yt_dlp/extractor/getcourseru.py | 2 +-
yt_dlp/extractor/golem.py | 2 +-
yt_dlp/extractor/hrfensehen.py | 2 +-
yt_dlp/extractor/japandiet.py | 7 +++++--
yt_dlp/extractor/kaltura.py | 2 +-
yt_dlp/extractor/mailru.py | 2 +-
yt_dlp/extractor/mgtv.py | 2 +-
yt_dlp/extractor/mit.py | 2 +-
yt_dlp/extractor/nzonscreen.py | 2 +-
yt_dlp/extractor/pornhub.py | 3 +--
yt_dlp/extractor/radiofrance.py | 2 +-
yt_dlp/extractor/reverbnation.py | 2 +-
yt_dlp/extractor/tele13.py | 2 +-
yt_dlp/extractor/twitcasting.py | 2 +-
yt_dlp/extractor/viu.py | 2 +-
yt_dlp/extractor/ximalaya.py | 2 +-
yt_dlp/networking/_websockets.py | 4 ++--
yt_dlp/options.py | 10 +++++-----
yt_dlp/postprocessor/sponsorblock.py | 2 +-
yt_dlp/utils/_utils.py | 6 +++---
31 files changed, 63 insertions(+), 61 deletions(-)
diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml
index fe2a7e9239..1571d3cab4 100644
--- a/.github/workflows/quick-test.yml
+++ b/.github/workflows/quick-test.yml
@@ -15,7 +15,7 @@ jobs:
with:
python-version: '3.8'
- name: Install test requirements
- run: python3 ./devscripts/install_deps.py --include test
+ run: python3 ./devscripts/install_deps.py -o --include test
- name: Run tests
timeout-minutes: 15
run: |
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index fa5ad7e515..8d0bc4026a 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -204,7 +204,7 @@ jobs:
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add -u
git commit -m "Release ${{ env.version }}" \
- -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl"
+ -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all"
git push origin --force ${{ github.event.ref }}:release
- name: Get target commitish
@@ -325,7 +325,7 @@ jobs:
"(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES
printf '\n\n' >> ./RELEASE_NOTES
cat >> ./RELEASE_NOTES << EOF
- #### A description of the various files are in the [README](https://github.com/${{ github.repository }}#release-files)
+ #### A description of the various files is in the [README](https://github.com/${{ github.repository }}#release-files)
---
$(python ./devscripts/make_changelog.py -vv --collapsible)
EOF
diff --git a/README.md b/README.md
index 1d6a4a86d5..3e76a4efbb 100644
--- a/README.md
+++ b/README.md
@@ -200,7 +200,7 @@ #### Impersonation
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
-* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
+* [**curl_cffi**](https://github.com/lexiforest/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lexiforest/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/lexiforest/curl_cffi/blob/main/LICENSE)
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
* Currently included in `yt-dlp.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds
@@ -459,17 +459,17 @@ ## Video Selection:
conditions. Use a "\" to escape "&" or
quotes if needed. If used multiple times,
the filter matches if at least one of the
- conditions is met. E.g. --match-filter
- !is_live --match-filter "like_count>?100 &
+ conditions is met. E.g. --match-filters
+ !is_live --match-filters "like_count>?100 &
description~='(?i)\bcats \& dogs\b'" matches
only videos that are not live OR those that
have a like count more than 100 (or the like
field is not available) and also has a
description that contains the phrase "cats &
- dogs" (caseless). Use "--match-filter -" to
+ dogs" (caseless). Use "--match-filters -" to
interactively ask whether to download each
video
- --no-match-filters Do not use any --match-filter (default)
+ --no-match-filters Do not use any --match-filters (default)
--break-match-filters FILTER Same as "--match-filters" but stops the
download process when a video is rejected
--no-break-match-filters Do not use any --break-match-filters (default)
@@ -490,7 +490,7 @@ ## Video Selection:
encountering a file that is in the archive
(default)
--break-per-input Alters --max-downloads, --break-on-existing,
- --break-match-filter, and autonumber to
+ --break-match-filters, and autonumber to
reset per input URL
--no-break-per-input --break-on-existing and similar options
terminates the entire download queue
@@ -1771,7 +1771,7 @@ # EXTRACTOR ARGUMENTS
#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,web_creator` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
+* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,web_creator` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
@@ -2184,9 +2184,9 @@ ### New features
* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
-* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filter` etc
+* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filters` etc
-* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc
+* **Improvements**: Regex and other operators in `--format`/`--match-filters`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc
* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details
@@ -2227,7 +2227,7 @@ ### Differences in default behavior
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
* ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [aria2c](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~
-* yt-dlp versions between 2021.09.01 and 2023.01.02 applies `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
+* yt-dlp versions between 2021.09.01 and 2023.01.02 applies `--match-filters` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
* yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
* The sub-modules `swfinterp`, `casefold` are removed.
@@ -2273,11 +2273,11 @@ #### Redundant options
--get-thumbnail --print thumbnail
-e, --get-title --print title
-g, --get-url --print urls
- --match-title REGEX --match-filter "title ~= (?i)REGEX"
- --reject-title REGEX --match-filter "title !~= (?i)REGEX"
- --min-views COUNT --match-filter "view_count >=? COUNT"
- --max-views COUNT --match-filter "view_count <=? COUNT"
- --break-on-reject Use --break-match-filter
+ --match-title REGEX --match-filters "title ~= (?i)REGEX"
+ --reject-title REGEX --match-filters "title !~= (?i)REGEX"
+ --min-views COUNT --match-filters "view_count >=? COUNT"
+ --max-views COUNT --match-filters "view_count <=? COUNT"
+ --break-on-reject Use --break-match-filters
--user-agent UA --add-header "User-Agent:UA"
--referer URL --add-header "Referer:URL"
--playlist-start NUMBER -I NUMBER:
diff --git a/pyproject.toml b/pyproject.toml
index 18d9a0a3a7..f54980d576 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -76,7 +76,7 @@ dev = [
]
static-analysis = [
"autopep8~=2.0",
- "ruff~=0.5.0",
+ "ruff~=0.6.0",
]
test = [
"pytest~=8.1",
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index ae2372915b..6c1ec403c8 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -508,7 +508,7 @@ def _call_downloader(self, tmpfilename, info_dict):
env = None
proxy = self.params.get('proxy')
if proxy:
- if not re.match(r'^[\da-zA-Z]+://', proxy):
+ if not re.match(r'[\da-zA-Z]+://', proxy):
proxy = f'http://{proxy}'
if proxy.startswith('socks'):
@@ -559,7 +559,7 @@ def _call_downloader(self, tmpfilename, info_dict):
selected_formats = info_dict.get('requested_formats') or [info_dict]
for i, fmt in enumerate(selected_formats):
- is_http = re.match(r'^https?://', fmt['url'])
+ is_http = re.match(r'https?://', fmt['url'])
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
if cookies:
args.extend(['-cookies', ''.join(
diff --git a/yt_dlp/extractor/academicearth.py b/yt_dlp/extractor/academicearth.py
index d9691cb5c6..b997a02885 100644
--- a/yt_dlp/extractor/academicearth.py
+++ b/yt_dlp/extractor/academicearth.py
@@ -4,7 +4,7 @@
class AcademicEarthCourseIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
+ _VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
IE_NAME = 'AcademicEarth:Course'
_TEST = {
'url': 'http://academicearth.org/playlists/laws-of-nature/',
diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
index 6fd6413479..efc79dd141 100644
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -231,7 +231,7 @@ def _real_extract(self, url):
class ARDBetaMediathekIE(InfoExtractor):
IE_NAME = 'ARDMediathek'
- _VALID_URL = r'''(?x)https://
+ _VALID_URL = r'''(?x)https?://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/]+/)?
(?:player|live|video)/
@@ -470,7 +470,7 @@ def _real_extract(self, url):
class ARDMediathekCollectionIE(InfoExtractor):
- _VALID_URL = r'''(?x)https://
+ _VALID_URL = r'''(?x)https?://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/?#]+/)?
(?P<playlist>sendung|serie|sammlung)/
diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py
index b7061a7d14..ee2e56f8e0 100644
--- a/yt_dlp/extractor/callin.py
+++ b/yt_dlp/extractor/callin.py
@@ -3,7 +3,7 @@
class CallinIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
+ _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
_TESTS = [{
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
'info_dict': {
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 9501e5ec9a..486a4ea3cb 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2077,7 +2077,7 @@ def _parse_m3u8_formats_and_subtitles(
has_drm = HlsFD._has_drm(m3u8_doc)
def format_url(url):
- return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
+ return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
if self.get_param('hls_split_discontinuity', False):
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
@@ -2812,11 +2812,11 @@ def extract_Initialization(source):
base_url_e = element.find(_add_ns('BaseURL'))
if try_call(lambda: base_url_e.text) is not None:
base_url = base_url_e.text + base_url
- if re.match(r'^https?://', base_url):
+ if re.match(r'https?://', base_url):
break
if mpd_base_url and base_url.startswith('/'):
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
- elif mpd_base_url and not re.match(r'^https?://', base_url):
+ elif mpd_base_url and not re.match(r'https?://', base_url):
if not mpd_base_url.endswith('/'):
mpd_base_url += '/'
base_url = mpd_base_url + base_url
@@ -2906,7 +2906,7 @@ def prepare_template(template_name, identifiers):
}
def location_key(location):
- return 'url' if re.match(r'^https?://', location) else 'path'
+ return 'url' if re.match(r'https?://', location) else 'path'
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py
index eac70f6a96..f7b883155c 100644
--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -14,7 +14,7 @@
class FC2IE(InfoExtractor):
- _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
+ _VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
IE_NAME = 'fc2'
_NETRC_MACHINE = 'fc2'
_TESTS = [{
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 04cffaa861..592800287a 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2340,7 +2340,7 @@ def _real_extract(self, url):
default_search = 'fixup_error'
if default_search in ('auto', 'auto_warning', 'fixup_error'):
- if re.match(r'^[^\s/]+\.[^\s/]+/', url):
+ if re.match(r'[^\s/]+\.[^\s/]+/', url):
self.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
elif default_search != 'fixup_error':
@@ -2400,7 +2400,7 @@ def _real_extract(self, url):
# Check for direct link to a video
content_type = full_response.headers.get('Content-Type', '').lower()
- m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
+ m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
headers = filter_dict({'Referer': smuggled_data.get('referer')})
diff --git a/yt_dlp/extractor/getcourseru.py b/yt_dlp/extractor/getcourseru.py
index 53b881011c..b7581d77e2 100644
--- a/yt_dlp/extractor/getcourseru.py
+++ b/yt_dlp/extractor/getcourseru.py
@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
_VALID_URL = [
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
- rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
+ rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
]
_TESTS = [{
'url': 'http://academymel.online/3video_1',
diff --git a/yt_dlp/extractor/golem.py b/yt_dlp/extractor/golem.py
index 90d2fe6c26..964bf6519d 100644
--- a/yt_dlp/extractor/golem.py
+++ b/yt_dlp/extractor/golem.py
@@ -7,7 +7,7 @@
class GolemIE(InfoExtractor):
- _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
+ _VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
_TEST = {
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
diff --git a/yt_dlp/extractor/hrfensehen.py b/yt_dlp/extractor/hrfensehen.py
index 17673d5b8f..b5a7b14a58 100644
--- a/yt_dlp/extractor/hrfensehen.py
+++ b/yt_dlp/extractor/hrfensehen.py
@@ -13,7 +13,7 @@
class HRFernsehenIE(InfoExtractor):
IE_NAME = 'hrfernsehen'
- _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
+ _VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
_TESTS = [{
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
'md5': '5c4e0ba94677c516a2f65a84110fc536',
diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py
index 2ef091aff2..994da22ae0 100644
--- a/yt_dlp/extractor/japandiet.py
+++ b/yt_dlp/extractor/japandiet.py
@@ -194,11 +194,14 @@ def _real_extract(self, url):
class SangiinInstructionIE(InfoExtractor):
- _VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
+ _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
IE_DESC = False # this shouldn't be listed as a supported site
def _real_extract(self, url):
- raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
+ raise ExtractorError(
+ 'Copy the link from the button below the video description/player '
+ 'and use that link to download. If there is no button in the frame, '
+ 'get the URL of the frame showing the video.', expected=True)
class SangiinIE(InfoExtractor):
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py
index e5737b1e9e..6d51e32f6d 100644
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
(?:
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
https?://
- (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
+ (?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
(?:
(?:
# flash player
diff --git a/yt_dlp/extractor/mailru.py b/yt_dlp/extractor/mailru.py
index cca678f14a..0496a87f00 100644
--- a/yt_dlp/extractor/mailru.py
+++ b/yt_dlp/extractor/mailru.py
@@ -126,7 +126,7 @@ def _real_extract(self, url):
video_data = None
# fix meta_url if missing the host address
- if re.match(r'^\/\+\/', meta_url):
+ if re.match(r'\/\+\/', meta_url):
meta_url = urljoin('https://my.mail.ru', meta_url)
if meta_url:
diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py
index d5dda06f99..c793626fde 100644
--- a/yt_dlp/extractor/mgtv.py
+++ b/yt_dlp/extractor/mgtv.py
@@ -16,7 +16,7 @@
class MGTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
+ _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV'
IE_NAME = 'MangoTV'
diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py
index e75c540a23..66c3b07936 100644
--- a/yt_dlp/extractor/mit.py
+++ b/yt_dlp/extractor/mit.py
@@ -65,7 +65,7 @@ def _real_extract(self, url):
class OCWMITIE(InfoExtractor):
IE_NAME = 'ocw.mit.edu'
- _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<id>[a-z0-9\-]+)'
+ _VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<id>[a-z0-9\-]+)'
_BASE_URL = 'http://ocw.mit.edu/'
_TESTS = [
diff --git a/yt_dlp/extractor/nzonscreen.py b/yt_dlp/extractor/nzonscreen.py
index 5fc516daf4..755039804e 100644
--- a/yt_dlp/extractor/nzonscreen.py
+++ b/yt_dlp/extractor/nzonscreen.py
@@ -10,7 +10,7 @@
class NZOnScreenIE(InfoExtractor):
- _VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
'info_dict': {
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index 679dc63234..e1e9777e8e 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -628,8 +628,7 @@ def is_404(e):
page_entries = self._extract_entries(webpage, host)
if not page_entries:
break
- for e in page_entries:
- yield e
+ yield from page_entries
if not self._has_more(webpage):
break
diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py
index ff21963541..9d90439841 100644
--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@@ -16,7 +16,7 @@
class RadioFranceIE(InfoExtractor):
- _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
+ _VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
IE_NAME = 'radiofrance'
_TEST = {
diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py
index ddf8c3753f..f3bcc2c328 100644
--- a/yt_dlp/extractor/reverbnation.py
+++ b/yt_dlp/extractor/reverbnation.py
@@ -6,7 +6,7 @@
class ReverbNationIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
+ _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_TESTS = [{
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py
index c5ca208fb4..0d721773ed 100644
--- a/yt_dlp/extractor/tele13.py
+++ b/yt_dlp/extractor/tele13.py
@@ -8,7 +8,7 @@
class Tele13IE(InfoExtractor):
- _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
+ _VALID_URL = r'https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
_TESTS = [
{
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py
index 53b4084694..bf9c6348cb 100644
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -270,7 +270,7 @@ def _real_extract(self, url):
class TwitCastingUserIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)'
+ _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
'info_dict': {
diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py
index 01e59352bf..f4ed96bf62 100644
--- a/yt_dlp/extractor/viu.py
+++ b/yt_dlp/extractor/viu.py
@@ -90,7 +90,7 @@ def _real_extract(self, url):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
for key, value in video_data.items():
- mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
+ mobj = re.match(r'subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
if not mobj:
continue
subtitles.setdefault(mobj.group('lang'), []).append({
diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py
index d63964a004..02bf6a7beb 100644
--- a/yt_dlp/extractor/ximalaya.py
+++ b/yt_dlp/extractor/ximalaya.py
@@ -21,7 +21,7 @@ class XimalayaBaseIE(InfoExtractor):
class XimalayaIE(XimalayaBaseIE):
IE_NAME = 'ximalaya'
IE_DESC = '喜马拉雅FM'
- _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
_TESTS = [
{
'url': 'http://www.ximalaya.com/sound/47740352/',
diff --git a/yt_dlp/networking/_websockets.py b/yt_dlp/networking/_websockets.py
index 21b765b91d..ec55567dae 100644
--- a/yt_dlp/networking/_websockets.py
+++ b/yt_dlp/networking/_websockets.py
@@ -33,8 +33,8 @@
import websockets.version
websockets_version = tuple(map(int_or_none, websockets.version.version.split('.')))
-if websockets_version < (12, 0):
- raise ImportError('Only websockets>=12.0 is supported')
+if websockets_version < (13, 0):
+ raise ImportError('Only websockets>=13.0 is supported')
import websockets.sync.client
from websockets.uri import parse_uri
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 8077d5d88f..9980b7fc3f 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -647,16 +647,16 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'You can also simply specify a field to match if the field is present, '
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
- 'the filter matches if at least one of the conditions is met. E.g. --match-filter '
- '!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
+ 'the filter matches if at least one of the conditions is met. E.g. --match-filters '
+ '!is_live --match-filters "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
'matches only videos that are not live OR those that have a like count more than 100 '
'(or the like field is not available) and also has a description '
'that contains the phrase "cats & dogs" (caseless). '
- 'Use "--match-filter -" to interactively ask whether to download each video'))
+ 'Use "--match-filters -" to interactively ask whether to download each video'))
selection.add_option(
'--no-match-filters',
dest='match_filter', action='store_const', const=None,
- help='Do not use any --match-filter (default)')
+ help='Do not use any --match-filters (default)')
selection.add_option(
'--break-match-filters',
metavar='FILTER', dest='breaking_match_filter', action='append',
@@ -704,7 +704,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
selection.add_option(
'--break-per-input',
action='store_true', dest='break_per_url', default=False,
- help='Alters --max-downloads, --break-on-existing, --break-match-filter, and autonumber to reset per input URL')
+ help='Alters --max-downloads, --break-on-existing, --break-match-filters, and autonumber to reset per input URL')
selection.add_option(
'--no-break-per-input',
action='store_false', dest='break_per_url',
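A short sketch (assuming the embedding API) of how the renamed --match-filters option maps onto the `match_filter` option when yt-dlp is used as a library; the URL below is a placeholder:

    import yt_dlp
    from yt_dlp.utils import match_filter_func

    # Roughly equivalent to: yt-dlp --match-filters "!is_live" --match-filters "like_count>?100"
    ydl_opts = {'match_filter': match_filter_func(['!is_live', 'like_count>?100'])}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://example.com/watch/placeholder'])  # placeholder URL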
diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py
index 6cf9ab62ea..b3fc8b54a8 100644
--- a/yt_dlp/postprocessor/sponsorblock.py
+++ b/yt_dlp/postprocessor/sponsorblock.py
@@ -33,7 +33,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
FFmpegPostProcessor.__init__(self, downloader)
self._categories = tuple(categories or self.CATEGORIES.keys())
- self._API_URL = api if re.match('^https?://', api) else 'https://' + api
+ self._API_URL = api if re.match('https?://', api) else 'https://' + api
def run(self, info):
extractor = info['extractor_key']
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 184794f95a..e1b3c48d63 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -1954,7 +1954,7 @@ def urljoin(base, path):
path = path.decode()
if not isinstance(path, str) or not path:
return None
- if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
+ if re.match(r'(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
return path
if isinstance(base, bytes):
base = base.decode()
@@ -2007,7 +2007,7 @@ def url_or_none(url):
if not url or not isinstance(url, str):
return None
url = url.strip()
- return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
+ return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
@@ -3113,7 +3113,7 @@ def is_html(first_bytes):
while first_bytes.startswith(bom):
encoding, first_bytes = enc, first_bytes[len(bom):]
- return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
+ return re.match(r'\s*<', first_bytes.decode(encoding, 'replace'))
def determine_protocol(info_dict):
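A minimal sketch of why the '^' anchors dropped throughout this patch were redundant: re.match() already anchors the pattern at the start of the string, so behaviour is unchanged.

    import re

    # re.match() only attempts a match at the beginning of the string,
    # so a leading '^' adds nothing to the pattern.
    assert re.match(r'https?://', 'https://example.com')
    assert re.match(r'^https?://', 'https://example.com')  # identical result
    assert re.match(r'https?://', 'ftp://example.com') is None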
From 5945fc1945a4001537072e39f03725f944437834 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 27 Sep 2024 23:01:13 +0000
Subject: [PATCH 028/261] Release 2024.09.27
Created by: bashonly
:ci skip all
---
CONTRIBUTORS | 16 +++++++++
Changelog.md | 90 +++++++++++++++++++++++++++++++++++++++++++++++
supportedsites.md | 14 ++++++--
yt_dlp/version.py | 6 ++--
4 files changed, 120 insertions(+), 6 deletions(-)
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 489ab7da8b..c80f714055 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -657,3 +657,19 @@ luvyana
szantnerb
hugepower
scribblemaniac
+Codenade
+Demon000
+Deukhoofd
+grqz
+hibes
+Khaoklong51
+kieraneglin
+lengzuo
+naglis
+ndyanx
+otovalek
+quad
+rakslice
+sahilsinghss73
+tony-hn
+xingchensong
diff --git a/Changelog.md b/Changelog.md
index 0b96ab29cd..2ef28fa07a 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -4,6 +4,96 @@ # Changelog
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
+### 2024.09.27
+
+#### Important changes
+- **The minimum *recommended* Python version has been raised to 3.9**
+Since Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)
+
+#### Core changes
+- [Allow `none` arg to negate `--convert-subs` and `--convert-thumbnails`](https://github.com/yt-dlp/yt-dlp/commit/c08e0b20b5edd8957b8318716bc14e896d1b96f4) ([#11066](https://github.com/yt-dlp/yt-dlp/issues/11066)) by [kieraneglin](https://github.com/kieraneglin)
+- [Fix format sorting bug with vp9.2 vcodec](https://github.com/yt-dlp/yt-dlp/commit/8f4ea14680c7865d8ffac10a9174205d1d84ada7) ([#10884](https://github.com/yt-dlp/yt-dlp/issues/10884)) by [rakslice](https://github.com/rakslice)
+- [Raise minimum recommended Python version to 3.9](https://github.com/yt-dlp/yt-dlp/commit/cca534cd9e6850c70244f225a4a1895ef4bcdbec) ([#11098](https://github.com/yt-dlp/yt-dlp/issues/11098)) by [bashonly](https://github.com/bashonly)
+- **cookies**: [Improve error message for Windows `--cookies-from-browser chrome` issue](https://github.com/yt-dlp/yt-dlp/commit/b397a64691421ace5df09457c2a764821a2dc6f2) ([#11090](https://github.com/yt-dlp/yt-dlp/issues/11090)) by [seproDev](https://github.com/seproDev)
+- **utils**: `mimetype2ext`: [Recognize `aacp` as `aac`](https://github.com/yt-dlp/yt-dlp/commit/cc85596d5b59f0c14e9381b3675f619c1e12e597) ([#10860](https://github.com/yt-dlp/yt-dlp/issues/10860)) by [bashonly](https://github.com/bashonly)
+
+#### Extractor changes
+- [Fix JW Player format parsing](https://github.com/yt-dlp/yt-dlp/commit/409f8e9e3b4bde81ef76fc563256f876d2ff8099) ([#10956](https://github.com/yt-dlp/yt-dlp/issues/10956)) by [seproDev](https://github.com/seproDev)
+- [Handle decode errors when reading responses](https://github.com/yt-dlp/yt-dlp/commit/325001317d97f4545d66fac44c4ba772c6f45f22) ([#10868](https://github.com/yt-dlp/yt-dlp/issues/10868)) by [bashonly](https://github.com/bashonly)
+- **abc.net.au**: iview, showseries: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7f909046f4dc0fba472b4963145aef6e0d42491b) ([#11101](https://github.com/yt-dlp/yt-dlp/issues/11101)) by [bashonly](https://github.com/bashonly)
+- **adn**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/cc88a54bb1ef285154775f8a6a413335ce4c71ce) ([#10749](https://github.com/yt-dlp/yt-dlp/issues/10749)) by [infanf](https://github.com/infanf)
+- **asobistage**: [Support redirected URLs](https://github.com/yt-dlp/yt-dlp/commit/a7d3235c84dac57a127cbe0ff38f7f7c2fdd8fa0) ([#10768](https://github.com/yt-dlp/yt-dlp/issues/10768)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **bandcamp**: user: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0176547f16a3642cd71627126e9dfc24981e20) ([#10328](https://github.com/yt-dlp/yt-dlp/issues/10328)) by [bashonly](https://github.com/bashonly), [quad](https://github.com/quad)
+- **beacon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b4760c778d0c92c6e3f2bc8346cd72c8f08595ae) ([#9901](https://github.com/yt-dlp/yt-dlp/issues/9901)) by [Deukhoofd](https://github.com/Deukhoofd)
+- **bilibili**
+ - [Fix chapters and subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/a2000bc85730c950351d78bb818493dc39dca3cb) ([#11099](https://github.com/yt-dlp/yt-dlp/issues/11099)) by [bashonly](https://github.com/bashonly)
+ - [Fix festival URL support](https://github.com/yt-dlp/yt-dlp/commit/b43bd864851f2862e26caa85461c5d825d49d463) ([#10740](https://github.com/yt-dlp/yt-dlp/issues/10740)) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz)
+- **biliintl**: [Fix referer header](https://github.com/yt-dlp/yt-dlp/commit/a06bb586795ebab87a2356923acfc674d6f0e152) ([#11003](https://github.com/yt-dlp/yt-dlp/issues/11003)) by [Khaoklong51](https://github.com/Khaoklong51)
+- **dropbox**: [Fix password-protected video support](https://github.com/yt-dlp/yt-dlp/commit/63da31b3b29af90062d8a72a905ffe4b5e499042) ([#10735](https://github.com/yt-dlp/yt-dlp/issues/10735)) by [ndyanx](https://github.com/ndyanx)
+- **ertgr**: [Fix video extraction](https://github.com/yt-dlp/yt-dlp/commit/416686ed0cf792ec44ab059f3b229dd776077e14) ([#11091](https://github.com/yt-dlp/yt-dlp/issues/11091)) by [seproDev](https://github.com/seproDev)
+- **eurosport**: [Support local URL variants](https://github.com/yt-dlp/yt-dlp/commit/f0bb28504c8c2b75ee3e5796aed50de2a7f90a1b) ([#10785](https://github.com/yt-dlp/yt-dlp/issues/10785)) by [seproDev](https://github.com/seproDev)
+- **facebook**
+ - ads: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d62fef7e07d454c0d2ba2d69fb96d691dba1ded0) ([#10704](https://github.com/yt-dlp/yt-dlp/issues/10704)) by [kclauhk](https://github.com/kclauhk)
+ - reel: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/0e1b941c6b2caa688b0d3332e723d16dbafa4311) by [lengzuo](https://github.com/lengzuo)
+- **germanupa**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/124f058b546d652a359c67025bb479789bfbef0b) ([#10538](https://github.com/yt-dlp/yt-dlp/issues/10538)) by [grqz](https://github.com/grqz)
+- **hgtvde**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a555389c9bb32e589e00b4664974423fb7b04dcd) ([#10992](https://github.com/yt-dlp/yt-dlp/issues/10992)) by [bashonly](https://github.com/bashonly), [rdamas](https://github.com/rdamas)
+- **huya**: video: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/25c1cdaa2650563494d3bf00a38f72d0d9486bff) ([#10686](https://github.com/yt-dlp/yt-dlp/issues/10686)) by [hugepower](https://github.com/hugepower)
+- **iprima**: [Fix zoom URL support](https://github.com/yt-dlp/yt-dlp/commit/4a27b8f092f7f7c10b7a334d3535c97c2af02f0a) ([#10959](https://github.com/yt-dlp/yt-dlp/issues/10959)) by [otovalek](https://github.com/otovalek)
+- **khanacademy**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0fba08485b6445b72b5b63ae23ca2a73fa5d967f) ([#10913](https://github.com/yt-dlp/yt-dlp/issues/10913)) by [seproDev](https://github.com/seproDev)
+- **kick**
+ - clips: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/0aa4426e9a35f7f8e184f1f2082b3b313c1448f7) ([#11107](https://github.com/yt-dlp/yt-dlp/issues/11107)) by [bashonly](https://github.com/bashonly)
+ - vod: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/173d54c151b987409e3eb09552d8d89ed8fc50f7) ([#10988](https://github.com/yt-dlp/yt-dlp/issues/10988)) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz)
+- **kika**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e6f48ca80821939c1fd11ec2a0cdbf2fba9b258a) ([#5788](https://github.com/yt-dlp/yt-dlp/issues/5788)) by [1100101](https://github.com/1100101)
+- **lnkgo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/fa83d0b36bc43d30fe9241c1e923f4614864b758) ([#10904](https://github.com/yt-dlp/yt-dlp/issues/10904)) by [naglis](https://github.com/naglis)
+- **loom**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/7509d692b37a7ec6230ea75bfe1e44a8de5eefce) ([#10760](https://github.com/yt-dlp/yt-dlp/issues/10760)) by [kclauhk](https://github.com/kclauhk)
+- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e2b3634e299be9c16a247ece3b1858d83889c324) ([#11083](https://github.com/yt-dlp/yt-dlp/issues/11083)) by [szantnerb](https://github.com/szantnerb)
+- **mojevideo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/28b0ecba2af5b4919f198474b3d00a76ef322c31) ([#11019](https://github.com/yt-dlp/yt-dlp/issues/11019)) by [04-pasha-04](https://github.com/04-pasha-04), [pzhlkj6612](https://github.com/pzhlkj6612)
+- **niconico**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/eabb4680fdb09ba1f48d174a700a2e3b43f82add) ([#11103](https://github.com/yt-dlp/yt-dlp/issues/11103)) by [bashonly](https://github.com/bashonly)
+- **nzz**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4a9bc8c3630378bc29f0266126b503f6190c0430) ([#10461](https://github.com/yt-dlp/yt-dlp/issues/10461)) by [1-Byte](https://github.com/1-Byte)
+- **patreoncampaign**: [Support API URLs](https://github.com/yt-dlp/yt-dlp/commit/232e6db30c474d1b387e405342f34173ceeaf832) ([#10734](https://github.com/yt-dlp/yt-dlp/issues/10734)) by [bashonly](https://github.com/bashonly), [hibes](https://github.com/hibes)
+- **pinterest**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c8c078fe28b0ffc15ef9646346c00c592fe71a78) ([#10867](https://github.com/yt-dlp/yt-dlp/issues/10867)) by [bashonly](https://github.com/bashonly), [sahilsinghss73](https://github.com/sahilsinghss73)
+- **radiko**: [Extract unique `id` values](https://github.com/yt-dlp/yt-dlp/commit/c8d096c5ce111411fbdbe2abb8fed54f317a6182) ([#10726](https://github.com/yt-dlp/yt-dlp/issues/10726)) by [garret1317](https://github.com/garret1317)
+- **rtp**: [Support more subpages](https://github.com/yt-dlp/yt-dlp/commit/d02df303d8e49390599db9f34482697e4d1cf5b2) ([#10787](https://github.com/yt-dlp/yt-dlp/issues/10787)) by [Demon000](https://github.com/Demon000)
+- **rumblechannel**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ad0b857f459a6d390fbf124183916218c52f223a) ([#11049](https://github.com/yt-dlp/yt-dlp/issues/11049)) by [tony-hn](https://github.com/tony-hn)
+- **rutube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/41be32e78c3845000dbac188ffb90ea3ea7c4dfa) ([#10844](https://github.com/yt-dlp/yt-dlp/issues/10844)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **samplefocus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/46f4c80bc363ee8116c33d37f65202e6c3470954) ([#10947](https://github.com/yt-dlp/yt-dlp/issues/10947)) by [seproDev](https://github.com/seproDev)
+- **screenrec**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/36f9e602ad55679764bc75a4f67f7562b1d6adcf) ([#10917](https://github.com/yt-dlp/yt-dlp/issues/10917)) by [naglis](https://github.com/naglis)
+- **sen**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/41a241ca6ffb95b3d9aaf4f42106ca8cba9af1a6) ([#10952](https://github.com/yt-dlp/yt-dlp/issues/10952)) by [seproDev](https://github.com/seproDev)
+- **servus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/300c91274f7ea5b1b0528fc5ee11cf1a61d4079e) ([#10944](https://github.com/yt-dlp/yt-dlp/issues/10944)) by [seproDev](https://github.com/seproDev)
+- **snapchatspotlight**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b37417e4f934fd8909788b493d017777155b0ae5) ([#11030](https://github.com/yt-dlp/yt-dlp/issues/11030)) by [seproDev](https://github.com/seproDev)
+- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5a8a05aebb49693e78e1123015837ed5e961ff76) ([#11010](https://github.com/yt-dlp/yt-dlp/issues/11010)) by [diman8](https://github.com/diman8)
+- **tenplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d8d473002b654ab0e7b97ead869f58b4361eeae1) ([#10928](https://github.com/yt-dlp/yt-dlp/issues/10928)) by [aarubui](https://github.com/aarubui)
+- **tiktok**: [Fix web formats extraction](https://github.com/yt-dlp/yt-dlp/commit/3ad0b7f422d547204df687b6d0b2d9110fff3990) ([#11074](https://github.com/yt-dlp/yt-dlp/issues/11074)) by [bashonly](https://github.com/bashonly)
+- **twitter**: spaces: [Support video spaces](https://github.com/yt-dlp/yt-dlp/commit/bef1d4d6fc9493fda7f75e2289c07c507d10092f) ([#10789](https://github.com/yt-dlp/yt-dlp/issues/10789)) by [bashonly](https://github.com/bashonly)
+- **vidflex**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e978c312d6550a6ae4c9df18001afb1b420cb72f) ([#10002](https://github.com/yt-dlp/yt-dlp/issues/10002)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **vimeo**
+ - [Always try to extract original format](https://github.com/yt-dlp/yt-dlp/commit/4115c24d157c5b5f63089d75c4e0f51d1f8b4489) ([#10721](https://github.com/yt-dlp/yt-dlp/issues/10721)) by [bashonly](https://github.com/bashonly) (With fixes in [e8e6a98](https://github.com/yt-dlp/yt-dlp/commit/e8e6a982a1b659eed434d225d7922f632bac6568) by [seproDev](https://github.com/seproDev))
+ - [Fix HLS audio format sorting](https://github.com/yt-dlp/yt-dlp/commit/a1b4ac2b8ed8e6eaa56044d439f1e0d00c2ba218) ([#11082](https://github.com/yt-dlp/yt-dlp/issues/11082)) by [fireattack](https://github.com/fireattack)
+- **watchespn**: [Improve auth support](https://github.com/yt-dlp/yt-dlp/commit/7adff8caf152dcf96d03aff69ed8545c0a63567c) ([#10910](https://github.com/yt-dlp/yt-dlp/issues/10910)) by [ischmidt20](https://github.com/ischmidt20)
+- **wistia**: [Support password-protected videos](https://github.com/yt-dlp/yt-dlp/commit/9f5c9a90898c5a1e672922d9cd799716c73cee34) ([#11100](https://github.com/yt-dlp/yt-dlp/issues/11100)) by [bashonly](https://github.com/bashonly)
+- **ximalaya**: [Add VIP support](https://github.com/yt-dlp/yt-dlp/commit/3dfd720d098b4d49d69cfc77e6376f22bcd90934) ([#10832](https://github.com/yt-dlp/yt-dlp/issues/10832)) by [seproDev](https://github.com/seproDev), [xingchensong](https://github.com/xingchensong)
+- **xinpianchang**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3aa0156e05662923d130ddbc1c82596e38c01a00) ([#10950](https://github.com/yt-dlp/yt-dlp/issues/10950)) by [seproDev](https://github.com/seproDev)
+- **yleareena**: [Support podcasts](https://github.com/yt-dlp/yt-dlp/commit/48d629d461e05b1b19f5e53dc959bb9ebe95da42) ([#11104](https://github.com/yt-dlp/yt-dlp/issues/11104)) by [bashonly](https://github.com/bashonly)
+- **youtube**
+ - [Add `po_token`, `visitor_data`, `data_sync_id` extractor args](https://github.com/yt-dlp/yt-dlp/commit/3a3bd00037e9908e87da4fa9f2ad772aa34dc60e) ([#10648](https://github.com/yt-dlp/yt-dlp/issues/10648)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [seproDev](https://github.com/seproDev) (With fixes in [fa2be9a](https://github.com/yt-dlp/yt-dlp/commit/fa2be9a7c63babede07480151363e54eee5702bd) by [bashonly](https://github.com/bashonly))
+ - [Support excluding `player_client`s in extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/49f3741a820ed142f6866317c2e7d247b130960e) ([#10710](https://github.com/yt-dlp/yt-dlp/issues/10710)) by [bashonly](https://github.com/bashonly)
+ - clip: [Prioritize `https` formats](https://github.com/yt-dlp/yt-dlp/commit/1d84b780cf33a1d84756825ac23f990a905703df) ([#11102](https://github.com/yt-dlp/yt-dlp/issues/11102)) by [bashonly](https://github.com/bashonly)
+ - tab: [Fix shorts tab extraction](https://github.com/yt-dlp/yt-dlp/commit/9431777b4c37129a6093080c77ca59960afbb9d7) ([#10938](https://github.com/yt-dlp/yt-dlp/issues/10938)) by [seproDev](https://github.com/seproDev)
+
+#### Networking changes
+- [Fix handler not being added to RequestError](https://github.com/yt-dlp/yt-dlp/commit/d1c4d88b2d912e8da5e76db455562ca63b1af690) ([#10955](https://github.com/yt-dlp/yt-dlp/issues/10955)) by [coletdjnz](https://github.com/coletdjnz)
+- [Pin `curl-cffi` version to < 0.7.2](https://github.com/yt-dlp/yt-dlp/commit/5bb1aa04dafce13ba9de707ea53169fab58b5207) ([#11092](https://github.com/yt-dlp/yt-dlp/issues/11092)) by [bashonly](https://github.com/bashonly)
+- **Request Handler**: websockets: [Upgrade websockets to 13.0](https://github.com/yt-dlp/yt-dlp/commit/6f9e6537434562d513d0c9b68ced8a61ade94a64) ([#10815](https://github.com/yt-dlp/yt-dlp/issues/10815)) by [coletdjnz](https://github.com/coletdjnz)
+
+#### Misc. changes
+- **build**
+ - [Bump PyInstaller version pin to `>=6.10.0`](https://github.com/yt-dlp/yt-dlp/commit/fb8b7f226d251e521a89b23c415e249e5b788e5c) ([#10709](https://github.com/yt-dlp/yt-dlp/issues/10709)) by [bashonly](https://github.com/bashonly)
+ - [Pin `delocate` version for `macos`](https://github.com/yt-dlp/yt-dlp/commit/7e41628ff523b3fe373b0981a5db441358980dab) ([#10901](https://github.com/yt-dlp/yt-dlp/issues/10901)) by [bashonly](https://github.com/bashonly)
+- **ci**
+ - [Add comment sanitization workflow](https://github.com/yt-dlp/yt-dlp/commit/b6200bdcf3a9415ae36859188f9a57e3e461c696) ([#10915](https://github.com/yt-dlp/yt-dlp/issues/10915)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+ - [Add issue tracker anti-spam protection](https://github.com/yt-dlp/yt-dlp/commit/ad9a8115aa29a1a95c961b16fcf129a228d98f50) ([#10861](https://github.com/yt-dlp/yt-dlp/issues/10861)) by [bashonly](https://github.com/bashonly)
+- **cleanup**: Miscellaneous: [c6387ab](https://github.com/yt-dlp/yt-dlp/commit/c6387abc1af9842bb0541288a5610abba9b1ab51) by [bashonly](https://github.com/bashonly), [Codenade](https://github.com/Codenade), [coletdjnz](https://github.com/coletdjnz), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [pzhlkj6612](https://github.com/pzhlkj6612), [seproDev](https://github.com/seproDev)
+
### 2024.08.06
#### Core changes
diff --git a/supportedsites.md b/supportedsites.md
index e3bbe03ec7..e23d395fde 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -143,6 +143,7 @@ # Supported sites
- **BBVTV**: [*bbvtv*](## "netrc machine")
- **BBVTVLive**: [*bbvtv*](## "netrc machine")
- **BBVTVRecordings**: [*bbvtv*](## "netrc machine")
+ - **BeaconTv**
- **BeatBumpPlaylist**
- **BeatBumpVideo**
- **Beatport**
@@ -505,6 +506,7 @@ # Supported sites
- **gem.cbc.ca:playlist**
- **Genius**
- **GeniusLyrics**
+ - **Germanupa**: germanupa.de
- **GetCourseRu**: [*getcourseru*](## "netrc machine")
- **GetCourseRuPlayer**
- **Gettr**
@@ -580,6 +582,7 @@ # Supported sites
- **HungamaAlbumPlaylist**
- **HungamaSong**
- **huya:live**: huya.com
+ - **huya:video**: 虎牙视频
- **Hypem**
- **Hytale**
- **Icareus**
@@ -660,6 +663,7 @@ # Supported sites
- **kick:vod**
- **Kicker**
- **KickStarter**
+ - **Kika**: KiKA.de
- **kinja:embed**
- **KinoPoisk**
- **Kommunetv**
@@ -722,7 +726,6 @@ # Supported sites
- **livestream:original**
- **Livestreamfails**
- **Lnk**
- - **LnkGo**
- **loc**: Library of Congress
- **loom**
- **loom:folder**
@@ -756,7 +759,7 @@ # Supported sites
- **Masters**
- **MatchTV**
- **MBN**: mbn.co.kr (매일방송)
- - **MDR**: MDR.DE and KiKA
+ - **MDR**: MDR.DE
- **MedalTV**
- **media.ccc.de**
- **media.ccc.de:lists**
@@ -811,6 +814,7 @@ # Supported sites
- **MNetTVLive**: [*mnettv*](## "netrc machine")
- **MNetTVRecordings**: [*mnettv*](## "netrc machine")
- **MochaVideo**
+ - **Mojevideo**: mojevideo.sk
- **Mojvideo**
- **Monstercat**
- **MonsterSirenHypergryphMusic**
@@ -1285,12 +1289,14 @@ # Supported sites
- **Screencast**
- **Screencastify**
- **ScreencastOMatic**
+ - **ScreenRec**
- **ScrippsNetworks**
- **scrippsnetworks:watch**
- **Scrolller**
- **SCTE**: [*scte*](## "netrc machine") (**Currently broken**)
- **SCTECourse**: [*scte*](## "netrc machine") (**Currently broken**)
- **sejm**
+ - **Sen**
- **SenalColombiaLive**: (**Currently broken**)
- **SenateGov**
- **SenateISVP**
@@ -1327,6 +1333,7 @@ # Supported sites
- **SlidesLive**
- **Slutload**
- **Smotrim**
+ - **SnapchatSpotlight**
- **Snotr**
- **Sohu**
- **SohuV**
@@ -1608,6 +1615,7 @@ # Supported sites
- **videomore:season**
- **videomore:video**
- **VideoPress**
+ - **Vidflex**
- **Vidio**: [*vidio*](## "netrc machine")
- **VidioLive**: [*vidio*](## "netrc machine")
- **VidioPremier**: [*vidio*](## "netrc machine")
@@ -1736,7 +1744,7 @@ # Supported sites
- **XiaoHongShu**: 小红书
- **ximalaya**: 喜马拉雅FM
- **ximalaya:album**: 喜马拉雅FM 专辑
- - **xinpianchang**: xinpianchang.com (**Currently broken**)
+ - **Xinpianchang**: 新片场
- **XMinus**: (**Currently broken**)
- **XNXX**
- **Xstream**
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index 6633a11b91..76b8bf0ee2 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
-__version__ = '2024.08.06'
+__version__ = '2024.09.27'
-RELEASE_GIT_HEAD = '4d9231208332d4c32364b8cd814bff8b20232cae'
+RELEASE_GIT_HEAD = 'c6387abc1af9842bb0541288a5610abba9b1ab51'
VARIANT = None
@@ -12,4 +12,4 @@
ORIGIN = 'yt-dlp/yt-dlp'
-_pkg_version = '2024.08.06'
+_pkg_version = '2024.09.27'
From 6328e2e67a4e126e08af382e6a387073082d5c5f Mon Sep 17 00:00:00 2001
From: Corey Wright
Date: Sun, 29 Sep 2024 16:03:39 -0500
Subject: [PATCH 029/261] [ie/ApplePodcasts] Fix extractor (#10903)
Closes #10809
Authored by: coreywright
---
yt_dlp/extractor/applepodcasts.py | 78 ++++++++++++++-----------------
yt_dlp/extractor/common.py | 2 +-
2 files changed, 36 insertions(+), 44 deletions(-)
diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py
index bd301e904a..b99d24e0eb 100644
--- a/yt_dlp/extractor/applepodcasts.py
+++ b/yt_dlp/extractor/applepodcasts.py
@@ -1,27 +1,42 @@
from .common import InfoExtractor
from ..utils import (
- clean_html,
clean_podcast_url,
- get_element_by_class,
int_or_none,
parse_iso8601,
- try_get,
)
+from ..utils.traversal import traverse_obj
class ApplePodcastsIE(InfoExtractor):
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
_TESTS = [{
+ 'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
+ 'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
+ 'info_dict': {
+ 'id': '1000665010654',
+ 'ext': 'mp3',
+ 'title': 'Ferreck Dawn - To The Break of Dawn 117',
+ 'episode': 'Ferreck Dawn - To The Break of Dawn 117',
+ 'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
+ 'upload_date': '20240812',
+ 'timestamp': 1723449600,
+ 'duration': 3596,
+ 'series': 'Ferreck Dawn - To The Break of Dawn',
+ 'thumbnail': 're:.+[.](png|jpe?g|webp)',
+ },
+ }, {
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
- 'md5': '41dc31cd650143e530d9423b6b5a344f',
+ 'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
'info_dict': {
'id': '1000482637777',
'ext': 'mp3',
'title': '207 - Whitney Webb Returns',
+ 'episode': '207 - Whitney Webb Returns',
+ 'episode_number': 207,
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
'upload_date': '20200705',
'timestamp': 1593932400,
- 'duration': 6454,
+ 'duration': 5369,
'series': 'The Tim Dillon Show',
'thumbnail': 're:.+[.](png|jpe?g|webp)',
},
@@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
def _real_extract(self, url):
episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id)
- episode_data = {}
- ember_data = {}
- # new page type 2021-11
- amp_data = self._parse_json(self._search_regex(
- r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
- webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
- amp_data = try_get(amp_data,
- lambda a: self._parse_json(
- next(a[x] for x in iter(a) if episode_id in x),
- episode_id),
- dict) or {}
- amp_data = amp_data.get('d') or []
- episode_data = try_get(
- amp_data,
- lambda a: next(x for x in a
- if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
- dict)
- if not episode_data:
- # try pre 2021-11 page type: TODO: consider deleting if no longer used
- ember_data = self._parse_json(self._search_regex(
- r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
- webpage, 'ember data'), episode_id) or {}
- ember_data = ember_data.get(episode_id) or ember_data
- episode_data = try_get(ember_data, lambda x: x['data'], dict)
- episode = episode_data['attributes']
- description = episode.get('description') or {}
-
- series = None
- for inc in (amp_data or ember_data.get('included') or []):
- if inc.get('type') == 'media/podcast':
- series = try_get(inc, lambda x: x['attributes']['name'])
- series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
+ server_data = self._search_json(
+ r'', webpage),
+ (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
+ meta = traverse_obj(nextjs_data, (
+ ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))
+
+ video_id = meta['uuid']
+ info_dict = traverse_obj(meta, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str.strip}),
+ })
+
+ if traverse_obj(meta, ('program', 'subtype')) != 'movie':
+ for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
+ episode_data = traverse_obj(
+ season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
+ if not episode_data:
+ continue
+
+ episode_title = traverse_obj(
+ episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
+ info_dict.update({
+ 'title': episode_title or info_dict.get('title'),
+ 'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
+ 'season_number': traverse_obj(season_data, ('season', {int_or_none})),
+ 'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
+ })
+ break
api = self._download_json(
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
From a9f85670d03ab993dc589f21a9ffffcad61392d5 Mon Sep 17 00:00:00 2001
From: manav_chaudhary <100396248+manavchaudhary1@users.noreply.github.com>
Date: Tue, 12 Nov 2024 04:11:56 +0530
Subject: [PATCH 125/261] [ie/Chaturbate] Support alternate domains (#10595)
Closes #10594
Authored by: manavchaudhary1
---
yt_dlp/extractor/chaturbate.py | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py
index b49f741efa..864d61f9c2 100644
--- a/yt_dlp/extractor/chaturbate.py
+++ b/yt_dlp/extractor/chaturbate.py
@@ -9,7 +9,7 @@
class ChaturbateIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?]+)'
_TESTS = [{
'url': 'https://www.chaturbate.com/siswet19/',
'info_dict': {
@@ -29,15 +29,24 @@ class ChaturbateIE(InfoExtractor):
}, {
'url': 'https://en.chaturbate.com/siswet19/',
'only_matching': True,
+ }, {
+ 'url': 'https://chaturbate.eu/siswet19/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://chaturbate.global/siswet19/',
+ 'only_matching': True,
}]
_ROOM_OFFLINE = 'Room is currently offline'
def _real_extract(self, url):
- video_id = self._match_id(url)
+ video_id, tld = self._match_valid_url(url).group('id', 'tld')
webpage = self._download_webpage(
- f'https://chaturbate.com/{video_id}/', video_id,
+ f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers())
found_m3u8_urls = []
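A quick sketch checking that the widened pattern accepts the new TLDs (using the <tld>/<id> group names that _real_extract above reads via .group('id', 'tld')):

    import re

    _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?]+)'
    mobj = re.match(_VALID_URL, 'https://chaturbate.global/siswet19/')
    assert mobj.group('tld', 'id') == ('global', 'siswet19')
    assert re.match(_VALID_URL, 'https://chaturbate.eu/fullvideo/?b=caylin').group('id') == 'caylin'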
From bacc31b05a04181b63100c481565256b14813a5e Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Tue, 12 Nov 2024 23:23:10 +0000
Subject: [PATCH 126/261] [ie/facebook] Fix formats extraction (#11513)
Closes #11497
Authored by: bashonly
---
yt_dlp/extractor/facebook.py | 35 ++++++++++++++++++++++++++++++-----
1 file changed, 30 insertions(+), 5 deletions(-)
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index 2bcb5a8411..91e2f3489c 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -563,13 +563,13 @@ def extract_from_jsmods_instances(js_data):
return extract_video_data(try_get(
js_data, lambda x: x['jsmods']['instances'], list) or [])
- def extract_dash_manifest(video, formats):
+ def extract_dash_manifest(vid_data, formats, mpd_url=None):
dash_manifest = traverse_obj(
- video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str)
+ vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
if dash_manifest:
formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
- mpd_url=url_or_none(video.get('dash_manifest_url'))))
+ mpd_url=url_or_none(video.get('dash_manifest_url')) or mpd_url))
def process_formats(info):
# Downloads with browser's User-Agent are rate limited. Working around
@@ -619,9 +619,12 @@ def parse_graphql_video(video):
video = video['creation_story']
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
video.update(reel_info)
- fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
+
formats = []
q = qualities(['sd', 'hd'])
+
+ # Legacy formats extraction
+ fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
('browser_native_sd_url', 'sd')):
@@ -629,7 +632,7 @@ def parse_graphql_video(video):
if not playable_url:
continue
if determine_ext(playable_url) == 'mpd':
- formats.extend(self._extract_mpd_formats(playable_url, video_id))
+ formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
else:
formats.append({
'format_id': format_id,
@@ -638,6 +641,28 @@ def parse_graphql_video(video):
'url': playable_url,
})
extract_dash_manifest(fmt_data, formats)
+
+ # New videoDeliveryResponse formats extraction
+ fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
+ mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
+ dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
+ for idx, dash_manifest in enumerate(dash_manifests):
+ extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
+ if not dash_manifests:
+ # Only extract from MPD URLs if the manifests are not already provided
+ for mpd_url in mpd_urls:
+ formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
+ for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
+ format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
+ formats.append({
+ 'format_id': format_id,
+ # sd, hd formats w/o resolution info should be deprioritized below DASH
+ 'quality': q(format_id) - 3,
+ 'url': prog_fmt['progressive_url'],
+ })
+ for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
+ formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
+
if not formats:
# Do not append false positive entry w/o any formats
return
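For reference, a small sketch of the qualities() helper driving the ranking above: it maps a format_id to its index in the preference list, and the progressive formats are shifted down by 3 so they sort below DASH.

    from yt_dlp.utils import qualities

    q = qualities(['sd', 'hd'])
    assert q('sd') == 0 and q('hd') == 1   # known ids rank by list position
    assert q('unknown') == -1              # unknown ids sort lowest
    # e.g. an 'hd' progressive format above ends up with quality 1 - 3 = -2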
From f2a4983df7a64c4e93b56f79dbd16a781bd90206 Mon Sep 17 00:00:00 2001
From: Jackson Humphrey
Date: Tue, 12 Nov 2024 17:26:18 -0600
Subject: [PATCH 127/261] [ie/archive.org] Fix comments extraction (#11527)
Closes #11526
Authored by: jshumphrey
---
yt_dlp/extractor/archiveorg.py | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index f5a55efc4f..2849d9fd5b 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
},
},
],
+ }, {
+ # The reviewbody is None for one of the reviews; just need to extract data without crashing
+ 'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
+ 'info_dict': {
+ 'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
+ 'ext': 'mp3',
+ 'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
+ 'creators': ['Grateful Dead'],
+ 'duration': 338.31,
+ 'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
+ 'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
+ 'display_id': 'gd95-04-02d1t04.shn',
+ 'location': 'Pyramid Arena',
+ 'uploader': 'jon@archive.org',
+ 'album': '1995-04-02 - Pyramid Arena',
+ 'upload_date': '20040519',
+ 'track_number': 4,
+ 'release_date': '19950402',
+ 'timestamp': 1084927901,
+ },
}]
@staticmethod
@@ -335,7 +355,7 @@ def _real_extract(self, url):
info['comments'].append({
'id': review.get('review_id'),
'author': review.get('reviewer'),
- 'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'),
+ 'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
'timestamp': unified_timestamp(review.get('createdate')),
'parent': 'root'})
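A small sketch of the join_nonempty(from_dict=...) form used above; empty or None fields are simply skipped, which is what avoids the crash on a missing reviewbody (illustrative review data):

    from yt_dlp.utils import join_nonempty

    review = {'review_id': 1, 'reviewtitle': 'Great tape', 'reviewbody': None}
    assert join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n') == 'Great tape'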
From 39d79c9b9cf23411d935910685c40aa1a2fdb409 Mon Sep 17 00:00:00 2001
From: Simon Sawicki
Date: Fri, 15 Nov 2024 22:06:15 +0100
Subject: [PATCH 128/261] [utils] Fix `join_nonempty`, add `**kwargs` to
`unpack` (#11559)
Authored by: Grub4K
---
test/test_traversal.py | 2 +-
test/test_utils.py | 5 -----
yt_dlp/utils/_utils.py | 3 +--
yt_dlp/utils/traversal.py | 4 ++--
4 files changed, 4 insertions(+), 10 deletions(-)
diff --git a/test/test_traversal.py b/test/test_traversal.py
index d48606e99c..52ea19fab3 100644
--- a/test/test_traversal.py
+++ b/test/test_traversal.py
@@ -525,7 +525,7 @@ def test_trim_str(self):
def test_unpack(self):
assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
- assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3'
+ assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
with pytest.raises(TypeError):
unpack(join_nonempty)()
with pytest.raises(TypeError):
diff --git a/test/test_utils.py b/test/test_utils.py
index b5f35736b6..835774a912 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -72,7 +72,6 @@
intlist_to_bytes,
iri_to_uri,
is_html,
- join_nonempty,
js_to_json,
limit_length,
locked_file,
@@ -2158,10 +2157,6 @@ def test_partial_application(self):
assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
- assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
- assert callable(join_nonempty()), 'varargs positional should apply partially'
- assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
-
if __name__ == '__main__':
unittest.main()
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index b28bb555e1..89c53c39e7 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -216,7 +216,7 @@ def partial_application(func):
sig = inspect.signature(func)
required_args = [
param.name for param in sig.parameters.values()
- if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
+ if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
if param.default is inspect.Parameter.empty
]
@@ -4837,7 +4837,6 @@ def number_of_digits(number):
return len('%d' % number)
-@partial_application
def join_nonempty(*values, delim='-', from_dict=None):
if from_dict is not None:
values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py
index 361f239ba6..6bb52050f2 100644
--- a/yt_dlp/utils/traversal.py
+++ b/yt_dlp/utils/traversal.py
@@ -452,9 +452,9 @@ def trim(s):
return trim
-def unpack(func):
+def unpack(func, **kwargs):
@functools.wraps(func)
- def inner(items, **kwargs):
+ def inner(items):
return func(*items, **kwargs)
return inner
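The change in calling convention, mirroring the updated test above: keyword arguments are now bound when wrapping rather than when calling.

    from yt_dlp.utils import join_nonempty
    from yt_dlp.utils.traversal import unpack

    joiner = unpack(join_nonempty, delim=' ')  # kwargs are fixed at wrap time
    assert joiner([1, 2, 3]) == '1 2 3'
    assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'  # default delimiter still applies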
From c014fbcddcb4c8f79d914ac5bb526758b540ea33 Mon Sep 17 00:00:00 2001
From: Simon Sawicki
Date: Fri, 15 Nov 2024 23:25:52 +0100
Subject: [PATCH 129/261] [utils] `subs_list_to_dict`: Add `lang` default
parameter (#11508)
Authored by: Grub4K
---
test/test_traversal.py | 50 ++++++++++++++++++++++++++++++++++++++-
yt_dlp/utils/traversal.py | 22 ++++++++++-------
2 files changed, 63 insertions(+), 9 deletions(-)
diff --git a/test/test_traversal.py b/test/test_traversal.py
index 52ea19fab3..bc433029d8 100644
--- a/test/test_traversal.py
+++ b/test/test_traversal.py
@@ -481,7 +481,7 @@ def test_subs_list_to_dict(self):
'id': 'name',
'data': 'content',
'url': 'url',
- }, all, {subs_list_to_dict}]) == {
+ }, all, {subs_list_to_dict(lang=None)}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [{'data': 'content'}],
}, 'subs with mandatory items missing should be filtered'
@@ -507,6 +507,54 @@ def test_subs_list_to_dict(self):
{'url': 'https://example.com/subs/en1', 'ext': 'ext'},
{'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly'
+ assert traverse_obj([
+ {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
+ {'name': 'de'},
+ {'name': 'en', 'content': 'content'},
+ {'url': 'https://example.com/subs/en'},
+ ], [..., {
+ 'id': 'name',
+ 'url': 'url',
+ 'data': 'content',
+ }, all, {subs_list_to_dict(lang='en')}]) == {
+ 'de': [{'url': 'https://example.com/subs/de.ass'}],
+ 'en': [
+ {'data': 'content'},
+ {'url': 'https://example.com/subs/en'},
+ ],
+ }, 'optionally provided lang should be used if no id available'
+ assert traverse_obj([
+ {'name': 1, 'url': 'https://example.com/subs/de1'},
+ {'name': {}, 'url': 'https://example.com/subs/de2'},
+ {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
+ {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
+ ], [..., {
+ 'id': 'name',
+ 'url': 'url',
+ 'ext': 'ext',
+ }, all, {subs_list_to_dict(lang=None)}]) == {
+ 'de': [
+ {'url': 'https://example.com/subs/de3'},
+ {'url': 'https://example.com/subs/de4'},
+ ],
+ }, 'non str types should be ignored for id and ext'
+ assert traverse_obj([
+ {'name': 1, 'url': 'https://example.com/subs/de1'},
+ {'name': {}, 'url': 'https://example.com/subs/de2'},
+ {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
+ {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
+ ], [..., {
+ 'id': 'name',
+ 'url': 'url',
+ 'ext': 'ext',
+ }, all, {subs_list_to_dict(lang='de')}]) == {
+ 'de': [
+ {'url': 'https://example.com/subs/de1'},
+ {'url': 'https://example.com/subs/de2'},
+ {'url': 'https://example.com/subs/de3'},
+ {'url': 'https://example.com/subs/de4'},
+ ],
+ }, 'non str types should be replaced by default id'
def test_trim_str(self):
with pytest.raises(TypeError):
diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py
index 6bb52050f2..76b51f53d1 100644
--- a/yt_dlp/utils/traversal.py
+++ b/yt_dlp/utils/traversal.py
@@ -332,14 +332,14 @@ class _RequiredError(ExtractorError):
@typing.overload
-def subs_list_to_dict(*, ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
+def subs_list_to_dict(*, lang: str | None = 'und', ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
@typing.overload
-def subs_list_to_dict(subs: list[dict] | None, /, *, ext: str | None = None) -> dict[str, list[dict]]: ...
+def subs_list_to_dict(subs: list[dict] | None, /, *, lang: str | None = 'und', ext: str | None = None) -> dict[str, list[dict]]: ...
-def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
+def subs_list_to_dict(subs: list[dict] | None = None, /, *, lang='und', ext=None):
"""
Convert subtitles from a traversal into a subtitle dict.
The path should have an `all` immediately before this function.
@@ -352,7 +352,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
`quality` The sort order for each subtitle
"""
if subs is None:
- return functools.partial(subs_list_to_dict, ext=ext)
+ return functools.partial(subs_list_to_dict, lang=lang, ext=ext)
result = collections.defaultdict(list)
@@ -360,10 +360,16 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
if not url_or_none(sub.get('url')) and not sub.get('data'):
continue
sub_id = sub.pop('id', None)
- if sub_id is None:
- continue
- if ext is not None and not sub.get('ext'):
- sub['ext'] = ext
+ if not isinstance(sub_id, str):
+ if not lang:
+ continue
+ sub_id = lang
+ sub_ext = sub.get('ext')
+ if not isinstance(sub_ext, str):
+ if not ext:
+ sub.pop('ext', None)
+ else:
+ sub['ext'] = ext
result[sub_id].append(sub)
result = dict(result)
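A short sketch of the new `lang` default, mirroring the tests added above: entries without a usable string id now fall back to the provided lang instead of being dropped.

    from yt_dlp.utils.traversal import subs_list_to_dict, traverse_obj

    subs = [
        {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
        {'url': 'https://example.com/subs/en'},  # no language id available
    ]
    assert traverse_obj(subs, [..., {
        'id': 'name',
        'url': 'url',
    }, all, {subs_list_to_dict(lang='en')}]) == {
        'de': [{'url': 'https://example.com/subs/de.ass'}],
        'en': [{'url': 'https://example.com/subs/en'}],
    }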
From eb64ae7d5def6df2aba74fb703e7f168fb299865 Mon Sep 17 00:00:00 2001
From: bashonly
Date: Thu, 14 Nov 2024 16:08:50 -0600
Subject: [PATCH 130/261] [ie] Allow `ext` override for thumbnails (#11545)
Authored by: bashonly
---
yt_dlp/YoutubeDL.py | 4 +++-
yt_dlp/extractor/common.py | 1 +
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 3186a999de..3130deda31 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -4381,7 +4381,9 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
return None
for idx, t in list(enumerate(thumbnails))[::-1]:
- thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
+ thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
+ if multiple:
+ thumb_ext = f'{t["id"]}.{thumb_ext}'
thumb_display_id = f'{label} thumbnail {t["id"]}'
thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 01915acf23..23f6fc6c46 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -279,6 +279,7 @@ class InfoExtractor:
thumbnails: A list of dictionaries, with the following entries:
* "id" (optional, string) - Thumbnail format ID
* "url"
+ * "ext" (optional, string) - actual image extension if not given in URL
* "preference" (optional, int) - quality of the image
* "width" (optional, int)
* "height" (optional, int)
From c699bafc5038b59c9afe8c2e69175fb66424c832 Mon Sep 17 00:00:00 2001
From: bashonly
Date: Thu, 14 Nov 2024 16:09:11 -0600
Subject: [PATCH 131/261] [ie/soop] Fix thumbnail extraction (#11545)
Closes #11537
Authored by: bashonly
---
yt_dlp/extractor/afreecatv.py | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py
index 6682a89817..572d1a3893 100644
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@@ -66,6 +66,14 @@ def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON')
+ @staticmethod
+ def _fixup_thumb(thumb_url):
+ if not url_or_none(thumb_url):
+ return None
+ # Core would determine_ext as 'php' from the url, so we need to provide the real ext
+ # See: https://github.com/yt-dlp/yt-dlp/issues/11537
+ return [{'url': thumb_url, 'ext': 'jpg'}]
+
class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'soop'
@@ -155,7 +163,7 @@ def _real_extract(self, url):
'uploader': ('writer_nick', {str}),
'uploader_id': ('bj_id', {str}),
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
- 'thumbnail': ('thumb', {url_or_none}),
+ 'thumbnails': ('thumb', {self._fixup_thumb}),
})
entries = []
@@ -226,8 +234,7 @@ def _real_extract(self, url):
return self.playlist_result(self._entries(data), video_id)
- @staticmethod
- def _entries(data):
+ def _entries(self, data):
# 'files' is always a list with 1 element
yield from traverse_obj(data, (
'data', lambda _, v: v['story_type'] == 'catch',
@@ -238,7 +245,7 @@ def _entries(data):
'title': ('title', {str}),
'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}),
- 'thumbnail': ('thumb', {url_or_none}),
+ 'thumbnails': ('thumb', {self._fixup_thumb}),
'timestamp': ('write_timestamp', {int_or_none}),
}))
From 70c55cb08f780eab687e881ef42bb5c6007d290b Mon Sep 17 00:00:00 2001
From: Alessandro Campolo
Date: Sat, 16 Nov 2024 13:56:15 +0100
Subject: [PATCH 132/261] [ie/RadioRadicale] Add extractor (#5607)
Authored by: a13ssandr0, pzhlkj6612
Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
---
yt_dlp/extractor/_extractors.py | 1 +
yt_dlp/extractor/radioradicale.py | 105 ++++++++++++++++++++++++++++++
2 files changed, 106 insertions(+)
create mode 100644 yt_dlp/extractor/radioradicale.py
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 51caefd4d7..d6ab610025 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1649,6 +1649,7 @@
RadioKapitalIE,
RadioKapitalShowIE,
)
+from .radioradicale import RadioRadicaleIE
from .radiozet import RadioZetPodcastIE
from .radlive import (
RadLiveChannelIE,
diff --git a/yt_dlp/extractor/radioradicale.py b/yt_dlp/extractor/radioradicale.py
new file mode 100644
index 0000000000..472e25c45f
--- /dev/null
+++ b/yt_dlp/extractor/radioradicale.py
@@ -0,0 +1,105 @@
+from .common import InfoExtractor
+from ..utils import url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class RadioRadicaleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.radioradicale.it/scheda/471591',
+ 'md5': 'eb0fbe43a601f1a361cbd00f3c45af4a',
+ 'info_dict': {
+ 'id': '471591',
+ 'ext': 'mp4',
+ 'title': 'md5:e8fbb8de57011a3255db0beca69af73d',
+ 'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
+ 'location': 'Napoli',
+ 'duration': 2852.0,
+ 'timestamp': 1459987200,
+ 'upload_date': '20160407',
+ 'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg',
+ },
+ }, {
+ 'url': 'https://www.radioradicale.it/scheda/742783/parlamento-riunito-in-seduta-comune-11a-della-xix-legislatura',
+ 'info_dict': {
+ 'id': '742783',
+ 'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
+ 'description': '-) Votazione per l\'elezione di un giudice della Corte Costituzionale (nono scrutinio)',
+ 'location': 'CAMERA',
+ 'duration': 5868.0,
+ 'timestamp': 1730246400,
+ 'upload_date': '20241030',
+ },
+ 'playlist': [{
+ 'md5': 'aa48de55dcc45478e4cd200f299aab7d',
+ 'info_dict': {
+ 'id': '742783-0',
+ 'ext': 'mp4',
+ 'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
+ },
+ }, {
+ 'md5': 'be915c189c70ad2920e5810f32260ff5',
+ 'info_dict': {
+ 'id': '742783-1',
+ 'ext': 'mp4',
+ 'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
+ },
+ }, {
+ 'md5': 'f0ee4047342baf8ed3128a8417ac5e0a',
+ 'info_dict': {
+ 'id': '742783-2',
+ 'ext': 'mp4',
+ 'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
+ },
+ }],
+ }]
+
+ def _entries(self, videos_info, page_id):
+ for idx, video in enumerate(traverse_obj(
+ videos_info, ('playlist', lambda _, v: v['sources']))):
+ video_id = f'{page_id}-{idx}'
+ formats = []
+ subtitles = {}
+
+ for m3u8_url in traverse_obj(video, ('sources', ..., 'src', {url_or_none})):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ for sub in traverse_obj(video, ('subtitles', ..., lambda _, v: url_or_none(v['src']))):
+ self._merge_subtitles({sub.get('srclang') or 'und': [{
+ 'url': sub['src'],
+ 'name': sub.get('label'),
+ }]}, target=subtitles)
+
+ yield {
+ 'id': video_id,
+ 'title': video.get('title'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+
+ videos_info = self._search_json(
+ r'jQuery\.extend\(Drupal\.settings\s*,',
+ webpage, 'videos_info', page_id)['RRscheda']
+
+ entries = list(self._entries(videos_info, page_id))
+
+ common_info = {
+ 'id': page_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'location': videos_info.get('luogo'),
+ **self._search_json_ld(webpage, page_id),
+ }
+
+ if len(entries) == 1:
+ return {
+ **entries[0],
+ **common_info,
+ }
+
+ return self.playlist_result(entries, multi_video=True, **common_info)
From 6365e92589e4bc17b8fffb0125a716d144ad2137 Mon Sep 17 00:00:00 2001
From: sepro
Date: Sat, 16 Nov 2024 17:56:43 +0100
Subject: [PATCH 133/261] [ie/bandlab] Add extractors (#11535)
Closes #7750
Authored by: seproDev
---
yt_dlp/extractor/_extractors.py | 4 +
yt_dlp/extractor/bandlab.py | 438 ++++++++++++++++++++++++++++++++
2 files changed, 442 insertions(+)
create mode 100644 yt_dlp/extractor/bandlab.py
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index d6ab610025..25a233a2d6 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -208,6 +208,10 @@
BandcampUserIE,
BandcampWeeklyIE,
)
+from .bandlab import (
+ BandlabIE,
+ BandlabPlaylistIE,
+)
from .bannedvideo import BannedVideoIE
from .bbc import (
BBCIE,
diff --git a/yt_dlp/extractor/bandlab.py b/yt_dlp/extractor/bandlab.py
new file mode 100644
index 0000000000..e48d5d3f76
--- /dev/null
+++ b/yt_dlp/extractor/bandlab.py
@@ -0,0 +1,438 @@
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ format_field,
+ int_or_none,
+ parse_iso8601,
+ parse_qs,
+ truncate_string,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj, value
+
+
+class BandlabBaseIE(InfoExtractor):
+ def _call_api(self, endpoint, asset_id, **kwargs):
+ headers = kwargs.pop('headers', None) or {}
+ return self._download_json(
+ f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}',
+ asset_id, headers={
+ 'accept': 'application/json',
+ 'referer': 'https://www.bandlab.com/',
+ 'x-client-id': 'BandLab-Web',
+ 'x-client-version': '10.1.124',
+ **headers,
+ }, **kwargs)
+
+ def _parse_revision(self, revision_data, url=None):
+ return {
+ 'vcodec': 'none',
+ 'media_type': 'revision',
+ 'extractor_key': BandlabIE.ie_key(),
+ 'extractor': BandlabIE.IE_NAME,
+ **traverse_obj(revision_data, {
+ 'webpage_url': (
+ 'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
+ 'id': (('revisionId', 'id'), {str}, any),
+ 'title': ('song', 'name', {str}),
+ 'track': ('song', 'name', {str}),
+ 'url': ('mixdown', 'file', {url_or_none}),
+ 'thumbnail': ('song', 'picture', 'url', {url_or_none}),
+ 'description': ('description', {str}),
+ 'uploader': ('creator', 'name', {str}),
+ 'uploader_id': ('creator', 'username', {str}),
+ 'timestamp': ('createdOn', {parse_iso8601}),
+ 'duration': ('mixdown', 'duration', {float_or_none}),
+ 'view_count': ('counters', 'plays', {int_or_none}),
+ 'like_count': ('counters', 'likes', {int_or_none}),
+ 'comment_count': ('counters', 'comments', {int_or_none}),
+ 'genres': ('genres', ..., 'name', {str}),
+ }),
+ }
+
+ def _parse_track(self, track_data, url=None):
+ return {
+ 'vcodec': 'none',
+ 'media_type': 'track',
+ 'extractor_key': BandlabIE.ie_key(),
+ 'extractor': BandlabIE.IE_NAME,
+ **traverse_obj(track_data, {
+ 'webpage_url': (
+ 'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
+ 'id': (('revisionId', 'id'), {str}, any),
+ 'url': ('track', 'sample', 'audioUrl', {url_or_none}),
+ 'title': ('track', 'name', {str}),
+ 'track': ('track', 'name', {str}),
+ 'description': ('caption', {str}),
+ 'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
+ 'view_count': ('counters', 'plays', {int_or_none}),
+ 'like_count': ('counters', 'likes', {int_or_none}),
+ 'comment_count': ('counters', 'comments', {int_or_none}),
+ 'duration': ('track', 'sample', 'duration', {float_or_none}),
+ 'uploader': ('creator', 'name', {str}),
+ 'uploader_id': ('creator', 'username', {str}),
+ 'timestamp': ('createdOn', {parse_iso8601}),
+ }),
+ }
+
+ def _parse_video(self, video_data, url=None):
+ return {
+ 'media_type': 'video',
+ 'extractor_key': BandlabIE.ie_key(),
+ 'extractor': BandlabIE.IE_NAME,
+ **traverse_obj(video_data, {
+ 'id': ('id', {str}),
+ 'webpage_url': (
+ 'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
+ 'url': ('video', 'url', {url_or_none}),
+ 'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
+ 'description': ('caption', {str}),
+ 'thumbnail': ('video', 'picture', 'url', {url_or_none}),
+ 'view_count': ('video', 'counters', 'plays', {int_or_none}),
+ 'like_count': ('video', 'counters', 'likes', {int_or_none}),
+ 'comment_count': ('counters', 'comments', {int_or_none}),
+ 'duration': ('video', 'duration', {float_or_none}),
+ 'uploader': ('creator', 'name', {str}),
+ 'uploader_id': ('creator', 'username', {str}),
+ }),
+ }
+
+
+class BandlabIE(BandlabBaseIE):
+ _VALID_URL = [
+ r'https?://(?:www\.)?bandlab.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
+ r'https?://(?:www\.)?bandlab.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
+ ]
+ _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']