mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-02-19 15:05:47 +00:00
[ie/youtube] Extract live adaptive incomplete formats (#15937)
Closes #10148 Authored by: CanOfSocks, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
This commit is contained in:
@@ -1870,7 +1870,7 @@ The following extractors use this feature:
|
||||
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread,max-depth`. Default is `all,all,all,all,all`
|
||||
* A `max-depth` value of `1` will discard all replies, regardless of the `max-replies` or `max-replies-per-thread` values given
|
||||
* E.g. `all,all,1000,10,2` will get a maximum of 1000 replies total, with up to 10 replies per thread, and only 2 levels of depth (i.e. top-level comments plus their immediate replies). `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
||||
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one)
|
||||
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash, live adaptive https, and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one)
|
||||
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
||||
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
|
||||
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
|
||||
|
||||
@@ -3212,6 +3212,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
])
|
||||
skip_player_js = 'js' in self._configuration_arg('player_skip')
|
||||
format_types = self._configuration_arg('formats')
|
||||
skip_bad_formats = 'incomplete' not in format_types
|
||||
all_formats = 'duplicate' in format_types
|
||||
if self._configuration_arg('include_duplicate_formats'):
|
||||
all_formats = True
|
||||
@@ -3457,7 +3458,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
https_fmts = []
|
||||
|
||||
for fmt_stream in streaming_formats:
|
||||
if fmt_stream.get('targetDurationSec'):
|
||||
# Live adaptive https formats are not supported: skip unless extractor-arg given
|
||||
if fmt_stream.get('targetDurationSec') and skip_bad_formats:
|
||||
continue
|
||||
|
||||
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
|
||||
@@ -3569,7 +3571,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
yield from process_https_formats()
|
||||
|
||||
needs_live_processing = self._needs_live_processing(live_status, duration)
|
||||
skip_bad_formats = 'incomplete' not in format_types
|
||||
|
||||
skip_manifests = set(self._configuration_arg('skip'))
|
||||
if (needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
|
||||
@@ -4079,16 +4080,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
needs_live_processing = self._needs_live_processing(live_status, duration)
|
||||
|
||||
def is_bad_format(fmt):
|
||||
if needs_live_processing and not fmt.get('is_from_start'):
|
||||
return True
|
||||
elif (live_status == 'is_live' and needs_live_processing != 'is_live'
|
||||
and fmt.get('protocol') == 'http_dash_segments'):
|
||||
return True
|
||||
def adjust_incomplete_format(fmt, note_suffix='(Last 2 hours)', pref_adjustment=-10):
|
||||
fmt['preference'] = (fmt.get('preference') or -1) + pref_adjustment
|
||||
fmt['format_note'] = join_nonempty(fmt.get('format_note'), note_suffix, delim=' ')
|
||||
|
||||
for fmt in filter(is_bad_format, formats):
|
||||
fmt['preference'] = (fmt.get('preference') or -1) - 10
|
||||
fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
|
||||
# Adjust preference and format note for incomplete live/post-live formats
|
||||
if live_status in ('is_live', 'post_live'):
|
||||
for fmt in formats:
|
||||
protocol = fmt.get('protocol')
|
||||
# Currently, protocol isn't set for adaptive https formats, but this could change
|
||||
is_adaptive = protocol in (None, 'http', 'https')
|
||||
if live_status == 'post_live' and is_adaptive:
|
||||
# Post-live adaptive formats cause HttpFD to raise "Did not get any data blocks"
|
||||
# These formats are *only* useful to external applications, so we can hide them
|
||||
# Set their preference <= -1000 so that FormatSorter flags them as 'hidden'
|
||||
adjust_incomplete_format(fmt, note_suffix='(ended)', pref_adjustment=-5000)
|
||||
# Is it live with --live-from-start? Or is it post-live and its duration is >2hrs?
|
||||
elif needs_live_processing:
|
||||
if not fmt.get('is_from_start'):
|
||||
# Post-live m3u8 formats for >2hr streams
|
||||
adjust_incomplete_format(fmt)
|
||||
elif live_status == 'is_live':
|
||||
if protocol == 'http_dash_segments':
|
||||
# Live DASH formats without --live-from-start
|
||||
adjust_incomplete_format(fmt)
|
||||
elif is_adaptive:
|
||||
# Incomplete live adaptive https formats
|
||||
adjust_incomplete_format(fmt, note_suffix='(incomplete)', pref_adjustment=-20)
|
||||
|
||||
if needs_live_processing:
|
||||
self._prepare_live_from_start_formats(
|
||||
|
||||
Reference in New Issue
Block a user