mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-03-01 03:40:10 +00:00
Better support HLS media discontinuity and fully support media initialization (#105)
* Added options: `--hls-split-discontinuity` and `--no-hls-split-discontinuity` Authored-by: shirtjs <2660574+shirtjs@users.noreply.github.com>
This commit is contained in:
@@ -364,7 +364,7 @@ class YoutubeDL(object):
|
||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||
xattr_set_filesize, external_downloader_args, hls_use_mpegts,
|
||||
http_chunk_size.
|
||||
hls_split_discontinuity, http_chunk_size.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
|
||||
|
||||
@@ -536,6 +536,7 @@ def _real_main(argv=None):
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
'hls_use_mpegts': opts.hls_use_mpegts,
|
||||
'hls_split_discontinuity': opts.hls_split_discontinuity,
|
||||
'external_downloader_args': opts.external_downloader_args,
|
||||
'postprocessor_args': opts.postprocessor_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
|
||||
@@ -43,7 +43,6 @@ class HlsFD(FragmentFD):
|
||||
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||
# # event media playlists [4]
|
||||
# r'#EXT-X-MAP:', # media initialization [5]
|
||||
r'^\s*(?:[^#\s]|#EXT-X-MAP:).+?\n\s*#EXT-X-MAP:', # media initialization [5]
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||
@@ -129,6 +128,7 @@ class HlsFD(FragmentFD):
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
format_index = info_dict.get('format_index')
|
||||
extra_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
if extra_param_to_segment_url:
|
||||
@@ -138,6 +138,7 @@ class HlsFD(FragmentFD):
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
key_list = []
|
||||
byte_range = {}
|
||||
discontinuity_count = 0
|
||||
frag_index = 0
|
||||
ad_frag_next = False
|
||||
for line in s.splitlines():
|
||||
@@ -145,6 +146,8 @@ class HlsFD(FragmentFD):
|
||||
download_frag = False
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
if format_index and discontinuity_count != format_index:
|
||||
continue
|
||||
if ad_frag_next:
|
||||
continue
|
||||
frag_index += 1
|
||||
@@ -163,6 +166,8 @@ class HlsFD(FragmentFD):
|
||||
download_frag = True
|
||||
|
||||
elif line.startswith('#EXT-X-MAP'):
|
||||
if format_index and discontinuity_count != format_index:
|
||||
continue
|
||||
if frag_index > 0:
|
||||
self.report_error(
|
||||
'initialization fragment found after media fragments, unable to download')
|
||||
@@ -218,6 +223,8 @@ class HlsFD(FragmentFD):
|
||||
ad_frag_next = True
|
||||
elif is_ad_fragment_end(line):
|
||||
ad_frag_next = False
|
||||
elif line.startswith('#EXT-X-DISCONTINUITY'):
|
||||
discontinuity_count += 1
|
||||
|
||||
if download_frag:
|
||||
count = 0
|
||||
|
||||
@@ -1833,9 +1833,8 @@ class InfoExtractor(object):
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None, quality=None,
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False, data=None, headers={},
|
||||
query={}):
|
||||
m3u8_id=None, live=False, note=None, errnote=None,
|
||||
fatal=True, data=None, headers={}, query={}):
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note=note or 'Downloading m3u8 information',
|
||||
@@ -1850,11 +1849,14 @@ class InfoExtractor(object):
|
||||
|
||||
return self._parse_m3u8_formats(
|
||||
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
||||
preference=preference, quality=quality, m3u8_id=m3u8_id, live=live)
|
||||
preference=preference, quality=quality, m3u8_id=m3u8_id,
|
||||
note=note, errnote=errnote, fatal=fatal, live=live, data=data,
|
||||
headers=headers, query=query, video_id=video_id)
|
||||
|
||||
def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
|
||||
entry_protocol='m3u8', preference=None, quality=None,
|
||||
m3u8_id=None, live=False):
|
||||
m3u8_id=None, live=False, note=None, errnote=None,
|
||||
fatal=True, data=None, headers={}, query={}, video_id=None):
|
||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||
return []
|
||||
|
||||
@@ -1868,6 +1870,8 @@ class InfoExtractor(object):
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
split_discontinuity = self._downloader.params.get('hls_split_discontinuity', False)
|
||||
|
||||
# References:
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||
# 2. https://github.com/ytdl-org/youtube-dl/issues/12211
|
||||
@@ -1884,15 +1888,67 @@ class InfoExtractor(object):
|
||||
# media playlist and MUST NOT appear in master playlist thus we can
|
||||
# clearly detect media playlist with this criterion.
|
||||
|
||||
def _extract_m3u8_playlist_formats(format_url, m3u8_doc=None):
|
||||
if not m3u8_doc:
|
||||
res = self._download_webpage_handle(
|
||||
format_url, video_id,
|
||||
note=False,
|
||||
errnote=errnote or 'Failed to download m3u8 playlist information',
|
||||
fatal=fatal, data=data, headers=headers, query=query)
|
||||
|
||||
if res is False:
|
||||
return []
|
||||
|
||||
m3u8_doc, urlh = res
|
||||
format_url = urlh.geturl()
|
||||
|
||||
playlist_formats = []
|
||||
i = (
|
||||
0
|
||||
if split_discontinuity
|
||||
else None)
|
||||
format_info = {
|
||||
'index': i,
|
||||
'key_data': None,
|
||||
'files': [],
|
||||
}
|
||||
for line in m3u8_doc.splitlines():
|
||||
if not line.startswith('#'):
|
||||
format_info['files'].append(line)
|
||||
elif split_discontinuity and line.startswith('#EXT-X-DISCONTINUITY'):
|
||||
i += 1
|
||||
playlist_formats.append(format_info)
|
||||
format_info = {
|
||||
'index': i,
|
||||
'url': format_url,
|
||||
'files': [],
|
||||
}
|
||||
playlist_formats.append(format_info)
|
||||
return playlist_formats
|
||||
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
|
||||
return [{
|
||||
'url': m3u8_url,
|
||||
'format_id': m3u8_id,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
}]
|
||||
|
||||
playlist_formats = _extract_m3u8_playlist_formats(m3u8_doc, True)
|
||||
|
||||
for format in playlist_formats:
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
format_index = format.get('index')
|
||||
if format_index:
|
||||
format_id.append(str(format_index))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'format_index': format_index,
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
}
|
||||
formats.append(f)
|
||||
|
||||
return formats
|
||||
|
||||
groups = {}
|
||||
last_stream_inf = {}
|
||||
@@ -1908,23 +1964,31 @@ class InfoExtractor(object):
|
||||
return
|
||||
media_url = media.get('URI')
|
||||
if media_url:
|
||||
manifest_url = format_url(media_url)
|
||||
format_id = []
|
||||
for v in (m3u8_id, group_id, name):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(media_url),
|
||||
'manifest_url': m3u8_url,
|
||||
'language': media.get('LANGUAGE'),
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
}
|
||||
if media_type == 'AUDIO':
|
||||
f['vcodec'] = 'none'
|
||||
formats.append(f)
|
||||
playlist_formats = _extract_m3u8_playlist_formats(manifest_url)
|
||||
|
||||
for format in playlist_formats:
|
||||
format_index = format.get('index')
|
||||
for v in (m3u8_id, group_id, name):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
if format_index:
|
||||
format_id.append(str(format_index))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'format_index': format_index,
|
||||
'url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'language': media.get('LANGUAGE'),
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
}
|
||||
if media_type == 'AUDIO':
|
||||
f['vcodec'] = 'none'
|
||||
formats.append(f)
|
||||
|
||||
def build_stream_name():
|
||||
# Despite specification does not mention NAME attribute for
|
||||
@@ -1961,74 +2025,82 @@ class InfoExtractor(object):
|
||||
tbr = float_or_none(
|
||||
last_stream_inf.get('AVERAGE-BANDWIDTH')
|
||||
or last_stream_inf.get('BANDWIDTH'), scale=1000)
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
stream_name = build_stream_name()
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
if not live:
|
||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||
manifest_url = format_url(line.strip())
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
}
|
||||
resolution = last_stream_inf.get('RESOLUTION')
|
||||
if resolution:
|
||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||
if mobj:
|
||||
f['width'] = int(mobj.group('width'))
|
||||
f['height'] = int(mobj.group('height'))
|
||||
# Unified Streaming Platform
|
||||
mobj = re.search(
|
||||
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
|
||||
if mobj:
|
||||
abr, vbr = mobj.groups()
|
||||
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||
f.update({
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
codecs = parse_codecs(last_stream_inf.get('CODECS'))
|
||||
f.update(codecs)
|
||||
audio_group_id = last_stream_inf.get('AUDIO')
|
||||
# As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
|
||||
# references a rendition group MUST have a CODECS attribute.
|
||||
# However, this is not always respected, for example, [2]
|
||||
# contains EXT-X-STREAM-INF tag which references AUDIO
|
||||
# rendition group but does not have CODECS and despite
|
||||
# referencing an audio group it represents a complete
|
||||
# (with audio and video) format. So, for such cases we will
|
||||
# ignore references to rendition groups and treat them
|
||||
# as complete formats.
|
||||
if audio_group_id and codecs and f.get('vcodec') != 'none':
|
||||
audio_group = groups.get(audio_group_id)
|
||||
if audio_group and audio_group[0].get('URI'):
|
||||
# TODO: update acodec for audio only formats with
|
||||
# the same GROUP-ID
|
||||
f['acodec'] = 'none'
|
||||
formats.append(f)
|
||||
|
||||
# for DailyMotion
|
||||
progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
|
||||
if progressive_uri:
|
||||
http_f = f.copy()
|
||||
del http_f['manifest_url']
|
||||
http_f.update({
|
||||
'format_id': f['format_id'].replace('hls-', 'http-'),
|
||||
'protocol': 'http',
|
||||
'url': progressive_uri,
|
||||
})
|
||||
formats.append(http_f)
|
||||
playlist_formats = _extract_m3u8_playlist_formats(manifest_url)
|
||||
|
||||
for format in playlist_formats:
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
format_index = format.get('index')
|
||||
stream_name = build_stream_name()
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
if not live:
|
||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||
if format_index:
|
||||
format_id.append(str(format_index))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'format_index': format_index,
|
||||
'url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
'quality': quality,
|
||||
}
|
||||
resolution = last_stream_inf.get('RESOLUTION')
|
||||
if resolution:
|
||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||
if mobj:
|
||||
f['width'] = int(mobj.group('width'))
|
||||
f['height'] = int(mobj.group('height'))
|
||||
# Unified Streaming Platform
|
||||
mobj = re.search(
|
||||
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
|
||||
if mobj:
|
||||
abr, vbr = mobj.groups()
|
||||
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||
f.update({
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
codecs = parse_codecs(last_stream_inf.get('CODECS'))
|
||||
f.update(codecs)
|
||||
audio_group_id = last_stream_inf.get('AUDIO')
|
||||
# As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
|
||||
# references a rendition group MUST have a CODECS attribute.
|
||||
# However, this is not always respected, for example, [2]
|
||||
# contains EXT-X-STREAM-INF tag which references AUDIO
|
||||
# rendition group but does not have CODECS and despite
|
||||
# referencing an audio group it represents a complete
|
||||
# (with audio and video) format. So, for such cases we will
|
||||
# ignore references to rendition groups and treat them
|
||||
# as complete formats.
|
||||
if audio_group_id and codecs and f.get('vcodec') != 'none':
|
||||
audio_group = groups.get(audio_group_id)
|
||||
if audio_group and audio_group[0].get('URI'):
|
||||
# TODO: update acodec for audio only formats with
|
||||
# the same GROUP-ID
|
||||
f['acodec'] = 'none'
|
||||
formats.append(f)
|
||||
|
||||
# for DailyMotion
|
||||
progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
|
||||
if progressive_uri:
|
||||
http_f = f.copy()
|
||||
del http_f['manifest_url']
|
||||
http_f.update({
|
||||
'format_id': f['format_id'].replace('hls-', 'http-'),
|
||||
'protocol': 'http',
|
||||
'url': progressive_uri,
|
||||
})
|
||||
formats.append(http_f)
|
||||
|
||||
last_stream_inf = {}
|
||||
return formats
|
||||
|
||||
@@ -1226,6 +1226,15 @@ def parseOpts(overrideArguments=None):
|
||||
'--ignore-dynamic-mpd', '--no-allow-dynamic-mpd',
|
||||
action='store_false', dest='dynamic_mpd',
|
||||
help='Do not process dynamic DASH manifests (Alias: --no-allow-dynamic-mpd)')
|
||||
extractor.add_option(
|
||||
'--hls-split-discontinuity',
|
||||
dest='hls_split_discontinuity', action='store_true', default=False,
|
||||
help='Split HLS playlists to different formats at discontinuities such as ad breaks'
|
||||
)
|
||||
extractor.add_option(
|
||||
'--no-hls-split-discontinuity',
|
||||
dest='hls_split_discontinuity', action='store_false',
|
||||
help='Do not split HLS playlists to different formats at discontinuities such as ad breaks (default)')
|
||||
|
||||
parser.add_option_group(general)
|
||||
parser.add_option_group(network)
|
||||
|
||||
Reference in New Issue
Block a user