mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-11-13 13:05:13 +00:00
[ie/web.archive:youtube] Fix extractor (#14753)
Closes #14681, Closes #14741 Authored by: seproDev
This commit is contained in:
@@ -13,12 +13,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import copy
|
import copy
|
||||||
import itertools
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from test.helper import FakeYDL, assertRegexpMatches, try_rm
|
from test.helper import FakeYDL, assertRegexpMatches, try_rm
|
||||||
from yt_dlp import YoutubeDL
|
from yt_dlp import YoutubeDL
|
||||||
from yt_dlp.extractor import YoutubeIE
|
|
||||||
from yt_dlp.extractor.common import InfoExtractor
|
from yt_dlp.extractor.common import InfoExtractor
|
||||||
from yt_dlp.postprocessor.common import PostProcessor
|
from yt_dlp.postprocessor.common import PostProcessor
|
||||||
from yt_dlp.utils import (
|
from yt_dlp.utils import (
|
||||||
@@ -337,99 +335,6 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
ydl = YDL({'format': '[format_id!*=-]'})
|
ydl = YDL({'format': '[format_id!*=-]'})
|
||||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||||
|
|
||||||
def test_youtube_format_selection(self):
|
|
||||||
# FIXME: Rewrite in accordance with the new format sorting options
|
|
||||||
return
|
|
||||||
|
|
||||||
order = [
|
|
||||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
|
|
||||||
# Apple HTTP Live Streaming
|
|
||||||
'96', '95', '94', '93', '92', '132', '151',
|
|
||||||
# 3D
|
|
||||||
'85', '84', '102', '83', '101', '82', '100',
|
|
||||||
# Dash video
|
|
||||||
'137', '248', '136', '247', '135', '246',
|
|
||||||
'245', '244', '134', '243', '133', '242', '160',
|
|
||||||
# Dash audio
|
|
||||||
'141', '172', '140', '171', '139',
|
|
||||||
]
|
|
||||||
|
|
||||||
def format_info(f_id):
|
|
||||||
info = YoutubeIE._formats[f_id].copy()
|
|
||||||
|
|
||||||
# XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
|
|
||||||
# and 'vcodec', while in tests such information is incomplete since
|
|
||||||
# commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
|
|
||||||
# test_YoutubeDL.test_youtube_format_selection is broken without
|
|
||||||
# this fix
|
|
||||||
if 'acodec' in info and 'vcodec' not in info:
|
|
||||||
info['vcodec'] = 'none'
|
|
||||||
elif 'vcodec' in info and 'acodec' not in info:
|
|
||||||
info['acodec'] = 'none'
|
|
||||||
|
|
||||||
info['format_id'] = f_id
|
|
||||||
info['url'] = 'url:' + f_id
|
|
||||||
return info
|
|
||||||
formats_order = [format_info(f_id) for f_id in order]
|
|
||||||
|
|
||||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
|
||||||
ydl = YDL({'format': 'bestvideo+bestaudio'})
|
|
||||||
ydl.sort_formats(info_dict)
|
|
||||||
ydl.process_ie_result(info_dict)
|
|
||||||
downloaded = ydl.downloaded_info_dicts[0]
|
|
||||||
self.assertEqual(downloaded['format_id'], '248+172')
|
|
||||||
self.assertEqual(downloaded['ext'], 'mp4')
|
|
||||||
|
|
||||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
|
||||||
ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
|
|
||||||
ydl.sort_formats(info_dict)
|
|
||||||
ydl.process_ie_result(info_dict)
|
|
||||||
downloaded = ydl.downloaded_info_dicts[0]
|
|
||||||
self.assertEqual(downloaded['format_id'], '38')
|
|
||||||
|
|
||||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
|
||||||
ydl = YDL({'format': 'bestvideo/best,bestaudio'})
|
|
||||||
ydl.sort_formats(info_dict)
|
|
||||||
ydl.process_ie_result(info_dict)
|
|
||||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
|
||||||
self.assertEqual(downloaded_ids, ['137', '141'])
|
|
||||||
|
|
||||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
|
||||||
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
|
|
||||||
ydl.sort_formats(info_dict)
|
|
||||||
ydl.process_ie_result(info_dict)
|
|
||||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
|
||||||
self.assertEqual(downloaded_ids, ['137+141', '248+141'])
|
|
||||||
|
|
||||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
|
||||||
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
|
|
||||||
ydl.sort_formats(info_dict)
|
|
||||||
ydl.process_ie_result(info_dict)
|
|
||||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
|
||||||
self.assertEqual(downloaded_ids, ['136+141', '247+141'])
|
|
||||||
|
|
||||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
|
||||||
ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
|
|
||||||
ydl.sort_formats(info_dict)
|
|
||||||
ydl.process_ie_result(info_dict)
|
|
||||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
|
||||||
self.assertEqual(downloaded_ids, ['248+141'])
|
|
||||||
|
|
||||||
for f1, f2 in itertools.pairwise(formats_order):
|
|
||||||
info_dict = _make_result([f1, f2], extractor='youtube')
|
|
||||||
ydl = YDL({'format': 'best/bestvideo'})
|
|
||||||
ydl.sort_formats(info_dict)
|
|
||||||
ydl.process_ie_result(info_dict)
|
|
||||||
downloaded = ydl.downloaded_info_dicts[0]
|
|
||||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
|
||||||
|
|
||||||
info_dict = _make_result([f2, f1], extractor='youtube')
|
|
||||||
ydl = YDL({'format': 'best/bestvideo'})
|
|
||||||
ydl.sort_formats(info_dict)
|
|
||||||
ydl.process_ie_result(info_dict)
|
|
||||||
downloaded = ydl.downloaded_info_dicts[0]
|
|
||||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
|
||||||
|
|
||||||
def test_audio_only_extractor_format_selection(self):
|
def test_audio_only_extractor_format_selection(self):
|
||||||
# For extractors with incomplete formats (all formats are audio-only or
|
# For extractors with incomplete formats (all formats are audio-only or
|
||||||
# video-only) best and worst should fallback to corresponding best/worst
|
# video-only) best and worst should fallback to corresponding best/worst
|
||||||
|
|||||||
@@ -5,12 +5,9 @@ import re
|
|||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
from .youtube import YoutubeBaseInfoExtractor
|
||||||
from ..networking import HEADRequest
|
|
||||||
from ..networking.exceptions import HTTPError
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
KNOWN_EXTENSIONS,
|
KNOWN_EXTENSIONS,
|
||||||
ExtractorError,
|
|
||||||
bug_reports_message,
|
bug_reports_message,
|
||||||
clean_html,
|
clean_html,
|
||||||
dict_get,
|
dict_get,
|
||||||
@@ -21,18 +18,14 @@ from ..utils import (
|
|||||||
join_nonempty,
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
mimetype2ext,
|
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlhandle_detect_ext,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -471,7 +464,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||||||
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
|
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'lTx3G6h2xyA',
|
'id': 'lTx3G6h2xyA',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Madeon - Pop Culture (live mashup)',
|
'title': 'Madeon - Pop Culture (live mashup)',
|
||||||
'upload_date': '20110711',
|
'upload_date': '20110711',
|
||||||
'uploader': 'Madeon',
|
'uploader': 'Madeon',
|
||||||
@@ -578,7 +571,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||||||
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
|
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Q_yjX80U7Yc',
|
'id': 'Q_yjX80U7Yc',
|
||||||
'ext': 'flv',
|
'ext': 'webm',
|
||||||
'title': 'Spray Paint Art by Clay Butler: Purple Fantasy Forest',
|
'title': 'Spray Paint Art by Clay Butler: Purple Fantasy Forest',
|
||||||
'uploader_id': 'claybutlermusic',
|
'uploader_id': 'claybutlermusic',
|
||||||
'description': 'md5:4595264559e3d0a0ceb3f011f6334543',
|
'description': 'md5:4595264559e3d0a0ceb3f011f6334543',
|
||||||
@@ -680,6 +673,37 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||||||
'upload_date': '20120407',
|
'upload_date': '20120407',
|
||||||
'uploader_id': 'thecomputernerd01',
|
'uploader_id': 'thecomputernerd01',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# Contains split audio/video formats
|
||||||
|
'url': 'ytarchive:o_T_S_TU12M',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'o_T_S_TU12M',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Prairie Pulse 1218; Lin Enger, Paul Olson',
|
||||||
|
'description': 'md5:36e7a34cdc8508e35a920ec042e799c7',
|
||||||
|
'uploader': 'Prairie Public',
|
||||||
|
'channel_id': 'UC4BOzQel6tvJm7OEDd3vZlw',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UC4BOzQel6tvJm7OEDd3vZlw',
|
||||||
|
'duration': 1606,
|
||||||
|
'upload_date': '20150213',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Video unavailable through wayback-fakeurl
|
||||||
|
'url': 'ytarchive:SQCom7wjGDs',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'SQCom7wjGDs',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Jamin Warren from PBS Game/Show decides that Portal is a feminist Game [Top Hats and No Brain]',
|
||||||
|
'description': 'md5:c0cb876dd075483ead9afcc86798efb0',
|
||||||
|
'uploader': 'Top Hats and Champagne',
|
||||||
|
'uploader_id': 'sparrowtm',
|
||||||
|
'uploader_url': 'https://www.youtube.com/user/sparrowtm',
|
||||||
|
'channel_id': 'UCW3T5nG4iEkI7HjG-Du3HQA',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCW3T5nG4iEkI7HjG-Du3HQA',
|
||||||
|
'duration': 1500,
|
||||||
|
'thumbnail': 'https://web.archive.org/web/20160108040020if_/https://i.ytimg.com/vi/SQCom7wjGDs/maxresdefault.jpg',
|
||||||
|
'upload_date': '20160107',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -724,6 +748,113 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||||||
_OLDEST_CAPTURE_DATE = 20050214000000
|
_OLDEST_CAPTURE_DATE = 20050214000000
|
||||||
_NEWEST_CAPTURE_DATE = 20500101000000
|
_NEWEST_CAPTURE_DATE = 20500101000000
|
||||||
|
|
||||||
|
_FORMATS = {
|
||||||
|
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'vcodec': 'h263'},
|
||||||
|
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'vcodec': 'h263'},
|
||||||
|
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||||
|
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||||
|
'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
|
||||||
|
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||||
|
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'vcodec': 'vp8'},
|
||||||
|
'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'vcodec': 'vp8'},
|
||||||
|
'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'vcodec': 'vp8'},
|
||||||
|
'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'vcodec': 'vp8'},
|
||||||
|
'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
|
||||||
|
|
||||||
|
# 3D videos
|
||||||
|
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
|
||||||
|
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
|
||||||
|
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
|
||||||
|
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'vcodec': 'h264', 'preference': -20},
|
||||||
|
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'vcodec': 'vp8', 'preference': -20},
|
||||||
|
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'vcodec': 'vp8', 'preference': -20},
|
||||||
|
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'vcodec': 'vp8', 'preference': -20},
|
||||||
|
|
||||||
|
# Apple HTTP Live Streaming
|
||||||
|
'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'vcodec': 'h264'},
|
||||||
|
|
||||||
|
# DASH mp4 video
|
||||||
|
'133': {'ext': 'mp4', 'height': 240, 'vcodec': 'h264', 'acodec': 'none'},
|
||||||
|
'134': {'ext': 'mp4', 'height': 360, 'vcodec': 'h264', 'acodec': 'none'},
|
||||||
|
'135': {'ext': 'mp4', 'height': 480, 'vcodec': 'h264', 'acodec': 'none'},
|
||||||
|
'136': {'ext': 'mp4', 'height': 720, 'vcodec': 'h264', 'acodec': 'none'},
|
||||||
|
'137': {'ext': 'mp4', 'height': 1080, 'vcodec': 'h264', 'acodec': 'none'},
|
||||||
|
'138': {'ext': 'mp4', 'vcodec': 'h264', 'acodec': 'none'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
|
||||||
|
'160': {'ext': 'mp4', 'height': 144, 'vcodec': 'h264', 'acodec': 'none'},
|
||||||
|
'212': {'ext': 'mp4', 'height': 480, 'vcodec': 'h264', 'acodec': 'none'},
|
||||||
|
'264': {'ext': 'mp4', 'height': 1440, 'vcodec': 'h264', 'acodec': 'none'},
|
||||||
|
'298': {'ext': 'mp4', 'height': 720, 'vcodec': 'h264', 'fps': 60, 'acodec': 'none'},
|
||||||
|
'299': {'ext': 'mp4', 'height': 1080, 'vcodec': 'h264', 'fps': 60, 'acodec': 'none'},
|
||||||
|
'266': {'ext': 'mp4', 'height': 2160, 'vcodec': 'h264', 'acodec': 'none'},
|
||||||
|
|
||||||
|
# Dash mp4 audio
|
||||||
|
'139': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
|
||||||
|
'140': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
|
||||||
|
'141': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
|
||||||
|
'256': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
|
||||||
|
'258': {'ext': 'm4a', 'acodec': 'aac', 'vcodec': 'none'},
|
||||||
|
'325': {'ext': 'm4a', 'acodec': 'dtse', 'vcodec': 'none'},
|
||||||
|
'328': {'ext': 'm4a', 'acodec': 'ec-3', 'vcodec': 'none'},
|
||||||
|
|
||||||
|
# Dash webm
|
||||||
|
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'vcodec': 'vp8'},
|
||||||
|
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'vcodec': 'vp8'},
|
||||||
|
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'vcodec': 'vp8'},
|
||||||
|
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'vcodec': 'vp8'},
|
||||||
|
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'vcodec': 'vp8'},
|
||||||
|
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'vcodec': 'vp8'},
|
||||||
|
'278': {'ext': 'webm', 'height': 144, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
'242': {'ext': 'webm', 'height': 240, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
'243': {'ext': 'webm', 'height': 360, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
'244': {'ext': 'webm', 'height': 480, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
'245': {'ext': 'webm', 'height': 480, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
'246': {'ext': 'webm', 'height': 480, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
'247': {'ext': 'webm', 'height': 720, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
'248': {'ext': 'webm', 'height': 1080, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
'271': {'ext': 'webm', 'height': 1440, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
||||||
|
'272': {'ext': 'webm', 'height': 2160, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
'302': {'ext': 'webm', 'height': 720, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
|
||||||
|
'303': {'ext': 'webm', 'height': 1080, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
|
||||||
|
'308': {'ext': 'webm', 'height': 1440, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
|
||||||
|
'313': {'ext': 'webm', 'height': 2160, 'vcodec': 'vp9', 'acodec': 'none'},
|
||||||
|
'315': {'ext': 'webm', 'height': 2160, 'vcodec': 'vp9', 'fps': 60, 'acodec': 'none'},
|
||||||
|
|
||||||
|
# Dash webm audio
|
||||||
|
'171': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none'},
|
||||||
|
'172': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none'},
|
||||||
|
|
||||||
|
# Dash webm audio with opus inside
|
||||||
|
'249': {'ext': 'webm', 'acodec': 'opus', 'vcodec': 'none'},
|
||||||
|
'250': {'ext': 'webm', 'acodec': 'opus', 'vcodec': 'none'},
|
||||||
|
'251': {'ext': 'webm', 'acodec': 'opus', 'vcodec': 'none'},
|
||||||
|
|
||||||
|
# av01 video only formats sometimes served with "unknown" codecs
|
||||||
|
'394': {'ext': 'mp4', 'height': 144, 'vcodec': 'av01.0.00M.08', 'acodec': 'none'},
|
||||||
|
'395': {'ext': 'mp4', 'height': 240, 'vcodec': 'av01.0.00M.08', 'acodec': 'none'},
|
||||||
|
'396': {'ext': 'mp4', 'height': 360, 'vcodec': 'av01.0.01M.08', 'acodec': 'none'},
|
||||||
|
'397': {'ext': 'mp4', 'height': 480, 'vcodec': 'av01.0.04M.08', 'acodec': 'none'},
|
||||||
|
'398': {'ext': 'mp4', 'height': 720, 'vcodec': 'av01.0.05M.08', 'acodec': 'none'},
|
||||||
|
'399': {'ext': 'mp4', 'height': 1080, 'vcodec': 'av01.0.08M.08', 'acodec': 'none'},
|
||||||
|
'400': {'ext': 'mp4', 'height': 1440, 'vcodec': 'av01.0.12M.08', 'acodec': 'none'},
|
||||||
|
'401': {'ext': 'mp4', 'height': 2160, 'vcodec': 'av01.0.12M.08', 'acodec': 'none'},
|
||||||
|
}
|
||||||
|
|
||||||
def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
|
def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
|
||||||
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
|
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
|
||||||
query = {
|
query = {
|
||||||
@@ -933,23 +1064,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||||||
video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
|
video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
|
||||||
url_date = url_date or url_date_2
|
url_date = url_date or url_date_2
|
||||||
|
|
||||||
urlh = None
|
video_info = self._download_json(
|
||||||
retry_manager = self.RetryManager(fatal=False)
|
'https://web.archive.org/__wb/videoinfo', video_id,
|
||||||
for retry in retry_manager:
|
query={'vtype': 'youtube', 'vid': video_id})
|
||||||
try:
|
|
||||||
urlh = self._request_webpage(
|
|
||||||
HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
|
|
||||||
video_id, note='Fetching archived video file url', expected_status=True)
|
|
||||||
except ExtractorError as e:
|
|
||||||
# HTTP Error 404 is expected if the video is not saved.
|
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
|
||||||
self.raise_no_formats(
|
|
||||||
'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
|
|
||||||
else:
|
|
||||||
retry.error = e
|
|
||||||
|
|
||||||
if retry_manager.error:
|
if not traverse_obj(video_info, 'formats'):
|
||||||
self.raise_no_formats(retry_manager.error, expected=True, video_id=video_id)
|
self.raise_no_formats(
|
||||||
|
'The requested video is not archived or indexed', expected=True)
|
||||||
|
|
||||||
capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
|
capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
|
||||||
self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))
|
self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))
|
||||||
@@ -968,25 +1089,18 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||||||
|
|
||||||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||||
|
|
||||||
if urlh:
|
formats = []
|
||||||
url = urllib.parse.unquote(urlh.url)
|
for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
|
||||||
video_file_url_qs = parse_qs(url)
|
format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
|
||||||
# Attempt to recover any ext & format info from playback url & response headers
|
formats.append({
|
||||||
fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
'format_id': format_id,
|
||||||
itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
|
**self._FORMATS.get(format_id, {}),
|
||||||
if itag and itag in YoutubeIE._formats:
|
**traverse_obj(fmt, {
|
||||||
fmt.update(YoutubeIE._formats[itag])
|
'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
|
||||||
fmt.update({'format_id': itag})
|
'ext': ('ext', {str}),
|
||||||
else:
|
'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
|
||||||
mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
|
}),
|
||||||
ext = (mimetype2ext(mime)
|
})
|
||||||
or urlhandle_detect_ext(urlh)
|
info['formats'] = formats
|
||||||
or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
|
|
||||||
fmt.update({'ext': ext})
|
|
||||||
info['formats'] = [fmt]
|
|
||||||
if not info.get('duration'):
|
|
||||||
info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
|
|
||||||
|
|
||||||
if not info.get('title'):
|
|
||||||
info['title'] = video_id
|
|
||||||
return info
|
return info
|
||||||
|
|||||||
@@ -147,115 +147,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
||||||
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
|
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
|
||||||
)
|
)
|
||||||
_formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
|
|
||||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
|
||||||
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
|
||||||
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
|
||||||
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
|
|
||||||
'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
|
|
||||||
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
|
||||||
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
|
||||||
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
|
||||||
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
|
|
||||||
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
|
||||||
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
|
||||||
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
|
||||||
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
|
||||||
'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
|
||||||
'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
|
|
||||||
'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
|
|
||||||
'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
|
||||||
'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
|
||||||
|
|
||||||
|
|
||||||
# 3D videos
|
|
||||||
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
|
|
||||||
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
|
|
||||||
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
|
|
||||||
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
|
|
||||||
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
|
|
||||||
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
|
|
||||||
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
|
|
||||||
|
|
||||||
# Apple HTTP Live Streaming
|
|
||||||
'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
|
|
||||||
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
|
|
||||||
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
|
|
||||||
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
|
|
||||||
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
|
|
||||||
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
|
|
||||||
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
|
|
||||||
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
|
|
||||||
|
|
||||||
# DASH mp4 video
|
|
||||||
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
||||||
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
||||||
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
||||||
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
||||||
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
||||||
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
|
|
||||||
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
||||||
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
||||||
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
||||||
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
|
|
||||||
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
|
|
||||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
|
||||||
|
|
||||||
# Dash mp4 audio
|
|
||||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
|
|
||||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
|
|
||||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
|
|
||||||
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
|
||||||
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
|
||||||
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
|
|
||||||
'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
|
|
||||||
|
|
||||||
# Dash webm
|
|
||||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
||||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
||||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
||||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
||||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
||||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
|
|
||||||
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
|
|
||||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
||||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
||||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
||||||
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
||||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
||||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
||||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
||||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
||||||
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
|
||||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
||||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
|
||||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
|
||||||
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
|
||||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
|
|
||||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
|
||||||
|
|
||||||
# Dash webm audio
|
|
||||||
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
|
|
||||||
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
|
|
||||||
|
|
||||||
# Dash webm audio with opus inside
|
|
||||||
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
|
|
||||||
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
|
|
||||||
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
|
|
||||||
|
|
||||||
# RTMP (unnamed)
|
|
||||||
'_rtmp': {'protocol': 'rtmp'},
|
|
||||||
|
|
||||||
# av01 video only formats sometimes served with "unknown" codecs
|
|
||||||
'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
|
|
||||||
'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
|
|
||||||
'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
|
|
||||||
'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
|
|
||||||
'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
|
|
||||||
'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
|
|
||||||
'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
|
||||||
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
|
||||||
}
|
|
||||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
|
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
|
||||||
_DEFAULT_CLIENTS = ('android_sdkless', 'tv', 'web_safari', 'web')
|
_DEFAULT_CLIENTS = ('android_sdkless', 'tv', 'web_safari', 'web')
|
||||||
_DEFAULT_AUTHED_CLIENTS = ('tv', 'web_safari', 'web')
|
_DEFAULT_AUTHED_CLIENTS = ('tv', 'web_safari', 'web')
|
||||||
|
|||||||
Reference in New Issue
Block a user