mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-12-31 12:01:36 +00:00

Merge remote-tracking branch 'origin' into yt-live-from-start-range

Elyse
2023-06-24 14:30:12 -06:00
81 changed files with 2914 additions and 973 deletions

View File

@@ -195,6 +195,7 @@ class YoutubeDL:
ap_password: Multiple-system operator account password.
usenetrc: Use netrc for authentication instead.
netrc_location: Location of the netrc file. Defaults to ~/.netrc.
netrc_cmd: Use a shell command to get credentials
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
no_warnings: Do not print out anything for warnings.
@@ -263,7 +264,7 @@ class YoutubeDL:
consoletitle: Display progress in console window's titlebar.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
clean_infojson: Remove private fields from the infojson
clean_infojson: Remove internal metadata from the infojson
getcomments: Extract video comments. This will not be written to disk
unless writeinfojson is also given
writeannotations: Write the video annotations to a .annotations.xml file
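
Editor's note: the new netrc_cmd option above also has a matching --netrc-cmd CLI flag (see the _login_hint hunk further down). A minimal sketch of embedded use, assuming a hypothetical password-manager command; per the _get_netrc_login_info hunk below, a literal '{}' in the command is replaced with the extractor's netrc machine name and the command's stdout is parsed as netrc-formatted text:

import yt_dlp

ydl_opts = {
    # Hypothetical command; anything that prints netrc-formatted text
    # ("machine <name> login <user> password <pass>") will work.
    'netrc_cmd': 'pass show yt-dlp/{}',
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])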
@@ -1291,17 +1292,17 @@ class YoutubeDL:
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
fmt = f'0{field_size_compat_map[key]:d}d'
if value is None:
value = default
elif replacement is not None:
if None not in (value, replacement):
try:
value = replacement_formatter.format(replacement, value)
except ValueError:
value = na
value, default = None, na
flags = outer_mobj.group('conversion') or ''
str_fmt = f'{fmt[:-1]}s'
if fmt[-1] == 'l': # list
if value is None:
value, fmt = default, 's'
elif fmt[-1] == 'l': # list
delim = '\n' if '#' in flags else ', '
value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
elif fmt[-1] == 'j': # json
@@ -1332,17 +1333,19 @@ class YoutubeDL:
value = str(value)[0]
else:
fmt = str_fmt
elif fmt[-1] not in 'rs': # numeric
elif fmt[-1] not in 'rsa': # numeric
value = float_or_none(value)
if value is None:
value, fmt = default, 's'
if sanitize:
# If value is an object, sanitize might convert it to a string
# So we convert it to repr first
if fmt[-1] == 'r':
# If value is an object, sanitize might convert it to a string
# So we convert it to repr first
value, fmt = repr(value), str_fmt
if fmt[-1] in 'csr':
elif fmt[-1] == 'a':
value, fmt = ascii(value), str_fmt
if fmt[-1] in 'csra':
value = sanitizer(initial_field, value)
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
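
Editor's note: the new 'a' branch above mirrors the existing 'r' (repr) branch but applies Python's built-in ascii(), which escapes all non-ASCII characters; both are then routed through the sanitizer via the widened 'csra' check. A plain-Python illustration of the difference:

# repr() keeps non-ASCII characters; ascii() escapes them, which is what
# the new 'a' format type applies before sanitization.
title = 'Röster ur det förflutna'
print(repr(title))   # 'Röster ur det förflutna'
print(ascii(title))  # 'R\xf6ster ur det f\xf6rflutna'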
@@ -1408,7 +1411,7 @@ class YoutubeDL:
def _match_entry(self, info_dict, incomplete=False, silent=False):
"""Returns None if the file should be downloaded"""
_type = info_dict.get('_type', 'video')
_type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
assert incomplete or _type == 'video', 'Only video result can be considered complete'
video_title = info_dict.get('title', info_dict.get('id', 'entry'))
@@ -1906,7 +1909,7 @@ class YoutubeDL:
continue
entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
if not lazy and 'playlist-index' in self.params['compat_opts']:
playlist_index = ie_result['requested_entries'][i]
entry_copy = collections.ChainMap(entry, {
@@ -2668,7 +2671,8 @@ class YoutubeDL:
format['dynamic_range'] = 'SDR'
if format.get('aspect_ratio') is None:
format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
if (info_dict.get('duration') and format.get('tbr')
if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average
and info_dict.get('duration') and format.get('tbr')
and not format.get('filesize') and not format.get('filesize_approx')):
format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
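
Editor's note: the estimate formula itself is unchanged; the hunk only stops applying it to fragmented (manifest-based) formats, where tbr is often a peak rather than an average bitrate. Since tbr is in Kbit/s, the byte estimate is duration × tbr × 1024/8. A worked example with made-up numbers:

duration = 120  # seconds
tbr = 1500      # total bitrate in Kbit/s, as reported by the extractor
filesize_approx = int(duration * tbr * (1024 / 8))
print(filesize_approx)  # 23040000 bytes, roughly 22 MiB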
@@ -2807,11 +2811,13 @@ class YoutubeDL:
new_info.update(fmt)
offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
end_time = offset + min(chapter.get('end_time', duration), duration)
# duration may not be accurate. So allow deviations <1sec
if end_time == float('inf') or end_time > offset + duration + 1:
end_time = None
if chapter or offset:
new_info.update({
'section_start': offset + chapter.get('start_time', 0),
# duration may not be accurate. So allow deviations <1sec
'section_end': end_time if end_time <= offset + duration + 1 else None,
'section_end': end_time,
'section_title': chapter.get('title'),
'section_number': chapter.get('index'),
})
@@ -2963,8 +2969,7 @@ class YoutubeDL:
print_field('url', 'urls')
print_field('thumbnail', optional=True)
print_field('description', optional=True)
if filename:
print_field('filename')
print_field('filename')
if self.params.get('forceduration') and info_copy.get('duration') is not None:
self.to_stdout(formatSeconds(info_copy['duration']))
print_field('format')
@@ -3188,7 +3193,6 @@ class YoutubeDL:
return
if info_dict.get('requested_formats') is not None:
requested_formats = info_dict['requested_formats']
old_ext = info_dict['ext']
if self.params.get('merge_output_format') is None:
if (info_dict['ext'] == 'webm'
@@ -3215,19 +3219,22 @@ class YoutubeDL:
full_filename = correct_ext(full_filename)
temp_filename = correct_ext(temp_filename)
dl_filename = existing_video_file(full_filename, temp_filename)
info_dict['__real_download'] = False
# NOTE: Copy so that original format dicts are not modified
info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
merger = FFmpegMergerPP(self)
downloaded = []
if dl_filename is not None:
self.report_file_already_downloaded(dl_filename)
elif fd:
for f in requested_formats if fd != FFmpegFD else []:
for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
f['filepath'] = fname = prepend_extension(
correct_ext(temp_filename, info_dict['ext']),
'f%s' % f['format_id'], info_dict['ext'])
downloaded.append(fname)
info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
success, real_download = self.dl(temp_filename, info_dict)
info_dict['__real_download'] = real_download
else:
@@ -3251,7 +3258,7 @@ class YoutubeDL:
f'You have requested downloading multiple formats to stdout {reason}. '
'The formats will be streamed one after the other')
fname = temp_filename
for f in requested_formats:
for f in info_dict['requested_formats']:
new_info = dict(info_dict)
del new_info['requested_formats']
new_info.update(f)
@@ -3707,8 +3714,11 @@ class YoutubeDL:
format_field(f, 'fps', '\t%d', func=round),
format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
format_field(f, 'audio_channels', '\t%s'),
delim,
format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
delim, (
format_field(f, 'filesize', ' \t%s', func=format_bytes)
or format_field(f, 'filesize_approx', '\t%s', func=format_bytes)
or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
format_field(f, 'tbr', '\t%dk', func=round),
shorten_protocol_name(f.get('protocol', '')),
delim,
@@ -4112,8 +4122,11 @@ class YoutubeDL:
ret.append((thumb_filename, thumb_filename_final))
t['filepath'] = thumb_filename
except network_exceptions as err:
if isinstance(err, urllib.error.HTTPError) and err.code == 404:
self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
else:
self.report_warning(f'Unable to download {thumb_display_id}: {err}')
thumbnails.pop(idx)
self.report_warning(f'Unable to download {thumb_display_id}: {err}')
if ret and not write_all:
break
return ret

View File

@@ -189,8 +189,8 @@ def validate_options(opts):
raise ValueError(f'{max_name} "{max_val}" must be greater than or equal to {min_name} "{min_val}"')
# Usernames and passwords
validate(not opts.usenetrc or (opts.username is None and opts.password is None),
'.netrc', msg='using {name} conflicts with giving username/password')
validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc',
msg='{name}, netrc command and username/password are mutually exclusive options')
validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing')
validate(opts.ap_password is None or opts.ap_username is not None,
'TV Provider account username', msg='{name} missing')
@@ -321,41 +321,56 @@ def validate_options(opts):
opts.skip_download = None
del opts.outtmpl['default']
def parse_chapters(name, value):
chapters, ranges = [], []
def parse_chapters(name, value, advanced=False):
parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
TIMESTAMP_RE = r'''(?x)(?:
(?P<start_sign>-?)(?P<start>[^-]+)
)?\s*-\s*(?:
(?P<end_sign>-?)(?P<end>[^-]+)
)?'''
current_time = time.time()
chapters, ranges, from_url = [], [], False
for regex in value or []:
if regex.startswith('*'):
for range_ in map(str.strip, regex[1:].split(',')):
mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf'))
if None in (dur or [None]):
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"')
ranges.append(dur)
if advanced and regex == '*from-url':
from_url = True
continue
elif regex.startswith('#'):
for range_ in map(str.strip, regex[1:].split(',')):
mobj = range_ != '-' and re.fullmatch(r'(-?[^-]+)\s*-\s*(-?[^-]+)?', range_)
if not mobj:
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "#start-end"')
start_section = parse_timestamp(mobj.group(1) or '0')
end_section = parse_timestamp(mobj.group(2) or 'inf')
if start_section is None or end_section is None:
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "#start-end"')
ranges.append((current_time + start_section, current_time + end_section))
elif not regex.startswith('*') or not regex.startswith('#'):
try:
chapters.append(re.compile(regex))
except re.error as err:
raise ValueError(f'invalid {name} regex "{regex}" - {err}')
continue
try:
chapters.append(re.compile(regex))
except re.error as err:
raise ValueError(f'invalid {name} regex "{regex}" - {err}')
return chapters, ranges
opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
for range_ in map(str.strip, regex[1:].split(',')):
mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_)
dur = mobj and [parse_timestamp(mobj.group('start') or '0'),
parse_timestamp(mobj.group('end') or 'inf')]
signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign'))
err = None
if None in (dur or [None]):
err = 'Must be of the form "*start-end"'
elif not advanced and any(signs):
err = 'Negative timestamps are not allowed'
elif regex.startswith('*'):
dur[0] *= -1 if signs[0] else 1
dur[1] *= -1 if signs[1] else 1
if dur[1] == float('-inf'):
err = '"-inf" is not a valid end'
elif regex.startswith('#'):
dur[0] = dur[0] * (-1 if signs[0] else 1) + current_time
dur[1] = dur[1] * (-1 if signs[1] else 1) + current_time
if dur[1] == float('-inf'):
err = '"-inf" is not a valid end'
if err:
raise ValueError(f'invalid {name} time range "{regex}". {err}')
ranges.append(dur)
return chapters, ranges, from_url
opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters)
opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True))
# Cookies from browser
if opts.cookiesfrombrowser:
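
Editor's note: the rewritten parser funnels both '*' (media timestamps) and '#' (wall-clock offsets from the current time) ranges through one shared regex, with optional signs enabling negative offsets in advanced mode, and '*from-url' handled as a keyword before the regex is consulted. A standalone sketch of how the regex splits a range (pattern copied from the hunk; the sample ranges are invented):

import re

TIMESTAMP_RE = r'''(?x)(?:
    (?P<start_sign>-?)(?P<start>[^-]+)
)?\s*-\s*(?:
    (?P<end_sign>-?)(?P<end>[^-]+)
)?'''

for range_ in ('10:15-inf', '-30-'):
    mobj = re.fullmatch(TIMESTAMP_RE, range_)
    print(range_, mobj.groupdict())
# 10:15-inf {'start_sign': '', 'start': '10:15', 'end_sign': '', 'end': 'inf'}
# -30- {'start_sign': '-', 'start': '30', 'end_sign': None, 'end': None}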
@@ -757,6 +772,7 @@ def parse_options(argv=None):
return ParsedOptions(parser, opts, urls, {
'usenetrc': opts.usenetrc,
'netrc_location': opts.netrc_location,
'netrc_cmd': opts.netrc_cmd,
'username': opts.username,
'password': opts.password,
'twofactor': opts.twofactor,

yt_dlp/casefold.py Normal file
View File

@@ -0,0 +1,5 @@
import warnings
warnings.warn(DeprecationWarning(f'{__name__} is deprecated'))
casefold = str.casefold

View File

@@ -705,11 +705,11 @@ class _LinuxKeyring(Enum):
https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
SelectedLinuxBackend
"""
KWALLET4 = auto() # this value is just called KWALLET in the chromium source but it is for KDE4 only
KWALLET = auto() # KDE4
KWALLET5 = auto()
KWALLET6 = auto()
GNOME_KEYRING = auto()
BASIC_TEXT = auto()
GNOMEKEYRING = auto()
BASICTEXT = auto()
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
@@ -803,7 +803,7 @@ def _choose_linux_keyring(logger):
desktop_environment = _get_linux_desktop_environment(os.environ, logger)
logger.debug(f'detected desktop environment: {desktop_environment.name}')
if desktop_environment == _LinuxDesktopEnvironment.KDE4:
linux_keyring = _LinuxKeyring.KWALLET4
linux_keyring = _LinuxKeyring.KWALLET
elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
linux_keyring = _LinuxKeyring.KWALLET5
elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
@@ -811,9 +811,9 @@ def _choose_linux_keyring(logger):
elif desktop_environment in (
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
):
linux_keyring = _LinuxKeyring.BASIC_TEXT
linux_keyring = _LinuxKeyring.BASICTEXT
else:
linux_keyring = _LinuxKeyring.GNOME_KEYRING
linux_keyring = _LinuxKeyring.GNOMEKEYRING
return linux_keyring
@@ -828,7 +828,7 @@ def _get_kwallet_network_wallet(keyring, logger):
"""
default_wallet = 'kdewallet'
try:
if keyring == _LinuxKeyring.KWALLET4:
if keyring == _LinuxKeyring.KWALLET:
service_name = 'org.kde.kwalletd'
wallet_path = '/modules/kwalletd'
elif keyring == _LinuxKeyring.KWALLET5:
@@ -929,11 +929,11 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
logger.debug(f'Chosen keyring: {keyring.name}')
if keyring in (_LinuxKeyring.KWALLET4, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
return _get_kwallet_password(browser_keyring_name, keyring, logger)
elif keyring == _LinuxKeyring.GNOME_KEYRING:
elif keyring == _LinuxKeyring.GNOMEKEYRING:
return _get_gnome_keyring_password(browser_keyring_name, logger)
elif keyring == _LinuxKeyring.BASIC_TEXT:
elif keyring == _LinuxKeyring.BASICTEXT:
# when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
return None
assert False, f'Unknown keyring {keyring}'
@@ -1326,3 +1326,7 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
self.add_cookie_header(cookie_req)
return cookie_req.get_header('Cookie')
def clear(self, *args, **kwargs):
with contextlib.suppress(KeyError):
return super().clear(*args, **kwargs)
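
Editor's note: this clear() override exists because the stdlib cookie jar raises KeyError when asked to clear a domain that has nothing stored. A stdlib-only illustration of the pattern:

import contextlib
import http.cookiejar

jar = http.cookiejar.CookieJar()  # MozillaCookieJar shares this clear() logic
try:
    jar.clear('example.com')  # no cookies for this domain -> KeyError
except KeyError as err:
    print('stdlib raises:', err)

with contextlib.suppress(KeyError):  # the pattern used in the override
    jar.clear('example.com')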

View File

@@ -49,7 +49,6 @@ class FileDownloader:
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
continuedl: Attempt to continue downloads if possible
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for expected network errors.
Default is 0 for API, but 10 for CLI

View File

@@ -173,6 +173,9 @@ class FragmentFD(FileDownloader):
**self.params,
'noprogress': True,
'test': False,
'sleep_interval': 0,
'max_sleep_interval': 0,
'sleep_interval_subtitles': 0,
})
tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb'

View File

@@ -7,9 +7,9 @@ from .common import FileDownloader
from .external import FFmpegFD
from ..utils import (
DownloadError,
str_or_none,
sanitized_Request,
WebSocketsWrapper,
sanitized_Request,
str_or_none,
try_get,
)

View File

@@ -497,6 +497,7 @@ from .dplay import (
DiscoveryPlusItalyIE,
DiscoveryPlusItalyShowIE,
DiscoveryPlusIndiaShowIE,
GlobalCyclingNetworkPlusIE,
)
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
@@ -520,6 +521,7 @@ from .deuxm import (
DeuxMNewsIE
)
from .digitalconcerthall import DigitalConcertHallIE
from .discogs import DiscogsReleasePlaylistIE
from .discovery import DiscoveryIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
@@ -578,6 +580,7 @@ from .espn import (
ESPNCricInfoIE,
)
from .esri import EsriVideoIE
from .ettutv import EttuTvIE
from .europa import EuropaIE, EuroParlWebstreamIE
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
@@ -664,6 +667,7 @@ from .funimation import (
FunimationShowIE,
)
from .funk import FunkIE
from .funker530 import Funker530IE
from .fusion import FusionIE
from .fuyintv import FuyinTVIE
from .gab import (
@@ -1116,7 +1120,8 @@ from .mojvideo import MojvideoIE
from .morningstar import MorningstarIE
from .motherless import (
MotherlessIE,
MotherlessGroupIE
MotherlessGroupIE,
MotherlessGalleryIE,
)
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
@@ -1257,6 +1262,7 @@ from .nhk import (
NhkForSchoolProgramListIE,
NhkRadioNewsPageIE,
NhkRadiruIE,
NhkRadiruLiveIE,
)
from .nhl import NHLIE
from .nick import (
@@ -1611,6 +1617,7 @@ from .rentv import (
from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
@@ -1625,6 +1632,7 @@ from .rottentomatoes import RottenTomatoesIE
from .rozhlas import (
RozhlasIE,
RozhlasVltavaIE,
MujRozhlasIE,
)
from .rte import RteIE, RteRadioIE
from .rtlnl import (
@@ -2422,7 +2430,10 @@ from .yandexvideo import (
ZenYandexChannelIE,
)
from .yapfiles import YapFilesIE
from .yappy import YappyIE
from .yappy import (
YappyIE,
YappyProfileIE,
)
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
from .yle_areena import YleAreenaIE
@@ -2440,6 +2451,10 @@ from .younow import (
from .youporn import YouPornIE
from .yourporn import YourPornIE
from .yourupload import YourUploadIE
from .zaiko import (
ZaikoIE,
ZaikoETicketIE,
)
from .zapiks import ZapiksIE
from .zattoo import (
BBVTVIE,

View File

@@ -40,28 +40,33 @@ class ACastBaseIE(InfoExtractor):
class ACastIE(ACastBaseIE):
IE_NAME = 'acast'
_VALID_URL = r'''(?x)
_VALID_URL = r'''(?x:
https?://
(?:
(?:(?:embed|www)\.)?acast\.com/|
play\.acast\.com/s/
)
(?P<channel>[^/]+)/(?P<id>[^/#?]+)
'''
(?P<channel>[^/]+)/(?P<id>[^/#?"]+)
)'''
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
'info_dict': {
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
'ext': 'mp3',
'title': '2. Raggarmordet - Röster ur det förflutna',
'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
'description': 'md5:013959207e05011ad14a222cf22278cc',
'timestamp': 1477346700,
'upload_date': '20161024',
'duration': 2766,
'creator': 'Anton Berg & Martin Johnson',
'creator': 'Third Ear Studio',
'series': 'Spår',
'episode': '2. Raggarmordet - Röster ur det förflutna',
'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
'episode_number': 2,
'display_id': '2.raggarmordet-rosterurdetforflutna',
'season_number': 4,
'season': 'Season 4',
}
}, {
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
@@ -73,6 +78,23 @@ class ACastIE(ACastBaseIE):
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://ausi.anu.edu.au/news/democracy-sausage-episode-can-labor-be-long-form-government',
'info_dict': {
'id': '646c68fb21fbf20011e9c651',
'ext': 'mp3',
'creator': 'The Australian National University',
'display_id': 'can-labor-be-a-long-form-government',
'duration': 2618,
'thumbnail': 'https://assets.pippa.io/shows/6113e8578b4903809f16f7e5/1684821529295-515b9520db9ce53275b995eb302f941c.jpeg',
'title': 'Can Labor be a long-form government?',
'episode': 'Can Labor be a long-form government?',
'upload_date': '20230523',
'series': 'Democracy Sausage with Mark Kenny',
'timestamp': 1684826362,
'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
}
}]
def _real_extract(self, url):
channel, display_id = self._match_valid_url(url).groups()

View File

@@ -76,59 +76,6 @@ class AfreecaTVIE(InfoExtractor):
},
}],
'skip': 'Video is gone',
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
'info_dict': {
'id': '18650793',
'ext': 'mp4',
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '윈아디',
'uploader_id': 'badkids',
'duration': 107,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
'info_dict': {
'id': '10481652',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'duration': 6492,
},
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '20160502_c4c62b9d_174361386_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 3601,
},
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '20160502_39e739bb_174361386_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 2891,
},
}],
'params': {
'skip_download': True,
},
}, {
# non standard key
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
@@ -146,8 +93,8 @@ class AfreecaTVIE(InfoExtractor):
'skip_download': True,
},
}, {
# PARTIAL_ADULT
'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
# adult content
'url': 'https://vod.afreecatv.com/player/97267690',
'info_dict': {
'id': '20180327_27901457_202289533_1',
'ext': 'mp4',
@@ -161,16 +108,25 @@ class AfreecaTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
'expected_warnings': ['adult content'],
'skip': 'The VOD does not exist',
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
'only_matching': True,
}, {
'url': 'http://vod.afreecatv.com/player/15055030',
'only_matching': True,
'url': 'https://vod.afreecatv.com/player/96753363',
'info_dict': {
'id': '20230108_9FF5BEE1_244432674_1',
'ext': 'mp4',
'uploader_id': 'rlantnghks',
'uploader': '페이즈으',
'duration': 10840,
'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
'upload_date': '20230108',
'title': '젠지 페이즈',
},
'params': {
'skip_download': True,
},
}]
@staticmethod
@@ -223,26 +179,21 @@ class AfreecaTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if re.search(r'alert\(["\']This video has been deleted', webpage):
raise ExtractorError(
'Video %s has been deleted' % video_id, expected=True)
station_id = self._search_regex(
r'nStationNo\s*=\s*(\d+)', webpage, 'station')
bbs_id = self._search_regex(
r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
video_id = self._search_regex(
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
partial_view = False
adult_view = False
for _ in range(2):
data = self._download_json(
'https://api.m.afreecatv.com/station/video/a/view',
video_id, headers={'Referer': url}, data=urlencode_postdata({
'nTitleNo': video_id,
'nApiLevel': 10,
}))['data']
if traverse_obj(data, ('code', {int})) == -6221:
raise ExtractorError('The VOD does not exist', expected=True)
query = {
'nTitleNo': video_id,
'nStationNo': station_id,
'nBbsNo': bbs_id,
'nStationNo': data['station_no'],
'nBbsNo': data['bbs_no'],
}
if partial_view:
query['partialView'] = 'SKIP_ADULT'

View File

@@ -5,6 +5,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_iso8601,
strip_jsonp,
unified_timestamp,
url_or_none,
)
@@ -15,7 +16,7 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
def _extract_feed_info(self, url):
feed = self._download_json(
url, None, 'Downloading Akamai AMP feed',
'Unable to download Akamai AMP feed')
'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
item = feed.get('channel', {}).get('item')
if not item:
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
@@ -73,8 +74,10 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
fmts, subs = self._extract_m3u8_formats_and_subtitles(
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),

View File

@@ -49,7 +49,7 @@ class CiscoWebexIE(InfoExtractor):
'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429))
if urlh.status == 403:
if urlh.getcode() == 403:
if stream['code'] == 53004:
self.raise_login_required()
if stream['code'] == 53005:
@@ -59,7 +59,7 @@ class CiscoWebexIE(InfoExtractor):
'This video is protected by a password, use the --video-password option', expected=True)
raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True)
if urlh.status == 429:
if urlh.getcode() == 429:
self.raise_login_required(
f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and',
method='cookies')

View File

@@ -13,9 +13,11 @@ import netrc
import os
import random
import re
import subprocess
import sys
import time
import types
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
@@ -34,6 +36,7 @@ from ..utils import (
GeoUtils,
HEADRequest,
LenientJSONDecoder,
Popen,
RegexNotFoundError,
RetryManager,
UnsupportedError,
@@ -56,6 +59,7 @@ from ..utils import (
join_nonempty,
js_to_json,
mimetype2ext,
netrc_from_content,
network_exceptions,
orderedSet,
parse_bitrate,
@@ -286,6 +290,7 @@ class InfoExtractor:
channel_id: Id of the channel.
channel_url: Full URL to a channel webpage.
channel_follower_count: Number of followers of the channel.
channel_is_verified: Whether the channel is verified on the platform.
location: Physical location where the video was filmed.
subtitles: The available subtitles as a dictionary in the format
{tag: subformats}. "tag" is usually a language code, and
@@ -470,8 +475,8 @@ class InfoExtractor:
Subclasses of this should also be added to the list of extractors and
should define a _VALID_URL regexp and, re-define the _real_extract() and
(optionally) _real_initialize() methods.
should define _VALID_URL as a regexp or a Sequence of regexps, and
re-define the _real_extract() and (optionally) _real_initialize() methods.
Subclasses may also override suitable() if necessary, but ensure the function
signature is preserved and that this function imports everything it needs
@@ -534,7 +539,7 @@ class InfoExtractor:
_EMBED_REGEX = []
def _login_hint(self, method=NO_DEFAULT, netrc=None):
password_hint = f'--username and --password, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
return {
None: '',
'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
@@ -561,8 +566,8 @@ class InfoExtractor:
# we have cached the regexp for *this* class, whereas getattr would also
# match the superclass
if '_VALID_URL_RE' not in cls.__dict__:
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
return cls._VALID_URL_RE.match(url)
cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
return next(filter(None, (regex.match(url) for regex in cls._VALID_URL_RE)), None)
@classmethod
def suitable(cls, url):
@@ -1290,45 +1295,48 @@ class InfoExtractor:
return clean_html(res)
def _get_netrc_login_info(self, netrc_machine=None):
username = None
password = None
netrc_machine = netrc_machine or self._NETRC_MACHINE
if self.get_param('usenetrc', False):
try:
netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
if os.path.isdir(netrc_file):
netrc_file = os.path.join(netrc_file, '.netrc')
info = netrc.netrc(file=netrc_file).authenticators(netrc_machine)
if info is not None:
username = info[0]
password = info[2]
else:
raise netrc.NetrcParseError(
'No authenticators for %s' % netrc_machine)
except (OSError, netrc.NetrcParseError) as err:
self.report_warning(
'parsing .netrc: %s' % error_to_compat_str(err))
cmd = self.get_param('netrc_cmd')
if cmd:
cmd = cmd.replace('{}', netrc_machine)
self.to_screen(f'Executing command: {cmd}')
stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
if ret != 0:
raise OSError(f'Command returned error code {ret}')
info = netrc_from_content(stdout).authenticators(netrc_machine)
return username, password
elif self.get_param('usenetrc', False):
netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
if os.path.isdir(netrc_file):
netrc_file = os.path.join(netrc_file, '.netrc')
info = netrc.netrc(netrc_file).authenticators(netrc_machine)
else:
return None, None
if not info:
raise netrc.NetrcParseError(f'No authenticators for {netrc_machine}')
return info[0], info[2]
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
"""
Get the login info as (username, password)
First look for the manually specified credentials using username_option
and password_option as keys in params dictionary. If no such credentials
available look in the netrc file using the netrc_machine or _NETRC_MACHINE
value.
are available try the netrc_cmd if it is defined or look in the
netrc file using the netrc_machine or _NETRC_MACHINE value.
If there's no info available, return (None, None)
"""
# Attempt to use provided username and password or .netrc data
username = self.get_param(username_option)
if username is not None:
password = self.get_param(password_option)
else:
username, password = self._get_netrc_login_info(netrc_machine)
try:
username, password = self._get_netrc_login_info(netrc_machine)
except (OSError, netrc.NetrcParseError) as err:
self.report_warning(f'Failed to parse .netrc: {err}')
return None, None
return username, password
def _get_tfa_info(self, note='two-factor verification code'):
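
Editor's note: netrc_from_content, imported in this hunk, parses netrc-formatted text directly instead of opening a file, which is what lets the command's stdout stand in for ~/.netrc. A small sketch with made-up credentials:

from yt_dlp.utils import netrc_from_content

# Stand-in for the stdout of a user's --netrc-cmd
content = 'machine iwara login me@example.com password hunter2'
info = netrc_from_content(content).authenticators('iwara')
print(info[0], info[2])  # login and password fields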

View File

@@ -0,0 +1,35 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import traverse_obj
class DiscogsReleasePlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?discogs\.com/(?P<type>release|master)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.discogs.com/release/1-The-Persuader-Stockholm',
'info_dict': {
'id': 'release1',
'title': 'Stockholm',
},
'playlist_mincount': 7,
}, {
'url': 'https://www.discogs.com/master/113-Vince-Watson-Moments-In-Time',
'info_dict': {
'id': 'master113',
'title': 'Moments In Time',
},
'playlist_mincount': 53,
}]
def _real_extract(self, url):
playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
display_id = f'{playlist_type}{playlist_id}'
response = self._download_json(
f'https://api.discogs.com/{playlist_type}s/{playlist_id}', display_id)
entries = [
self.url_result(video['uri'], YoutubeIE, video_title=video.get('title'))
for video in traverse_obj(response, ('videos', lambda _, v: YoutubeIE.suitable(v['uri'])))]
return self.playlist_result(entries, display_id, response.get('title'))
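
Editor's note: the traverse_obj path here uses a callable branch as a filter — only list items for which the lambda returns a truthy value are kept, and items that make the lambda raise (e.g. a missing 'uri' key) are silently dropped. A self-contained sketch of that pattern with invented data:

from yt_dlp.utils import traverse_obj

response = {'videos': [
    {'uri': 'https://www.youtube.com/watch?v=dQw4w9WgXcQ', 'title': 'a'},
    {'uri': 'https://example.com/not-youtube', 'title': 'b'},
    {'title': 'no uri at all'},
]}
hits = traverse_obj(response, ('videos', lambda _, v: 'youtube.com' in v['uri']))
print([v['title'] for v in hits])  # ['a']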

View File

@@ -65,6 +65,7 @@ class DPlayBaseIE(InfoExtractor):
return streaming_list
def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''):
country = self.get_param('geo_bypass_country') or country
geo_countries = [country.upper()]
self._initialize_geo_bypass({
'countries': geo_countries,
@@ -1001,3 +1002,39 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
_SHOW_STR = 'show'
_INDEX = 4
_VIDEO_IE = DiscoveryPlusIndiaIE
class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)'
_TESTS = [{
'url': 'https://plus.globalcyclingnetwork.com/watch/1397691',
'info_dict': {
'id': '1397691',
'ext': 'mp4',
'title': 'The Athertons: Mountain Biking\'s Fastest Family',
'description': 'md5:75a81937fcd8b989eec6083a709cd837',
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png',
'series': 'gcn',
'creator': 'Gcn',
'upload_date': '20210309',
'timestamp': 1615248000,
'duration': 2531.0,
'tags': [],
},
'skip': 'Subscription required',
'params': {'skip_download': 'm3u8'},
}]
_PRODUCT = 'web'
_DISCO_API_PARAMS = {
'disco_host': 'disco-api-prod.globalcyclingnetwork.com',
'realm': 'gcn',
'country': 'us',
}
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
headers.update({
'x-disco-params': f'realm={realm}',
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2',
'Authorization': self._get_auth(disco_base, display_id, realm),
})

View File

@@ -1,13 +1,17 @@
import functools
from .common import InfoExtractor
from .vimeo import VHXEmbedIE
from ..utils import (
ExtractorError,
OnDemandPagedList,
clean_html,
extract_attributes,
get_element_by_class,
get_element_by_id,
get_elements_by_class,
get_elements_html_by_class,
int_or_none,
join_nonempty,
traverse_obj,
unified_strdate,
urlencode_postdata,
)
@@ -162,12 +166,13 @@ class DropoutIE(InfoExtractor):
class DropoutSeasonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)'
_PAGE_SIZE = 24
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)'
_TESTS = [
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
'note': 'Multi-season series with the season in the url',
'playlist_count': 17,
'playlist_count': 24,
'info_dict': {
'id': 'dimension-20-fantasy-high-season-1',
'title': 'Dimension 20 Fantasy High - Season 1'
@@ -176,7 +181,7 @@ class DropoutSeasonIE(InfoExtractor):
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high',
'note': 'Multi-season series with the season not in the url',
'playlist_count': 17,
'playlist_count': 24,
'info_dict': {
'id': 'dimension-20-fantasy-high-season-1',
'title': 'Dimension 20 Fantasy High - Season 1'
@@ -190,29 +195,30 @@ class DropoutSeasonIE(InfoExtractor):
'id': 'dimension-20-shriek-week-season-1',
'title': 'Dimension 20 Shriek Week - Season 1'
}
},
{
'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3',
'note': 'Multi-season series with season in the url that requires pagination',
'playlist_count': 25,
'info_dict': {
'id': 'breaking-news-no-laugh-newsroom-season-3',
'title': 'Breaking News No Laugh Newsroom - Season 3'
}
}
]
def _fetch_page(self, url, season_id, page):
page += 1
webpage = self._download_webpage(
f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400})
yield from [self.url_result(item_url, DropoutIE) for item_url in traverse_obj(
get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))]
def _real_extract(self, url):
season_id = self._match_id(url)
season_num = self._match_valid_url(url).group('season') or 1
season_title = season_id.replace('-', ' ').title()
webpage = self._download_webpage(url, season_id)
entries = [
self.url_result(
url=self._search_regex(r'<a href=["\'](.+?)["\'] class=["\']browse-item-link["\']',
item, 'item_url'),
ie=DropoutIE.ie_key()
) for item in get_elements_by_class('js-collection-item', webpage)
]
seasons = (get_element_by_class('select-dropdown-wrapper', webpage) or '').strip().replace('\n', '')
current_season = self._search_regex(r'<option[^>]+selected>([^<]+)</option>',
seasons, 'current_season', default='').strip()
return {
'_type': 'playlist',
'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')),
'title': join_nonempty(season_title, current_season, delim=' - '),
'entries': entries
}
return self.playlist_result(
OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE),
f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')
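
Editor's note: OnDemandPagedList wraps a fetch_page(page_number) callable so pages are requested lazily (and cached) as the playlist is consumed — only the needed pages are fetched when the result is sliced. A toy sketch of the same wiring, with the page generator standing in for the webpage scrape:

import functools
from yt_dlp.utils import OnDemandPagedList

PAGE_SIZE = 24

def fetch_page(season_url, page):  # stand-in for DropoutSeasonIE._fetch_page
    yield from (f'{season_url}?page={page + 1}#item{i}' for i in range(PAGE_SIZE))

entries = OnDemandPagedList(functools.partial(fetch_page, 'https://example.com/season'), PAGE_SIZE)
print(entries.getslice(0, 3))  # only page 0 is actually generated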

View File

@@ -1,12 +1,17 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
qualities,
)
class DumpertIE(InfoExtractor):
_VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
_VALID_URL = r'''(?x)
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
/(?:mediabase|embed|item)/|
(?:/toppers|/latest|/?)\?selectedId=
)(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
_TESTS = [{
'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
@@ -16,6 +21,9 @@ class DumpertIE(InfoExtractor):
'title': 'Ik heb nieuws voor je',
'description': 'Niet schrikken hoor',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 9,
'view_count': int,
'like_count': int,
}
}, {
'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
@@ -26,6 +34,28 @@ class DumpertIE(InfoExtractor):
}, {
'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/item/100031688_b317a185',
'info_dict': {
'id': '100031688/b317a185',
'ext': 'mp4',
'title': 'Epic schijnbeweging',
'description': '<p>Die zag je niet eh</p>',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'duration': 12,
'view_count': int,
'like_count': int,
},
'params': {'skip_download': 'm3u8'}
}, {
'url': 'https://www.dumpert.nl/toppers?selectedId=100031688_b317a185',
'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/latest?selectedId=100031688_b317a185',
'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -36,18 +66,23 @@ class DumpertIE(InfoExtractor):
title = item['title']
media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
quality = qualities(['flv', 'mobile', 'tablet', '720p', '1080p'])
formats = []
for variant in media.get('variants', []):
uri = variant.get('uri')
if not uri:
continue
version = variant.get('version')
formats.append({
'url': uri,
'format_id': version,
'quality': quality(version),
})
preference = quality(version)
if determine_ext(uri) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
uri, video_id, 'mp4', m3u8_id=version, quality=preference))
else:
formats.append({
'url': uri,
'format_id': version,
'quality': preference,
})
thumbnails = []
stills = item.get('stills') or {}

View File

@@ -0,0 +1,60 @@
from .common import InfoExtractor
from ..utils import bool_or_none, traverse_obj, unified_timestamp, url_or_none
class EttuTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ettu\.tv/[^?#]+/playerpage/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.ettu.tv/en-int/playerpage/1573849',
'md5': '5874b7639a2aa866d1f6c3a4037c7c09',
'info_dict': {
'id': '1573849',
'title': 'Ni Xia Lian - Shao Jieni',
'description': 'ITTF Europe Top 16 Cup',
'timestamp': 1677348600,
'upload_date': '20230225',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'ext': 'mp4',
},
}, {
'url': 'https://www.ettu.tv/en-int/playerpage/1573753',
'md5': '1fc094bf96cf2d5ec0f434d3a6dec9aa',
'info_dict': {
'id': '1573753',
'title': 'Qiu Dang - Jorgic Darko',
'description': 'ITTF Europe Top 16 Cup',
'timestamp': 1677423600,
'upload_date': '20230226',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'ext': 'mp4',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
player_settings = self._download_json(
f'https://www.ettu.tv/api/v3/contents/{video_id}/player-settings', video_id, query={
'language': 'en',
'showTitle': 'true',
'device': 'desktop',
})
stream_response = self._download_json(player_settings['streamAccess'], video_id, data={})
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
stream_response['data']['stream'], video_id, 'mp4')
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(player_settings, {
'title': 'title',
'description': ('metaInformation', 'competition'),
'thumbnail': ('image', {url_or_none}),
'timestamp': ('date', {unified_timestamp}),
'is_live': ('isLivestream', {bool_or_none}),
})
}
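
Editor's note: here traverse_obj takes a dict template — each key maps to a path, a trailing {callable} coerces the leaf, and keys whose value resolves to None are omitted, so a malformed thumbnail URL simply disappears rather than propagating. A sketch with invented data matching the first test above:

from yt_dlp.utils import traverse_obj, unified_timestamp, url_or_none

player_settings = {
    'title': 'Ni Xia Lian - Shao Jieni',
    'image': 'not a url',
    'date': '2023-02-25T18:10:00Z',
}
print(traverse_obj(player_settings, {
    'title': 'title',
    'thumbnail': ('image', {url_or_none}),       # None -> key dropped
    'timestamp': ('date', {unified_timestamp}),
}))
# {'title': 'Ni Xia Lian - Shao Jieni', 'timestamp': 1677348600}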

View File

@@ -7,8 +7,37 @@ from .common import InfoExtractor
class FoxNewsIE(AMPIE):
IE_NAME = 'foxnews'
IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_VALID_URL = r'https?://video\.(?:insider\.)?fox(?:news|business)\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
{
'url': 'https://video.foxnews.com/v/6320653836112',
'info_dict': {
'id': '6320653836112',
'ext': 'mp4',
'title': 'Tucker Carlson joins \'Gutfeld!\' to discuss his new documentary',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 404,
'upload_date': '20230217',
'description': 'md5:858a8a36f59e9ca897d758855bcdfa02',
'timestamp': 1676611344.0,
},
'params': {'skip_download': 'm3u8'},
},
{
# From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
'info_dict': {
'id': '5099377331001',
'ext': 'mp4',
'title': '82416_censoring',
'description': '82416_censoring',
'upload_date': '20160826',
'timestamp': 1472169708.0,
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 521,
},
'params': {'skip_download': 'm3u8'},
},
{
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
@@ -22,6 +51,7 @@ class FoxNewsIE(AMPIE):
'upload_date': '20110503',
'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': '404 page',
},
{
'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
@@ -36,10 +66,7 @@ class FoxNewsIE(AMPIE):
'upload_date': '20141204',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'm3u8 HTTP error 400 in web browser',
},
{
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
@@ -49,11 +76,6 @@ class FoxNewsIE(AMPIE):
'url': 'http://video.foxbusiness.com/v/4442309889001',
'only_matching': True,
},
{
# From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
'only_matching': True,
},
]
@classmethod
@@ -67,10 +89,10 @@ class FoxNewsIE(AMPIE):
yield f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}'
def _real_extract(self, url):
host, video_id = self._match_valid_url(url).groups()
video_id = self._match_id(url)
info = self._extract_feed_info(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
f'https://api.foxnews.com/v3/video-player/{video_id}?callback=uid_{video_id}')
info['id'] = video_id
return info
@@ -78,6 +100,19 @@ class FoxNewsIE(AMPIE):
class FoxNewsVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.foxnews.com/video/6328632286112',
'info_dict': {
'id': '6328632286112',
'ext': 'mp4',
'title': 'Review: 2023 Toyota Prius Prime',
'duration': 155,
'thumbnail': r're:^https://.+\.jpg$',
'timestamp': 1685720177.0,
'upload_date': '20230602',
'description': 'md5:b69aafb125b41c1402e9744f53d6edc4',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.foxnews.com/video/6313058664112',
'info_dict': {
'id': '6313058664112',
@@ -89,8 +124,7 @@ class FoxNewsVideoIE(InfoExtractor):
'title': 'Gutfeld! - Thursday, September 29',
'timestamp': 1664527538,
},
'expected_warnings': ['Ignoring subtitle tracks'],
'params': {'skip_download': 'm3u8'},
'skip': '404 page',
}]
def _real_extract(self, url):
@@ -104,19 +138,22 @@ class FoxNewsArticleIE(InfoExtractor):
_TESTS = [{
# data-video-id
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
'md5': '83d44e1aff1433e7a29a7b537d1700b5',
'url': 'https://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
'md5': 'd2dd6ce809cedeefa96460e964821437',
'info_dict': {
'id': '5116295019001',
'ext': 'mp4',
'title': 'Trump and Clinton asked to defend positions on Iraq War',
'description': 'Veterans react on \'The Kelly File\'',
'description': 'Veterans and Fox News host Dana Perino react on \'The Kelly File\' to NBC\'s presidential forum',
'timestamp': 1473301045,
'upload_date': '20160908',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 426,
},
'params': {'skip_download': 'm3u8'},
}, {
# iframe embed
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
'url': 'https://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
'info_dict': {
'id': '5748266721001',
'ext': 'flv',
@@ -127,9 +164,7 @@ class FoxNewsArticleIE(InfoExtractor):
'timestamp': 1520594670,
'upload_date': '20180309',
},
'params': {
'skip_download': True,
},
'skip': '404 page',
}, {
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
'only_matching': True,

View File

@@ -0,0 +1,79 @@
from .common import InfoExtractor
from .rumble import RumbleEmbedIE
from .youtube import YoutubeIE
from ..utils import ExtractorError, clean_html, get_element_by_class, strip_or_none
class Funker530IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funker530\.com/video/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://funker530.com/video/azov-patrol-caught-in-open-under-automatic-grenade-launcher-fire/',
'md5': '085f50fea27523a388bbc22e123e09c8',
'info_dict': {
'id': 'v2qbmu4',
'ext': 'mp4',
'title': 'Azov Patrol Caught In Open Under Automatic Grenade Launcher Fire',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Funker530',
'channel': 'Funker530',
'channel_url': 'https://rumble.com/c/c-1199543',
'width': 1280,
'height': 720,
'fps': 25,
'duration': 27,
'upload_date': '20230608',
'timestamp': 1686241321,
'live_status': 'not_live',
'description': 'md5:bea2e1f458095414e04b5ac189c2f980',
}
}, {
'url': 'https://funker530.com/video/my-friends-joined-the-russians-civdiv/',
'md5': 'a42c2933391210662e93e867d7124b70',
'info_dict': {
'id': 'k-pk4bOvoac',
'ext': 'mp4',
'view_count': int,
'channel': 'Civ Div',
'comment_count': int,
'channel_follower_count': int,
'thumbnail': 'https://i.ytimg.com/vi/k-pk4bOvoac/maxresdefault.jpg',
'uploader_id': '@CivDiv',
'duration': 357,
'channel_url': 'https://www.youtube.com/channel/UCgsCiwJ88up-YyMHo7hL5-A',
'tags': [],
'uploader_url': 'https://www.youtube.com/@CivDiv',
'channel_id': 'UCgsCiwJ88up-YyMHo7hL5-A',
'like_count': int,
'description': 'md5:aef75ec3f59c07a0e39400f609b24429',
'live_status': 'not_live',
'age_limit': 0,
'uploader': 'Civ Div',
'categories': ['People & Blogs'],
'title': 'My “Friends” joined the Russians.',
'availability': 'public',
'upload_date': '20230608',
'playable_in_embed': True,
'heatmap': 'count:100',
}
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
info = {}  # ensure 'info' is bound even if no embed is found below
rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
if rumble_url:
info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
else:
youtube_url = list(YoutubeIE._extract_embed_urls(url, webpage))
if youtube_url:
info = {'url': youtube_url[0], 'ie_key': YoutubeIE.ie_key()}
if not info:
raise ExtractorError('No videos found on webpage', expected=True)
return {
**info,
'_type': 'url_transparent',
'description': strip_or_none(self._search_regex(
r'(?s)(.+)About the Author', clean_html(get_element_by_class('video-desc-paragraph', webpage)),
'description', default=None))
}

yt_dlp/extractor/globalplayer.py Executable file → Normal file
View File

View File

@@ -83,7 +83,7 @@ class HotStarIE(HotStarBaseIE):
_VALID_URL = r'''(?x)
https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
(?:
(?P<type>movies|sports|episode|(?P<tv>tv))/
(?P<type>movies|sports|episode|(?P<tv>tv|shows))/
(?(tv)(?:[^/?#]+/){2}|[^?#]*)
)?
[^/?#]+/
@@ -122,6 +122,25 @@ class HotStarIE(HotStarBaseIE):
'episode': 'Janhvi Targets Suman',
'episode_number': 8,
}
}, {
'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843',
'info_dict': {
'id': '1000282843',
'ext': 'mp4',
'title': 'Anupama, Anuj Share a Moment',
'season': 'Chapter 1',
'description': 'md5:8d74ed2248423b8b06d5c8add4d7a0c0',
'timestamp': 1678149000,
'channel': 'StarPlus',
'series': 'Anupama',
'season_number': 1,
'season_id': 7399,
'upload_date': '20230307',
'episode': 'Anupama, Anuj Share a Moment',
'episode_number': 853,
'duration': 1272,
'channel_id': 3,
},
}, {
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
'only_matching': True,
@@ -139,6 +158,7 @@ class HotStarIE(HotStarBaseIE):
'sports': 'match',
'episode': 'episode',
'tv': 'episode',
'shows': 'episode',
None: 'content',
}
@@ -304,13 +324,16 @@ class HotStarPrefixIE(InfoExtractor):
class HotStarPlaylistIE(HotStarBaseIE):
IE_NAME = 'hotstar:playlist'
_VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/tv(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
_VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
'info_dict': {
'id': '3_2_26',
},
'playlist_mincount': 20,
}, {
'url': 'https://www.hotstar.com/shows/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
'only_matching': True,
}, {
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
'only_matching': True,
@@ -327,7 +350,7 @@ class HotStarPlaylistIE(HotStarBaseIE):
class HotStarSeasonIE(HotStarBaseIE):
IE_NAME = 'hotstar:season'
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028',
'info_dict': {
@@ -346,6 +369,9 @@ class HotStarSeasonIE(HotStarBaseIE):
'id': '8208',
},
'playlist_mincount': 19,
}, {
'url': 'https://www.hotstar.com/in/shows/bigg-boss/14714/seasons/season-4/ss-8208/',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -356,7 +382,7 @@ class HotStarSeasonIE(HotStarBaseIE):
class HotStarSeriesIE(HotStarBaseIE):
IE_NAME = 'hotstar:series'
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
'info_dict': {
@@ -375,6 +401,12 @@ class HotStarSeriesIE(HotStarBaseIE):
'id': '435',
},
'playlist_mincount': 267,
}, {
'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/',
'info_dict': {
'id': '1260022017',
},
'playlist_mincount': 940,
}]
def _real_extract(self, url):

View File

@@ -1,68 +1,83 @@
import functools
import urllib.parse
import urllib.error
import hashlib
import json
import time
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
int_or_none,
jwt_decode_hs256,
mimetype2ext,
qualities,
traverse_obj,
try_call,
unified_timestamp,
)
# https://github.com/yt-dlp/yt-dlp/issues/6671
class IwaraBaseIE(InfoExtractor):
_NETRC_MACHINE = 'iwara'
_USERTOKEN = None
_MEDIATOKEN = None
_NETRC_MACHINE = 'iwara'
def _get_user_token(self, invalidate=False):
if not invalidate and self._USERTOKEN:
return self._USERTOKEN
def _is_token_expired(self, token, token_type):
# User token TTL == ~3 weeks, Media token TTL == ~1 hour
if (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 120):
self.to_screen(f'{token_type} token has expired')
return True
def _get_user_token(self):
username, password = self._get_login_info()
IwaraBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
if not IwaraBaseIE._USERTOKEN or invalidate:
IwaraBaseIE._USERTOKEN = self._download_json(
if not username or not password:
return
user_token = IwaraBaseIE._USERTOKEN or self.cache.load(self._NETRC_MACHINE, username)
if not user_token or self._is_token_expired(user_token, 'User'):
response = self._download_json(
'https://api.iwara.tv/user/login', None, note='Logging in',
data=json.dumps({
headers={'Content-Type': 'application/json'}, data=json.dumps({
'email': username,
'password': password
}).encode('utf-8'),
headers={
}).encode(), expected_status=lambda x: True)
user_token = traverse_obj(response, ('token', {str}))
if not user_token:
error = traverse_obj(response, ('message', {str}))
if 'invalidLogin' in error:
raise ExtractorError('Invalid login credentials', expected=True)
else:
raise ExtractorError(f'Iwara API said: {error or "nothing"}')
self.cache.store(self._NETRC_MACHINE, username, user_token)
IwaraBaseIE._USERTOKEN = user_token
def _get_media_token(self):
self._get_user_token()
if not IwaraBaseIE._USERTOKEN:
return # user has not passed credentials
if not IwaraBaseIE._MEDIATOKEN or self._is_token_expired(IwaraBaseIE._MEDIATOKEN, 'Media'):
IwaraBaseIE._MEDIATOKEN = self._download_json(
'https://api.iwara.tv/user/token', None, note='Fetching media token',
data=b'', headers={
'Authorization': f'Bearer {IwaraBaseIE._USERTOKEN}',
'Content-Type': 'application/json'
})['token']
})['accessToken']
self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)
return {'Authorization': f'Bearer {IwaraBaseIE._MEDIATOKEN}'}
return self._USERTOKEN
def _get_media_token(self, invalidate=False):
if not invalidate and self._MEDIATOKEN:
return self._MEDIATOKEN
IwaraBaseIE._MEDIATOKEN = self._download_json(
'https://api.iwara.tv/user/token', None, note='Fetching media token',
data=b'', # Need to have some data here, even if it's empty
headers={
'Authorization': f'Bearer {self._get_user_token()}',
'Content-Type': 'application/json'
})['accessToken']
return self._MEDIATOKEN
def _perform_login(self, username, password):
self._get_media_token()
class IwaraIE(IwaraBaseIE):
IE_NAME = 'iwara'
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
# this video cannot be played because of migration
'only_matching': True,
'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
'info_dict': {
'id': 'k2ayoueezfkx6gvq',
@@ -79,25 +94,29 @@ class IwaraIE(IwaraBaseIE):
'timestamp': 1677843869,
'modified_timestamp': 1679056362,
},
'skip': 'this video cannot be played because of migration',
}, {
'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
'md5': '20691ce1473ec2766c0788e14c60ce66',
'md5': '7645f966f069b8ec9210efd9130c9aad',
'info_dict': {
'id': '1ywe1sbkqwumpdxz5',
'ext': 'mp4',
'age_limit': 18,
'title': 'Aponia 阿波尼亚SEX Party Tonight 手脱衣 大奶 裸腿',
'description': 'md5:0c4c310f2e0592d68b9f771d348329ca',
'uploader': '龙也zZZ',
'title': 'Aponia アポニア SEX Party Tonight 手脱衣 巨乳 ',
'description': 'md5:3f60016fff22060eef1ef26d430b1f67',
'uploader': 'Lyu ya',
'uploader_id': 'user792540',
'tags': [
'uncategorized'
],
'like_count': 1809,
'view_count': 25156,
'comment_count': 1,
'like_count': int,
'view_count': int,
'comment_count': int,
'timestamp': 1678732213,
'modified_timestamp': 1679110271,
'modified_timestamp': int,
'thumbnail': 'https://files.iwara.tv/image/thumbnail/581d12b5-46f4-4f15-beb2-cfe2cde5d13d/thumbnail-00.jpg',
'modified_date': '20230614',
'upload_date': '20230313',
},
}, {
'url': 'https://iwara.tv/video/blggmfno8ghl725bg',
@@ -112,12 +131,15 @@ class IwaraIE(IwaraBaseIE):
'tags': [
'pee'
],
'like_count': 192,
'view_count': 12119,
'comment_count': 0,
'like_count': int,
'view_count': int,
'comment_count': int,
'timestamp': 1598880567,
'modified_timestamp': 1598908995,
'availability': 'needs_auth',
'modified_timestamp': int,
'upload_date': '20200831',
'modified_date': '20230605',
'thumbnail': 'https://files.iwara.tv/image/thumbnail/7693e881-d302-42a4-a780-f16d66b5dadd/thumbnail-00.jpg',
# 'availability': 'needs_auth',
},
}]
@@ -142,17 +164,16 @@ class IwaraIE(IwaraBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
username, password = self._get_login_info()
headers = {
'Authorization': f'Bearer {self._get_media_token()}',
} if username and password else None
video_data = self._download_json(f'https://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True, headers=headers)
username, _ = self._get_login_info()
video_data = self._download_json(
f'https://api.iwara.tv/video/{video_id}', video_id,
expected_status=lambda x: True, headers=self._get_media_token())
errmsg = video_data.get('message')
# at this point we can actually get uploaded user info, but do we need it?
if errmsg == 'errors.privateVideo':
self.raise_login_required('Private video. Login if you have permissions to watch')
self.raise_login_required('Private video. Login if you have permissions to watch', method='password')
elif errmsg == 'errors.notFound' and not username:
self.raise_login_required('Video may need login to view')
self.raise_login_required('Video may need login to view', method='password')
elif errmsg: # None if success
raise ExtractorError(f'Iwara says: {errmsg}')
@@ -181,15 +202,6 @@ class IwaraIE(IwaraBaseIE):
'formats': list(self._extract_formats(video_id, video_data.get('fileUrl'))),
}
def _perform_login(self, username, password):
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
self.write_debug('Skipping logging in')
return
IwaraBaseIE._USERTOKEN = self._get_user_token(True)
self._get_media_token(True)
self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)
class IwaraUserIE(IwaraBaseIE):
_VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
@@ -200,12 +212,14 @@ class IwaraUserIE(IwaraBaseIE):
'url': 'https://iwara.tv/profile/user792540/videos',
'info_dict': {
'id': 'user792540',
'title': 'Lyu ya',
},
'playlist_mincount': 80,
'playlist_mincount': 70,
}, {
'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
'info_dict': {
'id': 'theblackbirdcalls',
'title': 'TheBlackbirdCalls',
},
'playlist_mincount': 723,
}, {
@@ -214,6 +228,13 @@ class IwaraUserIE(IwaraBaseIE):
}, {
'url': 'https://iwara.tv/profile/theblackbirdcalls',
'only_matching': True,
}, {
'url': 'https://www.iwara.tv/profile/lumymmd',
'info_dict': {
'id': 'lumymmd',
'title': 'Lumy MMD',
},
'playlist_mincount': 1,
}]
def _entries(self, playlist_id, user_id, page):
@@ -225,7 +246,7 @@ class IwaraUserIE(IwaraBaseIE):
'sort': 'date',
'user': user_id,
'limit': self._PER_PAGE,
})
}, headers=self._get_media_token())
for x in traverse_obj(videos, ('results', ..., 'id')):
yield self.url_result(f'https://iwara.tv/video/{x}')
@@ -244,7 +265,6 @@ class IwaraUserIE(IwaraBaseIE):
class IwaraPlaylistIE(IwaraBaseIE):
# the ID is a UUID, but I don't think it's necessary to write a concrete regex
_VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
IE_NAME = 'iwara:playlist'
_PER_PAGE = 32
@@ -260,7 +280,8 @@ class IwaraPlaylistIE(IwaraBaseIE):
def _entries(self, playlist_id, first_page, page):
videos = self._download_json(
'https://api.iwara.tv/videos', playlist_id, f'Downloading page {page}',
query={'page': page, 'limit': self._PER_PAGE}) if page else first_page
query={'page': page, 'limit': self._PER_PAGE},
headers=self._get_media_token()) if page else first_page
for x in traverse_obj(videos, ('results', ..., 'id')):
yield self.url_result(f'https://iwara.tv/video/{x}')
@@ -268,7 +289,7 @@ class IwaraPlaylistIE(IwaraBaseIE):
playlist_id = self._match_id(url)
page_0 = self._download_json(
f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
note='Requesting playlist info')
note='Requesting playlist info', headers=self._get_media_token())
return self.playlist_result(
OnDemandPagedList(


@@ -1,8 +1,8 @@
import functools
import json
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
HEADRequest,
@@ -12,7 +12,10 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_qs,
traverse_obj,
try_get,
url_or_none,
urlhandle_detect_ext,
urljoin,
)
@@ -52,38 +55,25 @@ class LBRYBaseIE(InfoExtractor):
'/%s:%s' % (claim_name, claim_id))
def _parse_stream(self, stream, url):
stream_value = stream.get('value') or {}
stream_type = stream_value.get('stream_type')
source = stream_value.get('source') or {}
media = stream_value.get(stream_type) or {}
signing_channel = stream.get('signing_channel') or {}
channel_name = signing_channel.get('name')
channel_claim_id = signing_channel.get('claim_id')
channel_url = None
if channel_name and channel_claim_id:
channel_url = self._permanent_url(url, channel_name, channel_claim_id)
stream_type = traverse_obj(stream, ('value', 'stream_type', {str}))
info = traverse_obj(stream, {
'title': ('value', 'title', {str}),
'thumbnail': ('value', 'thumbnail', 'url', {url_or_none}),
'description': ('value', 'description', {str}),
'license': ('value', 'license', {str}),
'timestamp': ('timestamp', {int_or_none}),
'release_timestamp': ('value', 'release_time', {int_or_none}),
'tags': ('value', 'tags', ..., {lambda x: x or None}),
'duration': ('value', stream_type, 'duration', {int_or_none}),
'channel': ('signing_channel', 'value', 'title', {str}),
'channel_id': ('signing_channel', 'claim_id', {str}),
})
channel_name = traverse_obj(stream, ('signing_channel', 'name', {str}))
if channel_name and info.get('channel_id'):
info['channel_url'] = self._permanent_url(url, channel_name, info['channel_id'])
info = {
'thumbnail': try_get(stream_value, lambda x: x['thumbnail']['url'], compat_str),
'description': stream_value.get('description'),
'license': stream_value.get('license'),
'timestamp': int_or_none(stream.get('timestamp')),
'release_timestamp': int_or_none(stream_value.get('release_time')),
'tags': stream_value.get('tags'),
'duration': int_or_none(media.get('duration')),
'channel': try_get(signing_channel, lambda x: x['value']['title']),
'channel_id': channel_claim_id,
'channel_url': channel_url,
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
'filesize': int_or_none(source.get('size')),
}
if stream_type == 'audio':
info['vcodec'] = 'none'
else:
info.update({
'width': int_or_none(media.get('width')),
'height': int_or_none(media.get('height')),
})
return info
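The rewrite replaces chains of .get() and try_get calls with one declarative traverse_obj template. A self-contained illustration of the pattern on made-up input:

from yt_dlp.utils import int_or_none, traverse_obj

stream = {'value': {'title': 'Demo', 'tags': ['music', ''], 'video': {'duration': '63'}}}
info = traverse_obj(stream, {
    'title': ('value', 'title', {str}),                     # {type} filters by isinstance
    'tags': ('value', 'tags', ..., {lambda x: x or None}),  # ... branches over the list; falsy tags are dropped
    'duration': ('value', 'video', 'duration', {int_or_none}),
})
# -> {'title': 'Demo', 'tags': ['music'], 'duration': 63}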
@@ -186,6 +176,28 @@ class LBRYIE(LBRYBaseIE):
'license': 'None',
},
'params': {'skip_download': True}
}, {
# original quality format w/higher resolution than HLS formats
'url': 'https://odysee.com/@wickedtruths:2/Biotechnological-Invasion-of-Skin-(April-2023):4',
'md5': '305b0b3b369bde1b984961f005b67193',
'info_dict': {
'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634',
'ext': 'mp4',
'title': 'Biotechnological Invasion of Skin (April 2023)',
'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c',
'channel': 'Wicked Truths',
'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
'timestamp': 1685790036,
'upload_date': '20230603',
'release_timestamp': 1685617473,
'release_date': '20230601',
'duration': 1063,
'thumbnail': 'https://thumbs.odycdn.com/4e6d39da4df0cfdad45f64e253a15959.webp',
'tags': ['smart skin surveillance', 'biotechnology invasion of skin', 'morgellons'],
'license': 'None',
'protocol': 'https', # test for direct mp4 download
},
}, {
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
'only_matching': True,
@@ -221,41 +233,64 @@ class LBRYIE(LBRYBaseIE):
display_id = display_id.split('/', 2)[-1].replace('/', ':')
else:
display_id = display_id.replace(':', '#')
display_id = compat_urllib_parse_unquote(display_id)
display_id = urllib.parse.unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
headers = {'Referer': 'https://odysee.com/'}
if result['value'].get('stream_type') in self._SUPPORTED_STREAM_TYPES:
formats = []
stream_type = traverse_obj(result, ('value', 'stream_type', {str}))
if stream_type in self._SUPPORTED_STREAM_TYPES:
claim_id, is_live = result['claim_id'], False
streaming_url = self._call_api_proxy(
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
# GET request returns original video/audio file if available
ext = urlhandle_detect_ext(self._request_webpage(
streaming_url, display_id, 'Checking for original quality', headers=headers))
if ext != 'm3u8':
formats.append({
'url': streaming_url,
'format_id': 'original',
'quality': 1,
**traverse_obj(result, ('value', {
'ext': ('source', (('name', {determine_ext}), ('media_type', {mimetype2ext}))),
'filesize': ('source', 'size', {int_or_none}),
'width': ('video', 'width', {int_or_none}),
'height': ('video', 'height', {int_or_none}),
}), get_all=False),
'vcodec': 'none' if stream_type == 'audio' else None,
})
# HEAD request returns redirect response to m3u8 URL if available
final_url = self._request_webpage(
HEADRequest(streaming_url), display_id, headers=headers,
note='Downloading streaming redirect url info').geturl()
elif result.get('value_type') == 'stream':
claim_id, is_live = result['signing_channel']['claim_id'], True
live_data = self._download_json(
'https://api.odysee.live/livestream/is_live', claim_id,
query={'channel_claim_id': claim_id},
note='Downloading livestream JSON metadata')['data']
streaming_url = final_url = live_data.get('VideoURL')
final_url = live_data.get('VideoURL')
# Upcoming videos may still give VideoURL
if not live_data.get('Live'):
streaming_url = final_url = None
final_url = None
self.raise_no_formats('This stream is not live', True, claim_id)
else:
raise UnsupportedError(url)
info = self._parse_stream(result, url)
if determine_ext(final_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
final_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', live=is_live, headers=headers)
else:
info['url'] = streaming_url
formats.extend(self._extract_m3u8_formats(
final_url, display_id, 'mp4', m3u8_id='hls', live=is_live, headers=headers))
return {
**info,
**self._parse_stream(result, url),
'id': claim_id,
'title': result['value']['title'],
'formats': formats,
'is_live': is_live,
'http_headers': headers,
}
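The probe relies on the CDN answering a GET with the original file while redirecting a HEAD to the HLS manifest when no original is kept. Roughly, outside the extractor framework (streaming_url is assumed to come from the get API call above):

import urllib.request

class HEADRequest(urllib.request.Request):
    def get_method(self):
        return 'HEAD'

def probe(streaming_url):
    # urlopen follows redirects, so geturl() exposes the final location;
    # an .m3u8 target means only transcoded HLS renditions are available
    final_url = urllib.request.urlopen(HEADRequest(streaming_url)).geturl()
    return 'hls' if final_url.endswith('.m3u8') else 'original'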
@@ -299,14 +334,12 @@ class LBRYChannelIE(LBRYBaseIE):
if not (stream_claim_name and stream_claim_id):
continue
info = self._parse_stream(item, url)
info.update({
yield {
**self._parse_stream(item, url),
'_type': 'url',
'id': stream_claim_id,
'title': try_get(item, lambda x: x['value']['title']),
'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
})
yield info
}
def _real_extract(self, url):
display_id = self._match_id(url).replace(':', '#')


@@ -1,17 +1,17 @@
import base64
import time
import urllib.error
import uuid
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_resolution,
traverse_obj,
try_get,
url_or_none,
urljoin,
)
@@ -30,16 +30,18 @@ class MGTVIE(InfoExtractor):
'duration': 7461,
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://w.mgtv.com/b/427837/15588271.html',
'info_dict': {
'id': '15588271',
'ext': 'mp4',
'title': '春日迟迟再出发 沉浸版',
'title': '春日迟迟再出发 沉浸版第1期陆莹结婚半年查出肾炎被离婚 吴雅婷把一半票根退给前夫',
'description': 'md5:a7a05a05b1aa87bd50cae619b19bbca6',
'thumbnail': r're:^https?://.+\.jpg',
'duration': 4026,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://w.mgtv.com/b/333652/7329822.html',
'info_dict': {
@@ -50,6 +52,7 @@ class MGTVIE(InfoExtractor):
'thumbnail': r're:^https?://.+\.jpg',
'duration': 2656,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://w.mgtv.com/b/427837/15591647.html',
'only_matching': True,
@@ -64,6 +67,13 @@ class MGTVIE(InfoExtractor):
'only_matching': True,
}]
_RESOLUTIONS = {
'标清': ('480p', '854x480'),
'高清': ('540p', '960x540'),
'超清': ('720p', '1280x720'),
'蓝光': ('1080p', '1920x1080'),
}
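Each label maps to a (4:3, 16:9) pair, and the stream's scale field picks the slot, for example (illustrative values only):

name, scale = '超清', '16:9'
resolution = MGTVIE._RESOLUTIONS[name][1 if scale == '16:9' else 0]
# -> '1280x720'; a 4:3 stream would get the bare '720p' label instead,
# and parse_resolution() turns either form into width/height fields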
def _real_extract(self, url):
video_id = self._match_id(url)
tk2 = base64.urlsafe_b64encode(
@@ -76,55 +86,60 @@ class MGTVIE(InfoExtractor):
'type': 'pch5'
}, headers=self.geo_verification_headers())['data']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read().decode(), None)
if error.get('code') == 40005:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
raise ExtractorError(error['msg'], expected=True)
raise
info = api_data['info']
title = info['title'].strip()
stream_data = self._download_json(
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
'pm2': api_data['atc']['pm2'],
'tk2': tk2,
'pm2': api_data['atc']['pm2'],
'video_id': video_id,
'type': 'pch5',
'src': 'intelmgtv',
}, headers=self.geo_verification_headers())['data']
stream_domain = stream_data['stream_domain'][0]
stream_domain = traverse_obj(stream_data, ('stream_domain', ..., {url_or_none}), get_all=False)
formats = []
for idx, stream in enumerate(stream_data['stream']):
stream_path = stream.get('url')
if not stream_path:
continue
format_data = self._download_json(
stream_domain + stream_path, video_id,
note=f'Download video info for format #{idx}')
format_url = format_data.get('info')
for idx, stream in enumerate(traverse_obj(stream_data, ('stream', lambda _, v: v['url']))):
stream_name = traverse_obj(stream, 'name', 'standardName', 'barName', expected_type=str)
resolution = traverse_obj(
self._RESOLUTIONS, (stream_name, 1 if stream.get('scale') == '16:9' else 0))
format_url = traverse_obj(self._download_json(
urljoin(stream_domain, stream['url']), video_id, fatal=False,
note=f'Downloading video info for format {resolution or stream_name}'),
('info', {url_or_none}))
if not format_url:
continue
tbr = int_or_none(stream.get('filebitrate') or self._search_regex(
r'_(\d+)_mp4/', format_url, 'tbr', default=None))
formats.append({
'format_id': compat_str(tbr or idx),
'url': url_or_none(format_url),
'format_id': str(tbr or idx),
'url': format_url,
'ext': 'mp4',
'tbr': tbr,
'vcodec': stream.get('videoFormat'),
'acodec': stream.get('audioFormat'),
**parse_resolution(resolution),
'protocol': 'm3u8_native',
'http_headers': {
'Referer': url,
},
'format_note': stream.get('name'),
'format_note': stream_name,
})
return {
'id': video_id,
'title': title,
'formats': formats,
'description': info.get('desc'),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('thumb'),
**traverse_obj(api_data, ('info', {
'title': ('title', {str.strip}),
'description': ('desc', {str}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('thumb', {url_or_none}),
})),
'subtitles': self.extract_subtitles(video_id, stream_domain),
}


@@ -1,32 +1,39 @@
import datetime
import re
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
InAdvancePagedList,
orderedSet,
OnDemandPagedList,
remove_end,
str_to_int,
unified_strdate,
)
class MotherlessIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
_VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/|G[VIG]?[A-F0-9]+/)?(?P<id>[A-F0-9]+)'
_TESTS = [{
'url': 'http://motherless.com/AC3FFE1',
'md5': '310f62e325a9fafe64f68c0bccb6e75f',
'url': 'http://motherless.com/EE97006',
'md5': 'cb5e7438f7a3c4e886b7bccc1292a3bc',
'info_dict': {
'id': 'AC3FFE1',
'id': 'EE97006',
'ext': 'mp4',
'title': 'Fucked in the ass while playing PS3',
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
'upload_date': '20100913',
'uploader_id': 'famouslyfuckedup',
'title': 'Dogging blond Brit getting glazed (comp)',
'categories': ['UK', 'slag', 'whore', 'dogging', 'cunt', 'cumhound', 'big tits', 'Pearl Necklace'],
'upload_date': '20230519',
'uploader_id': 'deathbird',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
}
'comment_count': int,
'view_count': int,
'like_count': int,
},
'params': {
# Incomplete cert chains
'nocheckcertificate': True,
},
}, {
'url': 'http://motherless.com/532291B',
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
@@ -49,16 +56,36 @@ class MotherlessIE(InfoExtractor):
'id': '633979F',
'ext': 'mp4',
'title': 'Turtlette',
'categories': ['superheroine heroine superher'],
'categories': ['superheroine heroine superher'],
'upload_date': '20140827',
'uploader_id': 'shade0230',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
}
'like_count': int,
'comment_count': int,
'view_count': int,
},
'params': {
'nocheckcertificate': True,
},
}, {
# no keywords
'url': 'http://motherless.com/8B4BBC1',
'only_matching': True,
'info_dict': {
'id': '8B4BBC1',
'ext': 'mp4',
'title': 'VIDEO00441.mp4',
'categories': [],
'upload_date': '20160214',
'uploader_id': 'NMWildGirl',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
'like_count': int,
'comment_count': int,
'view_count': int,
},
'params': {
'nocheckcertificate': True,
},
}, {
# see https://motherless.com/videos/recent for recent videos with
# uploaded date in "ago" format
@@ -72,9 +99,12 @@ class MotherlessIE(InfoExtractor):
'uploader_id': 'anonymous',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
'like_count': int,
'comment_count': int,
'view_count': int,
},
'params': {
'skip_download': True,
'nocheckcertificate': True,
},
}]
@@ -128,10 +158,8 @@ class MotherlessIE(InfoExtractor):
(r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
webpage, 'uploader_id', fatal=False)
categories = self._html_search_meta('keywords', webpage, default=None)
if categories:
categories = [cat.strip() for cat in categories.split(',')]
categories = self._html_search_meta('keywords', webpage, default='')
categories = [cat.strip() for cat in categories.split(',') if cat.strip()]
return {
'id': video_id,
@@ -148,102 +176,97 @@ class MotherlessIE(InfoExtractor):
}
class MotherlessGroupIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
class MotherlessPaginatedIE(InfoExtractor):
_PAGE_SIZE = 60
def _correct_path(self, url, item_id):
raise NotImplementedError('This method must be implemented by subclasses')
def _extract_entries(self, webpage, base):
for mobj in re.finditer(r'href="[^"]*(?P<href>/[A-F0-9]+)"\s+title="(?P<title>[^"]+)',
webpage):
video_url = urllib.parse.urljoin(base, mobj.group('href'))
video_id = MotherlessIE.get_temp_id(video_url)
if video_id:
yield self.url_result(video_url, MotherlessIE, video_id, mobj.group('title'))
def _real_extract(self, url):
item_id = self._match_id(url)
real_url = self._correct_path(url, item_id)
webpage = self._download_webpage(real_url, item_id, 'Downloading page 1')
def get_page(idx):
page = idx + 1
current_page = webpage if not idx else self._download_webpage(
real_url, item_id, note=f'Downloading page {page}', query={'page': page})
yield from self._extract_entries(current_page, real_url)
return self.playlist_result(
OnDemandPagedList(get_page, self._PAGE_SIZE), item_id,
remove_end(self._html_extract_title(webpage), ' | MOTHERLESS.COM ™'))
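OnDemandPagedList calls the page function lazily, so only the slices actually consumed are downloaded. A minimal sketch with a fake page function:

from yt_dlp.utils import OnDemandPagedList

PAGE_SIZE = 60

def get_page(idx):
    # in the extractor this would download page idx + 1 and yield url_results
    yield from range(idx * PAGE_SIZE, (idx + 1) * PAGE_SIZE)

pages = OnDemandPagedList(get_page, PAGE_SIZE)
print(pages[0], pages[125])  # only pages 0 and 2 are ever "downloaded"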
class MotherlessGroupIE(MotherlessPaginatedIE):
_VALID_URL = r'https?://(?:www\.)?motherless\.com/g[vifm]?/(?P<id>[a-z0-9_]+)/?(?:$|[#?])'
_TESTS = [{
'url': 'http://motherless.com/g/movie_scenes',
'url': 'http://motherless.com/gv/movie_scenes',
'info_dict': {
'id': 'movie_scenes',
'title': 'Movie Scenes',
'description': 'Hot and sexy scenes from "regular" movies... '
'Beautiful actresses fully nude... A looot of '
'skin! :)Enjoy!',
},
'playlist_mincount': 662,
'playlist_mincount': 540,
}, {
'url': 'http://motherless.com/gv/sex_must_be_funny',
'url': 'http://motherless.com/g/sex_must_be_funny',
'info_dict': {
'id': 'sex_must_be_funny',
'title': 'Sex must be funny',
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
'any kind!'
},
'playlist_mincount': 0,
'expected_warnings': [
'This group has no videos.',
]
'playlist_count': 0,
}, {
'url': 'https://motherless.com/g/beautiful_cock',
'url': 'https://motherless.com/gv/beautiful_cock',
'info_dict': {
'id': 'beautiful_cock',
'title': 'Beautiful Cock',
'description': 'Group for lovely cocks yours, mine, a friends anything human',
},
'playlist_mincount': 2500,
'playlist_mincount': 2040,
}]
@classmethod
def suitable(cls, url):
return (False if MotherlessIE.suitable(url)
else super(MotherlessGroupIE, cls).suitable(url))
def _correct_path(self, url, item_id):
return urllib.parse.urljoin(url, f'/gv/{item_id}')
def _extract_entries(self, webpage, base):
entries = []
for mobj in re.finditer(
r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
webpage):
video_url = compat_urlparse.urljoin(base, mobj.group('href'))
if not MotherlessIE.suitable(video_url):
continue
video_id = MotherlessIE._match_id(video_url)
title = mobj.group('title')
entries.append(self.url_result(
video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
video_title=title))
# Alternative fallback
if not entries:
entries = [
self.url_result(
compat_urlparse.urljoin(base, '/' + entry_id),
ie=MotherlessIE.ie_key(), video_id=entry_id)
for entry_id in orderedSet(re.findall(
r'data-codename=["\']([A-Z0-9]+)', webpage))]
return entries
def _real_extract(self, url):
group_id = self._match_id(url)
page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
webpage = self._download_webpage(page_url, group_id)
title = self._search_regex(
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
description = self._html_search_meta(
'description', webpage, fatal=False)
page_count = str_to_int(self._search_regex(
r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
webpage, 'page_count', default=0))
if not page_count:
message = self._search_regex(
r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
webpage, 'error_msg', default=None) or 'This group has no videos.'
self.report_warning(message, group_id)
page_count = 1
PAGE_SIZE = 80
class MotherlessGalleryIE(MotherlessPaginatedIE):
_VALID_URL = r'https?://(?:www\.)?motherless\.com/G[VIG]?(?P<id>[A-F0-9]+)/?(?:$|[#?])'
_TESTS = [{
'url': 'https://motherless.com/GV338999F',
'info_dict': {
'id': '338999F',
'title': 'Random',
},
'playlist_mincount': 190,
}, {
'url': 'https://motherless.com/GVABD6213',
'info_dict': {
'id': 'ABD6213',
'title': 'Cuties',
},
'playlist_mincount': 2,
}, {
'url': 'https://motherless.com/GVBCF7622',
'info_dict': {
'id': 'BCF7622',
'title': 'Vintage',
},
'playlist_count': 0,
}, {
'url': 'https://motherless.com/G035DE2F',
'info_dict': {
'id': '035DE2F',
'title': 'General',
},
'playlist_mincount': 420,
}]
def _get_page(idx):
if idx > 0:
webpage = self._download_webpage(
page_url, group_id, query={'page': idx + 1},
note='Downloading page %d/%d' % (idx + 1, page_count)
)
for entry in self._extract_entries(webpage, url):
yield entry
playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
return {
'_type': 'playlist',
'id': group_id,
'title': title,
'description': description,
'entries': playlist
}
def _correct_path(self, url, item_id):
return urllib.parse.urljoin(url, f'/GV{item_id}')


@@ -3,7 +3,7 @@ import json
import urllib.error
from .common import InfoExtractor
from ..utils import ExtractorError, parse_iso8601
from ..utils import ExtractorError, make_archive_id, parse_iso8601, remove_start
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
@@ -65,19 +65,20 @@ class NebulaBaseIE(InfoExtractor):
return response['token']
def _fetch_video_formats(self, slug):
stream_info = self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/stream/',
stream_info = self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/stream/',
video_id=slug,
auth_type='bearer',
note='Fetching video stream info')
manifest_url = stream_info['manifest']
return self._extract_m3u8_formats_and_subtitles(manifest_url, slug)
return self._extract_m3u8_formats_and_subtitles(manifest_url, slug, 'mp4')
def _build_video_info(self, episode):
fmts, subs = self._fetch_video_formats(episode['slug'])
channel_slug = episode['channel_slug']
channel_title = episode['channel_title']
zype_id = episode.get('zype_id')
return {
'id': episode['zype_id'],
'id': remove_start(episode['id'], 'video_episode:'),
'display_id': episode['slug'],
'formats': fmts,
'subtitles': subs,
@@ -99,6 +100,9 @@ class NebulaBaseIE(InfoExtractor):
'uploader_url': f'https://nebula.tv/{channel_slug}',
'series': channel_title,
'creator': channel_title,
'extractor_key': NebulaIE.ie_key(),
'extractor': NebulaIE.IE_NAME,
'_old_archive_ids': [make_archive_id(NebulaIE, zype_id)] if zype_id else None,
}
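Because the site migrated from Zype IDs to UUIDs, _old_archive_ids keeps existing download archives working: entries recorded under the old IDs still match. What gets written is just the lowercased IE key plus the ID (example ID made up):

from yt_dlp.utils import make_archive_id

print(make_archive_id('Nebula', '5c271b40b13fd613090034fd'))
# -> 'nebula 5c271b40b13fd613090034fd'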
def _perform_login(self, username=None, password=None):
@@ -113,7 +117,7 @@ class NebulaIE(NebulaBaseIE):
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
'md5': '14944cfee8c7beeea106320c47560efc',
'info_dict': {
'id': '5c271b40b13fd613090034fd',
'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
'ext': 'mp4',
'title': 'That Time Disney Remade Beauty and the Beast',
'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
@@ -137,22 +141,22 @@ class NebulaIE(NebulaBaseIE):
'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
'md5': 'd05739cf6c38c09322422f696b569c23',
'info_dict': {
'id': '5e7e78171aaf320001fbd6be',
'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
'ext': 'mp4',
'title': 'Landing Craft - How The Allies Got Ashore',
'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
'upload_date': '20200327',
'timestamp': 1585348140,
'channel': 'Real Engineering',
'channel_id': 'realengineering',
'uploader': 'Real Engineering',
'uploader_id': 'realengineering',
'series': 'Real Engineering',
'channel': 'Real Engineering — The Logistics of D-Day',
'channel_id': 'd-day',
'uploader': 'Real Engineering — The Logistics of D-Day',
'uploader_id': 'd-day',
'series': 'Real Engineering — The Logistics of D-Day',
'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
'creator': 'Real Engineering',
'creator': 'Real Engineering — The Logistics of D-Day',
'duration': 841,
'channel_url': 'https://nebula.tv/realengineering',
'uploader_url': 'https://nebula.tv/realengineering',
'channel_url': 'https://nebula.tv/d-day',
'uploader_url': 'https://nebula.tv/d-day',
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
},
},
@@ -160,7 +164,7 @@ class NebulaIE(NebulaBaseIE):
'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
'md5': 'ebe28a7ad822b9ee172387d860487868',
'info_dict': {
'id': '5e779ebdd157bc0001d1c75a',
'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
'ext': 'mp4',
'title': 'Episode 1: The Draw',
'description': r'contains:There’s free money on offer… if the players can all work together.',
@@ -190,7 +194,7 @@ class NebulaIE(NebulaBaseIE):
]
def _fetch_video_metadata(self, slug):
return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
return self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/',
video_id=slug,
auth_type='bearer',
note='Fetching video meta data')


@@ -2,12 +2,15 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
parse_duration,
traverse_obj,
unescapeHTML,
unified_timestamp,
url_or_none,
urljoin,
url_or_none
)
@@ -492,3 +495,73 @@ class NhkRadioNewsPageIE(InfoExtractor):
def _real_extract(self, url):
return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)
class NhkRadiruLiveIE(InfoExtractor):
_GEO_COUNTRIES = ['JP']
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/player/\?ch=(?P<id>r[12]|fm)'
_TESTS = [{
# radio 1, no area specified
'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
'info_dict': {
'id': 'r1-tokyo',
'title': 're:^ネットラジオ第1 東京.+$',
'ext': 'm4a',
'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
'live_status': 'is_live',
},
}, {
# radio 2, area specified
# (the area doesn't actually matter, r2 is national)
'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
'info_dict': {
'id': 'r2-fukuoka',
'title': 're:^ネットラジオ第2 福岡.+$',
'ext': 'm4a',
'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
'live_status': 'is_live',
},
}, {
# fm, area specified
'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
'info_dict': {
'id': 'fm-sapporo',
'title': 're:^NHKネットラジオFM 札幌.+$',
'ext': 'm4a',
'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
'live_status': 'is_live',
}
}]
_NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}
def _real_extract(self, url):
station = self._match_id(url)
area = self._configuration_arg('area', ['tokyo'])[0]
config = self._download_xml(
'https://www.nhk.or.jp/radio/config/config_web.xml', station, 'Downloading area information')
data = config.find(f'.//data//area[.="{area}"]/..')
if not data:
raise ExtractorError('Invalid area. Valid areas are: %s' % ', '.join(
[i.text for i in config.findall('.//data//area')]), expected=True)
noa_info = self._download_json(
f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
station, note=f'Downloading {area} station metadata')
present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))
return {
'title': ' '.join(traverse_obj(present_info, (('service', 'area',), 'name', {str}))),
'id': join_nonempty(station, area),
'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
'url': 'url',
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
})),
'formats': self._extract_m3u8_formats(data.find(f'{station}hls').text, station),
'is_live': True,
}
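The area lookup uses ElementTree's XPath subset: match an <area> element by its text, then step back up to the parent <data> node. A toy version of the config illustrates it:

import xml.etree.ElementTree as ET

config = ET.fromstring(
    '<config><data><area>tokyo</area><areakey>130</areakey></data>'
    '<data><area>sapporo</area><areakey>010</areakey></data></config>')
data = config.find('.//data//area[.="sapporo"]/..')  # parent of the matching <area>
print(data.find('areakey').text)  # -> 010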


@@ -1,3 +1,5 @@
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
@@ -7,6 +9,7 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
HEADRequest,
float_or_none,
int_or_none,
qualities,
@@ -15,6 +18,7 @@ from ..utils import (
unescapeHTML,
unified_strdate,
unsmuggle_url,
url_or_none,
urlencode_postdata,
)
@@ -41,7 +45,7 @@ class OdnoklassnikiIE(InfoExtractor):
'ext': 'mp4',
'timestamp': 1545580896,
'view_count': int,
'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
'thumbnail': r're:^https?://.*\.jpg$',
'title': 'Народная забава',
'uploader': 'Nevata',
'upload_date': '20181223',
@@ -65,13 +69,14 @@ class OdnoklassnikiIE(InfoExtractor):
'title': str,
'uploader': str,
},
'skip': 'vk extractor error',
}, {
# metadata in JSON
# metadata in JSON, webm_dash with Firefox UA
'url': 'http://ok.ru/video/20079905452',
'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
'md5': '8f477d8931c531374a3e36daec617b2c',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
'ext': 'webm',
'title': 'Культура меняет нас (прекрасный ролик!))',
'thumbnail': str,
'duration': 100,
@@ -81,10 +86,14 @@ class OdnoklassnikiIE(InfoExtractor):
'like_count': int,
'age_limit': 0,
},
'params': {
'format': 'bv[ext=webm]',
'http_headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0'},
},
}, {
# metadataUrl
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
'md5': 'f8c951122516af72e6e6ffdd3c41103b',
'md5': '2bae2f58eefe1b3d26f3926c4a64d2f3',
'info_dict': {
'id': '63567059965189-0',
'ext': 'mp4',
@@ -98,10 +107,11 @@ class OdnoklassnikiIE(InfoExtractor):
'age_limit': 0,
'start_time': 5,
},
'params': {'skip_download': 'm3u8'},
}, {
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
'url': 'https://ok.ru/video/3952212382174',
'md5': '91749d0bd20763a28d083fa335bbd37a',
'md5': '5fb5f83ce16cb212d6bf887282b5da53',
'info_dict': {
'id': '5axVgHHDBvU',
'ext': 'mp4',
@@ -116,7 +126,7 @@ class OdnoklassnikiIE(InfoExtractor):
'live_status': 'not_live',
'view_count': int,
'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
'uploader_url': 'https://www.youtube.com/@MrKewlkid94',
'channel_follower_count': int,
'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
@@ -145,7 +155,6 @@ class OdnoklassnikiIE(InfoExtractor):
},
'skip': 'Video has not been found',
}, {
# TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
'note': 'Only available in mobile webpage',
'url': 'https://m.ok.ru/video/2361249957145',
'info_dict': {
@@ -153,8 +162,8 @@ class OdnoklassnikiIE(InfoExtractor):
'ext': 'mp4',
'title': 'Быковское крещение',
'duration': 3038.181,
'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
},
'skip': 'HTTP Error 400',
}, {
'note': 'subtitles',
'url': 'https://ok.ru/video/4249587550747',
@@ -226,6 +235,12 @@ class OdnoklassnikiIE(InfoExtractor):
'skip': 'Site no longer embeds',
}]
def _clear_cookies(self, cdn_url):
# Direct http downloads will fail if CDN cookies are set
# so we need to reset them after each format extraction
self.cookiejar.clear(domain='.mycdn.me')
self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname)
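The same reset on a plain http.cookiejar.CookieJar needs a guard, since the stdlib clear() raises KeyError when no cookie matches the domain (a sketch, not the extractor's actual jar):

import http.cookiejar
import urllib.parse

def clear_cdn_cookies(jar, cdn_url):
    for domain in ('.mycdn.me', urllib.parse.urlparse(cdn_url).hostname):
        try:
            jar.clear(domain=domain)
        except KeyError:  # stdlib jars raise if nothing matches that domain
            pass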
@classmethod
def _extract_embed_urls(cls, url, webpage):
for x in super()._extract_embed_urls(url, webpage):
@@ -364,14 +379,22 @@ class OdnoklassnikiIE(InfoExtractor):
formats = [{
'url': f['url'],
'ext': 'mp4',
'format_id': f['name'],
} for f in metadata['videos']]
'format_id': f.get('name'),
} for f in traverse_obj(metadata, ('videos', lambda _, v: url_or_none(v['url'])))]
m3u8_url = metadata.get('hlsManifestUrl')
m3u8_url = traverse_obj(metadata, 'hlsManifestUrl', 'ondemandHls')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._clear_cookies(m3u8_url)
for mpd_id, mpd_key in [('dash', 'ondemandDash'), ('webm', 'metadataWebmUrl')]:
mpd_url = metadata.get(mpd_key)
if mpd_url:
formats.extend(self._extract_mpd_formats(
mpd_url, video_id, mpd_id=mpd_id, fatal=False))
self._clear_cookies(mpd_url)
dash_manifest = metadata.get('metadataEmbedded')
if dash_manifest:
@@ -390,6 +413,7 @@ class OdnoklassnikiIE(InfoExtractor):
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
self._clear_cookies(m3u8_url)
rtmp_url = metadata.get('rtmpUrl')
if rtmp_url:
formats.append({
@@ -423,6 +447,10 @@ class OdnoklassnikiIE(InfoExtractor):
r'data-video="(.+?)"', webpage, 'json data')
json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}
redirect_url = self._request_webpage(HEADRequest(
json_data['videoSrc']), video_id, 'Requesting download URL').geturl()
self._clear_cookies(redirect_url)
return {
'id': video_id,
'title': json_data.get('videoName'),
@@ -430,7 +458,7 @@ class OdnoklassnikiIE(InfoExtractor):
'thumbnail': json_data.get('videoPosterSrc'),
'formats': [{
'format_id': 'mobile',
'url': json_data.get('videoSrc'),
'url': redirect_url,
'ext': 'mp4',
}]
}


@@ -0,0 +1,94 @@
from .common import InfoExtractor
from ..utils import extract_attributes, merge_dicts, remove_end
class RheinMainTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rheinmaintv\.de/sendungen/(?:[\w-]+/)*(?P<video_id>(?P<display_id>[\w-]+)/vom-\d{2}\.\d{2}\.\d{4}(?:/\d+)?)'
_TESTS = [{
'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/auf-dem-weg-zur-deutschen-meisterschaft/vom-07.11.2022/',
'info_dict': {
'id': 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022',
'ext': 'ismv', # ismv+isma will be merged into mp4
'alt_title': 'Auf dem Weg zur Deutschen Meisterschaft',
'title': 'Auf dem Weg zur Deutschen Meisterschaft',
'upload_date': '20221108',
'view_count': int,
'display_id': 'auf-dem-weg-zur-deutschen-meisterschaft',
'thumbnail': r're:^https://.+\.jpg',
'description': 'md5:48c59b74192bc819a9b34af1d5ed1eb9',
'timestamp': 1667933057,
'duration': 243.0,
},
'params': {'skip_download': 'ism'},
}, {
'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
'info_dict': {
'id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften-vom-14.11.2022',
'ext': 'ismv',
'title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
'timestamp': 1668526214,
'display_id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften',
'alt_title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
'view_count': int,
'thumbnail': r're:^https://.+\.jpg',
'duration': 345.0,
'description': 'md5:9370ba29526984006c2cba1372e5c5a0',
'upload_date': '20221115',
},
'params': {'skip_download': 'ism'},
}, {
'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/casino-mainz-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
'info_dict': {
'id': 'casino-mainz-bei-den-deutschen-meisterschaften-vom-14.11.2022',
'ext': 'ismv',
'title': 'Casino Mainz bei den Deutschen Meisterschaften',
'view_count': int,
'timestamp': 1668527402,
'alt_title': 'Casino Mainz bei den Deutschen Meisterschaften',
'upload_date': '20221115',
'display_id': 'casino-mainz-bei-den-deutschen-meisterschaften',
'duration': 348.0,
'thumbnail': r're:^https://.+\.jpg',
'description': 'md5:70fc1660eeba96da17199e5bdff4c0aa',
},
'params': {'skip_download': 'ism'},
}, {
'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/bricks4kids/vom-22.06.2022/',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = self._match_valid_url(url)
display_id = mobj.group('display_id')
video_id = mobj.group('video_id').replace('/', '-')
webpage = self._download_webpage(url, video_id)
source, img = self._search_regex(r'(?s)(?P<source><source[^>]*>)(?P<img><img[^>]*>)',
webpage, 'video', group=('source', 'img'))
source = extract_attributes(source)
img = extract_attributes(img)
raw_json_ld = list(self._yield_json_ld(webpage, video_id))
json_ld = self._json_ld(raw_json_ld, video_id)
json_ld.pop('url', None)
ism_manifest_url = (
source.get('src')
or next(json_ld.get('embedUrl') for json_ld in raw_json_ld if json_ld.get('@type') == 'VideoObject')
)
formats, subtitles = self._extract_ism_formats_and_subtitles(ism_manifest_url, video_id)
return merge_dicts({
'id': video_id,
'display_id': display_id,
'title':
self._html_search_regex(r'<h1><span class="title">([^<]*)</span>',
webpage, 'headline', default=None)
or img.get('title') or json_ld.get('title') or self._og_search_title(webpage)
or remove_end(self._html_extract_title(webpage), ' -'),
'alt_title': img.get('alt'),
'description': json_ld.get('description') or self._og_search_description(webpage),
'formats': formats,
'subtitles': subtitles,
'thumbnails': [{'url': img['src']}] if 'src' in img else json_ld.get('thumbnails'),
}, json_ld)


@@ -1,10 +1,15 @@
import itertools
import urllib.error
from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
remove_start,
str_or_none,
traverse_obj,
unified_timestamp,
url_or_none,
)
@@ -51,7 +56,40 @@ class RozhlasIE(InfoExtractor):
}
class RozhlasVltavaIE(InfoExtractor):
class RozhlasBaseIE(InfoExtractor):
def _extract_formats(self, entry, audio_id):
formats = []
for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))):
ext = audio.get('variant')
for retry in self.RetryManager():
if retry.attempt > 1:
self._sleep(1, audio_id)
try:
if ext == 'dash':
formats.extend(self._extract_mpd_formats(
audio['url'], audio_id, mpd_id=ext))
elif ext == 'hls':
formats.extend(self._extract_m3u8_formats(
audio['url'], audio_id, 'm4a', m3u8_id=ext))
else:
formats.append({
'url': audio['url'],
'ext': ext,
'format_id': ext,
'abr': int_or_none(audio.get('bitrate')),
'acodec': ext,
'vcodec': 'none',
})
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 429:
retry.error = e.cause
else:
self.report_warning(e.msg)
return formats
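RetryManager is yt-dlp's internal retry loop; the plain-Python shape of the 429 handling above is roughly:

import time
import urllib.error
import urllib.request

def fetch_with_retry(url, retries=3):
    for attempt in range(1, retries + 1):
        try:
            return urllib.request.urlopen(url).read()
        except urllib.error.HTTPError as e:
            # only rate limiting is retried; other HTTP errors propagate
            if e.code != 429 or attempt == retries:
                raise
            time.sleep(1)  # the extractor also sleeps 1s between attempts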
class RozhlasVltavaIE(RozhlasBaseIE):
_VALID_URL = r'https?://(?:\w+\.rozhlas|english\.radio)\.cz/[\w-]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
@@ -168,33 +206,14 @@ class RozhlasVltavaIE(InfoExtractor):
}]
def _extract_video(self, entry):
formats = []
audio_id = entry['meta']['ga']['contentId']
for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))):
ext = audio.get('variant')
if ext == 'dash':
formats.extend(self._extract_mpd_formats(
audio['url'], audio_id, mpd_id=ext, fatal=False))
elif ext == 'hls':
formats.extend(self._extract_m3u8_formats(
audio['url'], audio_id, 'm4a', m3u8_id=ext, fatal=False))
else:
formats.append({
'url': audio['url'],
'ext': ext,
'format_id': ext,
'abr': int_or_none(audio.get('bitrate')),
'acodec': ext,
'vcodec': 'none',
})
chapter_number = traverse_obj(entry, ('meta', 'ga', 'contentSerialPart', {int_or_none}))
return {
'id': audio_id,
'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None,
'chapter_number': chapter_number,
'formats': formats,
'formats': self._extract_formats(entry, audio_id),
**traverse_obj(entry, {
'title': ('meta', 'ga', 'contentName'),
'description': 'title',
@@ -219,3 +238,106 @@ class RozhlasVltavaIE(InfoExtractor):
'title': traverse_obj(data, ('series', 'title')),
'entries': map(self._extract_video, data['playlist']),
}
class MujRozhlasIE(RozhlasBaseIE):
_VALID_URL = r'https?://(?:www\.)?mujrozhlas\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
# single episode extraction
'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
'md5': '6f8fd68663e64936623e67c152a669e0',
'info_dict': {
'id': '10739193',
'ext': 'mp3',
'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
'timestamp': 1684915200,
'modified_timestamp': 1684922446,
'series': 'Vykopávky',
'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
'channel_id': 'radio-wave',
'upload_date': '20230524',
'modified_date': '20230524',
},
}, {
# serial extraction
'url': 'https://www.mujrozhlas.cz/radiokniha/jaroslava-janackova-pribeh-tajemneho-psani-o-pramenech-genezi-babicky',
'playlist_mincount': 7,
'info_dict': {
'id': 'bb2b5f4e-ffb4-35a6-a34a-046aa62d6f6b',
'title': 'Jaroslava Janáčková: Příběh tajemného psaní. O pramenech a genezi Babičky',
'description': 'md5:7434d8fac39ac9fee6df098e11dfb1be',
},
}, {
# show extraction
'url': 'https://www.mujrozhlas.cz/nespavci',
'playlist_mincount': 14,
'info_dict': {
'id': '09db9b37-d0f4-368c-986a-d3439f741f08',
'title': 'Nespavci',
'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
},
}]
def _call_api(self, path, item_id, msg='API JSON'):
return self._download_json(
f'https://api.mujrozhlas.cz/{path}/{item_id}', item_id,
note=f'Downloading {msg}', errnote=f'Failed to download {msg}')['data']
def _extract_audio_entry(self, entry):
audio_id = entry['meta']['ga']['contentId']
return {
'id': audio_id,
'formats': self._extract_formats(entry['attributes'], audio_id),
**traverse_obj(entry, {
'title': ('attributes', 'title'),
'description': ('attributes', 'description'),
'episode_number': ('attributes', 'part'),
'series': ('attributes', 'mirroredShow', 'title'),
'chapter': ('attributes', 'mirroredSerial', 'title'),
'artist': ('meta', 'ga', 'contentAuthor'),
'channel_id': ('meta', 'ga', 'contentCreator'),
'timestamp': ('attributes', 'since', {unified_timestamp}),
'modified_timestamp': ('attributes', 'updated', {unified_timestamp}),
'thumbnail': ('attributes', 'asset', 'url', {url_or_none}),
})
}
def _entries(self, api_url, playlist_id):
for page in itertools.count(1):
episodes = self._download_json(
api_url, playlist_id, note=f'Downloading episodes page {page}',
errnote=f'Failed to download episodes page {page}', fatal=False)
for episode in traverse_obj(episodes, ('data', lambda _, v: v['meta']['ga']['contentId'])):
yield self._extract_audio_entry(episode)
api_url = traverse_obj(episodes, ('links', 'next', {url_or_none}))
if not api_url:
break
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
info = self._search_json(r'\bvar\s+dl\s*=', webpage, 'info json', display_id)
entity = info['siteEntityBundle']
if entity == 'episode':
return self._extract_audio_entry(self._call_api(
'episodes', info['contentId'], 'episode info API JSON'))
elif entity in ('show', 'serial'):
playlist_id = info['contentShow'].split(':')[0] if entity == 'show' else info['contentId']
data = self._call_api(f'{entity}s', playlist_id, f'{entity} playlist JSON')
api_url = data['relationships']['episodes']['links']['related']
return self.playlist_result(
self._entries(api_url, playlist_id), playlist_id,
**traverse_obj(data, ('attributes', {
'title': 'title',
'description': 'description',
})))
else:
# `entity == 'person'` not implemented yet by API, ref:
# https://api.mujrozhlas.cz/persons/8367e456-2a57-379a-91bb-e699619bea49/participation
raise ExtractorError(f'Unsupported entity type "{entity}"')


@@ -144,7 +144,7 @@ class RumbleEmbedIE(InfoExtractor):
if embeds:
return embeds
return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{\s*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)


@@ -73,7 +73,10 @@ class ShemarooMeIE(InfoExtractor):
key = bytes_to_intlist(compat_b64decode(data_json['key']))
iv = [0] * 16
m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii')
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
headers = {'stream_key': data_json['stream_key']}
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers=headers)
for fmt in formats:
fmt['http_headers'] = headers
release_date = self._html_search_regex(
(r'itemprop="uploadDate">\s*([\d-]+)', r'id="release_date" value="([\d-]+)'),


@@ -10,6 +10,8 @@ from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
jwt_decode_hs256,
try_call,
try_get,
)
@@ -77,8 +79,10 @@ class SonyLIVIE(InfoExtractor):
self._HEADERS['device_id'] = self._get_device_id()
self._HEADERS['content-type'] = 'application/json'
if username.lower() == 'token' and len(password) > 1198:
if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
self._HEADERS['authorization'] = password
self.report_login()
return
elif len(username) != 10 or not username.isdigit():
raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}')
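try_call swallows exceptions and returns None, so a password that is not a decodable JWT simply fails the token branch instead of crashing. Conceptually (a simplified version of yt_dlp.utils.try_call):

def try_call(*funcs, args=[], kwargs={}):
    # first non-raising, non-None result wins; everything else yields None
    for f in funcs:
        try:
            result = f(*args, **kwargs)
        except Exception:
            continue
        if result is not None:
            return result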


@@ -42,14 +42,13 @@ class StripchatIE(InfoExtractor):
elif not traverse_obj(data, ('viewCam', 'model', 'isLive'), expected_type=bool):
raise UserNotLive(video_id=video_id)
server = traverse_obj(data, ('viewCam', 'viewServers', 'flashphoner-hls'), expected_type=str)
model_id = traverse_obj(data, ('viewCam', 'model', 'id'), expected_type=int)
formats = []
for host in traverse_obj(data, ('config', 'data', (
(('features', 'featuresV2'), 'hlsFallback', 'fallbackDomains', ...), 'hlsStreamHost'))):
formats = self._extract_m3u8_formats(
f'https://b-{server}.{host}/hls/{model_id}/master/{model_id}_auto.m3u8',
f'https://edge-hls.{host}/hls/{model_id}/master/{model_id}_auto.m3u8',
video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True)
if formats:
break


@@ -2,7 +2,7 @@ import re
import urllib.parse
from .common import InfoExtractor
from ..utils import str_or_none, traverse_obj
from ..utils import js_to_json, str_or_none, traverse_obj
class SubstackIE(InfoExtractor):
@@ -14,7 +14,7 @@ class SubstackIE(InfoExtractor):
'id': '47660949',
'ext': 'mp4',
'title': 'I MADE A VLOG',
'description': 'md5:10c01ff93439a62e70ce963b2aa0b7f6',
'description': 'md5:9248af9a759321e1027226f988f54d96',
'thumbnail': 'md5:bec758a34d8ee9142d43bcebdf33af18',
'uploader': 'Maybe Baby',
'uploader_id': '33628',
@@ -77,7 +77,9 @@ class SubstackIE(InfoExtractor):
display_id, username = self._match_valid_url(url).group('id', 'username')
webpage = self._download_webpage(url, display_id)
webpage_info = self._search_json(r'<script[^>]*>\s*window\._preloads\s*=', webpage, 'preloads', display_id)
webpage_info = self._parse_json(self._search_json(
r'window\._preloads\s*=\s*JSON\.parse\(', webpage, 'json string',
display_id, transform_source=js_to_json, contains_pattern=r'"{(?s:.+)}"'), display_id)
post_type = webpage_info['post']['type']
formats, subtitles = [], {}
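The page now embeds the preloads as a JavaScript string passed through JSON.parse(...), so the value must be unescaped once before the JSON inside can be parsed. Along these lines (toy markup; the extractor uses js_to_json for the general case):

import json
import re

html = '<script>window._preloads = JSON.parse("{\\"post\\": {\\"type\\": \\"video\\"}}")</script>'
raw = re.search(r'window\._preloads\s*=\s*JSON\.parse\("(.*)"\)', html).group(1)
preloads = json.loads(json.loads(f'"{raw}"'))  # unescape the JS string, then parse it
print(preloads['post']['type'])  # -> video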


@@ -1,8 +1,13 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
extract_attributes,
get_element_by_id,
get_element_html_by_class,
int_or_none,
str_or_none,
traverse_obj,
url_or_none,
)
@@ -21,7 +26,15 @@ class SverigesRadioBaseIE(InfoExtractor):
}
def _real_extract(self, url):
audio_id = self._match_id(url)
audio_id, display_id = self._match_valid_url(url).group('id', 'slug')
if not audio_id:
webpage = self._download_webpage(url, display_id)
audio_id = (
traverse_obj(
get_element_html_by_class('audio-button', webpage),
({extract_attributes}, ('data-audio-id', 'data-publication-id')), get_all=False)
or self._parse_json(get_element_by_id('gtm-metadata', webpage), display_id)['pageId'])
query = {
'id': audio_id,
'type': self._AUDIO_TYPE,
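When the URL carries a slug instead of a numeric ID, the ID is now recovered from the page's audio button markup. extract_attributes turns an HTML start tag into a dict (sample tag made up):

from yt_dlp.utils import extract_attributes

attrs = extract_attributes('<button class="audio-button" data-audio-id="8360345">')
print(attrs['data-audio-id'])  # -> 8360345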
@@ -30,7 +43,6 @@ class SverigesRadioBaseIE(InfoExtractor):
item = self._download_json(
self._BASE_URL + 'audiometadata', audio_id,
'Downloading audio JSON metadata', query=query)['items'][0]
title = item['subtitle']
query['format'] = 'iis'
urls = []
@@ -61,18 +73,20 @@ class SverigesRadioBaseIE(InfoExtractor):
return {
'id': audio_id,
'title': title,
'formats': formats,
'series': item.get('title'),
'duration': int_or_none(item.get('duration')),
'thumbnail': item.get('displayimageurl'),
'description': item.get('description'),
**traverse_obj(item, {
'title': 'subtitle',
'series': 'title',
'duration': ('duration', {int_or_none}),
'thumbnail': ('displayimageurl', {url_or_none}),
'description': 'description',
}),
}
class SverigesRadioPublicationIE(SverigesRadioBaseIE):
IE_NAME = 'sverigesradio:publication'
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*?\bartikel=(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?(?:artikel|gruppsida)(?:\.aspx\?.*?\bartikel=(?P<id>[0-9]+)|/(?P<slug>[\w-]+))'
_TESTS = [{
'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546',
'md5': '6a4917e1923fccb080e5a206a5afa542',
@@ -85,6 +99,18 @@ class SverigesRadioPublicationIE(SverigesRadioBaseIE):
'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df',
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'https://sverigesradio.se/artikel/tysk-fotbollsfeber-bayern-munchens-10-ariga-segersvit-kan-brytas',
'md5': 'f8a914ad50f491bb74eed403ab4bfef6',
'info_dict': {
'id': '8360345',
'ext': 'm4a',
'title': 'Tysk fotbollsfeber när Bayern Münchens 10-åriga segersvit kan brytas',
'series': 'Radiosporten',
'description': 'md5:5254610e20ce527ecb3a6102a06dcc5f',
'duration': 72,
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887',
'only_matching': True,
@@ -94,8 +120,8 @@ class SverigesRadioPublicationIE(SverigesRadioBaseIE):
class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
IE_NAME = 'sverigesradio:episode'
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?:(?P<id>\d+)|(?P<slug>[\w-]+))(?:$|[#?])'
_TESTS = [{
'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300',
'md5': '20dc4d8db24228f846be390b0c59a07c',
'info_dict': {
@@ -106,6 +132,18 @@ class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
'title': 'Metoo och valen',
'description': 'md5:fcb5c1f667f00badcc702b196f10a27e',
'thumbnail': r're:^https?://.*\.jpg',
}
}
},
}, {
'url': 'https://sverigesradio.se/avsnitt/p4-live-med-first-aid-kit-scandinavium-mars-2023',
'md5': 'ce17fb82520a8033dbb846993d5589fe',
'info_dict': {
'id': '2160416',
'ext': 'm4a',
'title': 'P4 Live med First Aid Kit',
'description': 'md5:6d5b78eed3d2b65f6de04daa45e9285d',
'thumbnail': r're:^https?://.*\.jpg',
'series': 'P4 Live',
'duration': 5640,
},
}]
_AUDIO_TYPE = 'episode'


@@ -2,10 +2,12 @@ import re
from .common import InfoExtractor
from ..utils import (
js_to_json,
UnsupportedError,
extract_attributes,
try_get,
int_or_none,
js_to_json,
parse_iso8601,
try_get,
)
@@ -14,36 +16,38 @@ class TagesschauIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
'md5': '7a7287612fa881a1ae1d087df45c2fd6',
'md5': 'ccb9359bf8c4795836e43759f3408a93',
'info_dict': {
'id': 'video-102143-1',
'ext': 'mp4',
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
'duration': 138,
},
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
'md5': '3c54c1f6243d279b706bde660ceec633',
'md5': '5c15e8f3da049e48829ec9786d835536',
'info_dict': {
'id': 'ts-5727-1',
'ext': 'mp4',
'title': 'Ganze Sendung',
'duration': 932,
},
}, {
# exclusive audio
'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
'md5': '4cf22023c285f35e99c24d290ba58cc9',
'md5': '4bff8f23504df56a0d86ed312d654182',
'info_dict': {
'id': 'audio-29417-1',
'ext': 'mp3',
'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt',
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
},
}, {
'url': 'http://www.tagesschau.de/inland/bnd-303.html',
'md5': '12cfb212d9325b5ba0d52b625f1aa61c',
'md5': 'f049fa1698d7564e9ca4c3325108f034',
'info_dict': {
'id': 'bnd-303-1',
'ext': 'mp4',
'title': 'SPD-Gruppenbild mit Bärbel Bas nach der Fraktionssitzung | dpa',
'ext': 'mp3',
'title': 'Das Siegel des Bundesnachrichtendienstes | dpa',
},
}, {
'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
@@ -51,13 +55,24 @@ class TagesschauIE(InfoExtractor):
'id': 'afd-parteitag-135',
'title': 'AfD',
},
'playlist_count': 20,
'playlist_mincount': 15,
}, {
'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
'info_dict': {
'id': 'audio-29417-1',
'ext': 'mp3',
'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt',
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
},
}, {
'url': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-327.html',
'info_dict': {
'id': 'podcast-11km-327',
'ext': 'mp3',
'title': 'Gewalt in der Kita – Wenn Erzieher:innen schweigen',
'upload_date': '20230322',
'timestamp': 1679482808,
'thumbnail': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-329~_v-original.jpg',
'description': 'md5:dad059931fe4b3693e3656e93a249848',
},
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
@@ -117,7 +132,7 @@ class TagesschauIE(InfoExtractor):
formats = []
if media_url.endswith('master.m3u8'):
formats = self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls')
elif media_url.endswith('.hi.mp3') and media_url.startswith('https://download'):
elif media_url.endswith('.mp3'):
formats = [{
'url': media_url,
'vcodec': 'none',
@@ -130,20 +145,19 @@ class TagesschauIE(InfoExtractor):
'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])),
'formats': formats
})
if not entries:
raise UnsupportedError(url)
if len(entries) > 1:
return self.playlist_result(entries, display_id, title)
formats = entries[0]['formats']
video_info = self._search_json_ld(webpage, video_id)
description = video_info.get('description')
thumbnail = self._og_search_thumbnail(webpage) or video_info.get('thumbnail')
timestamp = video_info.get('timestamp')
title = title or video_info.get('description')
return {
'id': display_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
'timestamp': timestamp,
'description': description,
'thumbnail': self._og_search_thumbnail(webpage),
'formats': entries[0]['formats'],
'timestamp': parse_iso8601(self._html_search_meta('date', webpage)),
'description': self._og_search_description(webpage),
'duration': entries[0]['duration'],
}
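
A note on the metadata swap above: parse_iso8601 turns the page's ISO 8601 "date" meta value into a unix timestamp. A minimal standalone sketch, with the sample string back-derived from the 11km podcast test:

from yt_dlp.utils import parse_iso8601

# returns a POSIX timestamp (int), or None if the string does not parse
assert parse_iso8601('2023-03-22T12:00:08+01:00') == 1679482808
assert parse_iso8601('not a date') is None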

View File

@@ -163,11 +163,9 @@ class VQQBaseIE(TencentBaseIE):
_REFERER = 'v.qq.com'
def _get_webpage_metadata(self, webpage, video_id):
return self._parse_json(
self._search_regex(
r'(?s)<script[^>]*>[^<]*window\.__pinia\s*=\s*([^<]+)</script>',
webpage, 'pinia data', fatal=False),
video_id, transform_source=js_to_json, fatal=False)
return self._search_json(
r'<script[^>]*>[^<]*window\.__(?:pinia|PINIA__)\s*=',
webpage, 'pinia data', video_id, transform_source=js_to_json, fatal=False)
class VQQVideoIE(VQQBaseIE):
@@ -176,7 +174,7 @@ class VQQVideoIE(VQQBaseIE):
_TESTS = [{
'url': 'https://v.qq.com/x/page/q326831cny0.html',
'md5': '84568b3722e15e9cd023b5594558c4a7',
'md5': 'b11c9cb781df710d686b950376676e2a',
'info_dict': {
'id': 'q326831cny0',
'ext': 'mp4',
@@ -187,7 +185,7 @@ class VQQVideoIE(VQQBaseIE):
},
}, {
'url': 'https://v.qq.com/x/page/o3013za7cse.html',
'md5': 'cc431c4f9114a55643893c2c8ebf5592',
'md5': 'a1bcf42c6d28c189bd2fe2d468abb287',
'info_dict': {
'id': 'o3013za7cse',
'ext': 'mp4',
@@ -208,6 +206,7 @@ class VQQVideoIE(VQQBaseIE):
'series': '鸡毛飞上天',
'format_id': r're:^shd',
},
'skip': '404',
}, {
'url': 'https://v.qq.com/x/cover/mzc00200p29k31e/s0043cwsgj0.html',
'md5': 'fadd10bf88aec3420f06f19ee1d24c5b',
@@ -220,6 +219,7 @@ class VQQVideoIE(VQQBaseIE):
'series': '青年理工工作者生活研究所',
'format_id': r're:^shd',
},
'params': {'skip_download': 'm3u8'},
}, {
# Geo-restricted to China
'url': 'https://v.qq.com/x/cover/mcv8hkc8zk8lnov/x0036x5qqsr.html',
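
The rewritten _get_webpage_metadata leaves the scanning to _search_json and keeps js_to_json as the transform, since window.__pinia is a JS object literal rather than strict JSON. What the transform does, in isolation (the blob is an invented stand-in for the real page data):

import json
from yt_dlp.utils import js_to_json

# js_to_json quotes bare keys and rewrites single-quoted strings into valid JSON
js_blob = "{vid: 'q326831cny0', duration: 845, isVip: false}"
data = json.loads(js_to_json(js_blob))
assert data == {'vid': 'q326831cny0', 'duration': 845, 'isVip': False}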

View File

@@ -8,7 +8,7 @@ class TestURLIE(InfoExtractor):
""" Allows addressing of the test cases as test:yout.*be_1 """
IE_DESC = False # Do not list
_VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>[0-9]+))?$'
_VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>\d+|all))?$'
def _real_extract(self, url):
from . import gen_extractor_classes
@@ -36,6 +36,10 @@ class TestURLIE(InfoExtractor):
extractor = matching_extractors[0]
testcases = tuple(extractor.get_testcases(True))
if num == 'all':
return self.playlist_result(
[self.url_result(tc['url'], extractor) for tc in testcases],
url, f'{extractor.IE_NAME} tests')
try:
tc = testcases[int(num or 0)]
except IndexError:
@@ -43,4 +47,4 @@ class TestURLIE(InfoExtractor):
f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True)
self.to_screen(f'Test URL: {tc["url"]}')
return self.url_result(tc['url'])
return self.url_result(tc['url'], extractor)
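
With the widened pattern, the num group also accepts the literal 'all', which the new branch expands into a playlist of every test case for the matched extractor. A quick standalone check of the capture groups:

import re

_VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>\d+|all))?$'
assert re.match(_VALID_URL, 'test:youtube_1').group('extractor', 'num') == ('youtube', '1')
assert re.match(_VALID_URL, 'testurl:youtube_all').group('num') == 'all'
assert re.match(_VALID_URL, 'test:youtube').group('num') is None

So test:youtube_all now yields all YouTube test cases as one playlist.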

View File

@@ -62,7 +62,7 @@ class TikTokBaseIE(InfoExtractor):
return self._download_json(
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
fatal=fatal, note=note, errnote=errnote, headers={
'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)',
'Accept': 'application/json',
}, query=query)
@@ -79,11 +79,11 @@ class TikTokBaseIE(InfoExtractor):
'_rticket': int(time.time() * 1000),
'ts': int(time.time()),
'device_brand': 'Google',
'device_type': 'Pixel 4',
'device_type': 'Pixel 7',
'device_platform': 'android',
'resolution': '1080*1920',
'resolution': '1080*2400',
'dpi': 420,
'os_version': '10',
'os_version': '13',
'os_api': '29',
'carrier_region': 'US',
'sys_region': 'US',
@@ -218,8 +218,8 @@ class TikTokBaseIE(InfoExtractor):
def extract_addr(addr, add_meta={}):
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
if res:
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height'))
known_resolutions[res].setdefault('width', add_meta.get('width'))
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height') or addr.get('height'))
known_resolutions[res].setdefault('width', add_meta.get('width') or addr.get('width'))
parsed_meta.update(known_resolutions.get(res, {}))
add_meta.setdefault('height', int_or_none(res[:-1]))
return [{
@@ -624,6 +624,32 @@ class TikTokIE(TikTokBaseIE):
'thumbnails': 'count:3',
},
'expected_warnings': ['Unable to find video in feed'],
}, {
# 1080p format
'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830',
'md5': '982512017a8a917124d5a08c8ae79621',
'info_dict': {
'id': '7107337212743830830',
'ext': 'mp4',
'title': 'new music video 4 dont come backkkk🧸🖤 i hope u enjoy !! @musicontiktok',
'description': 'new music video 4 dont come backkkk🧸🖤 i hope u enjoy !! @musicontiktok',
'uploader': 'tatemcrae',
'uploader_id': '86328792343818240',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'creator': 't8',
'artist': 't8',
'track': 'original sound',
'upload_date': '20220609',
'timestamp': 1654805899,
'duration': 150,
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
'thumbnail': r're:^https://.+\.webp',
},
'params': {'format': 'bytevc1_1080p_808907-0'},
}, {
# Auto-captions available
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
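
The extract_addr change lets the per-resolution cache learn dimensions from the address itself, not only from add_meta, so variants sharing a url_key resolution fill in each other's gaps. The caching pattern in isolation, with toy values:

known_resolutions = {}

# the first variant under a url_key resolution records whatever dimensions it knows
known_resolutions.setdefault('720p', {}).setdefault('height', 720)
known_resolutions['720p'].setdefault('width', 1280)

# later variants with the same resolution key inherit the cached dimensions
parsed_meta = {}
parsed_meta.update(known_resolutions.get('720p', {}))
assert parsed_meta == {'height': 720, 'width': 1280}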

View File

@@ -2,8 +2,11 @@ import re
from .common import InfoExtractor
from ..utils import (
bool_or_none,
int_or_none,
parse_iso8601,
traverse_obj,
url_or_none,
)
@@ -20,19 +23,25 @@ class TV4IE(InfoExtractor):
sport/|
)
)(?P<id>[0-9]+)'''
_GEO_COUNTRIES = ['SE']
_GEO_BYPASS = False
_TESTS = [
{
# not geo-restricted
'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
'md5': 'cb837212f342d77cec06e6dad190e96d',
'info_dict': {
'id': '2491650',
'ext': 'mp4',
'title': 'Kalla Fakta 5 (english subtitles)',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': int,
'description': '2491650',
'series': 'Kalla fakta',
'duration': 1335,
'thumbnail': r're:^https?://[^/?#]+/api/v2/img/',
'timestamp': 1385373240,
'upload_date': '20131125',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Unable to download f4m manifest'],
},
{
'url': 'http://www.tv4play.se/iframe/video/3054113',
@@ -46,6 +55,7 @@ class TV4IE(InfoExtractor):
'timestamp': int,
'upload_date': '20150130',
},
'skip': '404 Not Found',
},
{
'url': 'http://www.tv4play.se/sport/3060959',
@@ -69,29 +79,28 @@ class TV4IE(InfoExtractor):
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_json(
'https://playback-api.b17g.net/asset/%s' % video_id,
video_id, 'Downloading video info JSON', query={
'service': 'tv4',
'device': 'browser',
'protocol': 'hls,dash',
'drm': 'widevine',
})['metadata']
title = info['title']
manifest_url = self._download_json(
'https://playback-api.b17g.net/media/' + video_id,
video_id, query={
def _call_api(self, endpoint, video_id, headers=None, query={}):
return self._download_json(
f'https://playback2.a2d.tv/{endpoint}/{video_id}', video_id,
f'Downloading {endpoint} API JSON', headers=headers, query={
'service': 'tv4',
'device': 'browser',
'protocol': 'hls',
})['playbackItem']['manifestUrl']
formats = []
subtitles = {}
**query,
})
def _real_extract(self, url):
video_id = self._match_id(url)
info = traverse_obj(self._call_api('asset', video_id, query={
'protocol': 'hls,dash',
'drm': 'widevine',
}), ('metadata', {dict})) or {}
manifest_url = self._call_api(
'play', video_id, headers=self.geo_verification_headers())['playbackItem']['manifestUrl']
formats, subtitles = [], {}
fmts, subs = self._extract_m3u8_formats_and_subtitles(
manifest_url, video_id, 'mp4',
@@ -117,20 +126,24 @@ class TV4IE(InfoExtractor):
subtitles = self._merge_subtitles(subtitles, subs)
if not formats and info.get('is_geo_restricted'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
self.raise_geo_restricted(
'This video is not available in your location due to geo-restriction, or because you are not authenticated',
countries=['SE'])
return {
'id': video_id,
'title': title,
'formats': formats,
'subtitles': subtitles,
'description': info.get('description'),
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('image'),
'is_live': info.get('isLive') is True,
'series': info.get('seriesTitle'),
'season_number': int_or_none(info.get('seasonNumber')),
'episode': info.get('episodeTitle'),
'episode_number': int_or_none(info.get('episodeNumber')),
**traverse_obj(info, {
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': (('broadcast_date_time', 'broadcastDateTime'), {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('image', {url_or_none}),
'is_live': ('isLive', {bool_or_none}),
'series': ('seriesTitle', {str}),
'season_number': ('seasonNumber', {int_or_none}),
'episode': ('episodeTitle', {str}),
'episode_number': ('episodeNumber', {int_or_none}),
}, get_all=False),
}
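
The return-dict rewrite leans on traverse_obj with type filters ({str}, {int_or_none}) and alternate key paths. A compact standalone illustration; the metadata dict is a made-up reduction of the API response, with the timestamp taken from the updated test above:

from yt_dlp.utils import int_or_none, parse_iso8601, traverse_obj

metadata = {'title': 'Kalla Fakta 5 (english subtitles)', 'seasonNumber': '1',
            'broadcastDateTime': '2013-11-25T10:54:00+01:00'}
info = traverse_obj(metadata, {
    'title': ('title', {str}),                       # {type} keeps only matching values
    'season_number': ('seasonNumber', {int_or_none}),  # {callable} maps the value
    # alternate keys: with get_all=False the first branch that yields a value wins
    'timestamp': (('broadcast_date_time', 'broadcastDateTime'), {parse_iso8601}),
}, get_all=False)
assert info == {'title': 'Kalla Fakta 5 (english subtitles)', 'season_number': 1, 'timestamp': 1385373240}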

View File

@@ -488,9 +488,9 @@ class TVPVODBaseIE(InfoExtractor):
f'{self._API_BASE_URL}/{resource}', video_id,
query={'lang': 'pl', 'platform': 'BROWSER', **query},
expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs)
if is_valid(urlh.status):
if is_valid(urlh.getcode()):
return document
raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})')
raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.getcode()})')
def _parse_video(self, video, with_url=True):
info_dict = traverse_obj(video, {

View File

@@ -60,7 +60,7 @@ class TwitchBaseIE(InfoExtractor):
@property
def _CLIENT_ID(self):
return self._configuration_arg(
'client_id', ['ue6666qo983tsx6so1t0vnawi233wa'], ie_key=TwitchStreamIE, casesense=True)[0]
'client_id', ['ue6666qo983tsx6so1t0vnawi233wa'], ie_key='Twitch', casesense=True)[0]
def _perform_login(self, username, password):
def fail(message):

View File

@@ -3,7 +3,6 @@ import re
from .common import InfoExtractor
from .periscope import PeriscopeBaseIE, PeriscopeIE
from ..compat import functools # isort: split
from ..compat import (
compat_parse_qs,
compat_urllib_parse_unquote,
@@ -30,11 +29,67 @@ from ..utils import (
class TwitterBaseIE(InfoExtractor):
_NETRC_MACHINE = 'twitter'
_API_BASE = 'https://api.twitter.com/1.1/'
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
_AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
_guest_token = None
_flow_token = None
_LOGIN_INIT_DATA = json.dumps({
'input_flow_data': {
'flow_context': {
'debug_overrides': {},
'start_location': {
'location': 'unknown'
}
}
},
'subtask_versions': {
'action_list': 2,
'alert_dialog': 1,
'app_download_cta': 1,
'check_logged_in_account': 1,
'choice_selection': 3,
'contacts_live_sync_permission_prompt': 0,
'cta': 7,
'email_verification': 2,
'end_flow': 1,
'enter_date': 1,
'enter_email': 2,
'enter_password': 5,
'enter_phone': 2,
'enter_recaptcha': 1,
'enter_text': 5,
'enter_username': 2,
'generic_urt': 3,
'in_app_notification': 1,
'interest_picker': 3,
'js_instrumentation': 1,
'menu_dialog': 1,
'notifications_permission_prompt': 2,
'open_account': 2,
'open_home_timeline': 1,
'open_link': 1,
'phone_verification': 4,
'privacy_options': 1,
'security_key': 3,
'select_avatar': 4,
'select_banner': 2,
'settings_list': 7,
'show_code': 1,
'sign_up': 2,
'sign_up_review': 4,
'tweet_selection_urt': 1,
'update_users': 1,
'upload_media': 1,
'user_recommendations_list': 4,
'user_recommendations_urt': 1,
'wait_spinner': 3,
'web_modal': 1
}
}, separators=(',', ':')).encode()
def _extract_variant_formats(self, variant, video_id):
variant_url = variant.get('url')
@@ -86,18 +141,151 @@ class TwitterBaseIE(InfoExtractor):
'height': int(m.group('height')),
})
@functools.cached_property
@property
def is_logged_in(self):
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
def _call_api(self, path, video_id, query={}, graphql=False):
cookies = self._get_cookies(self._API_BASE)
def _fetch_guest_token(self, headers, display_id):
headers.pop('x-guest-token', None)
self._guest_token = traverse_obj(self._download_json(
f'{self._API_BASE}guest/activate.json', display_id,
'Downloading guest token', data=b'', headers=headers), 'guest_token')
if not self._guest_token:
raise ExtractorError('Could not retrieve guest token')
def _set_base_headers(self):
headers = self._AUTH.copy()
csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
if csrf_token:
headers['x-csrf-token'] = csrf_token
return headers
csrf_cookie = cookies.get('ct0')
if csrf_cookie:
headers['x-csrf-token'] = csrf_cookie.value
def _call_login_api(self, note, headers, query={}, data=None):
response = self._download_json(
f'{self._API_BASE}onboarding/task.json', None, note,
headers=headers, query=query, data=data, expected_status=400)
error = traverse_obj(response, ('errors', 0, 'message', {str}))
if error:
raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
elif traverse_obj(response, 'status') != 'success':
raise ExtractorError('Login was unsuccessful')
subtask = traverse_obj(
response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
if not subtask:
raise ExtractorError('Twitter API did not return next login subtask')
self._flow_token = response['flow_token']
return subtask
def _perform_login(self, username, password):
if self.is_logged_in:
return
self._request_webpage('https://twitter.com/', None, 'Requesting cookies')
headers = self._set_base_headers()
self._fetch_guest_token(headers, None)
headers.update({
'content-type': 'application/json',
'x-guest-token': self._guest_token,
'x-twitter-client-language': 'en',
'x-twitter-active-user': 'yes',
'Referer': 'https://twitter.com/',
'Origin': 'https://twitter.com',
})
def build_login_json(*subtask_inputs):
return json.dumps({
'flow_token': self._flow_token,
'subtask_inputs': subtask_inputs
}, separators=(',', ':')).encode()
def input_dict(subtask_id, text):
return {
'subtask_id': subtask_id,
'enter_text': {
'text': text,
'link': 'next_link'
}
}
next_subtask = self._call_login_api(
'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
while not self.is_logged_in:
if next_subtask == 'LoginJsInstrumentationSubtask':
next_subtask = self._call_login_api(
'Submitting JS instrumentation response', headers, data=build_login_json({
'subtask_id': next_subtask,
'js_instrumentation': {
'response': '{}',
'link': 'next_link'
}
}))
elif next_subtask == 'LoginEnterUserIdentifierSSO':
next_subtask = self._call_login_api(
'Submitting username', headers, data=build_login_json({
'subtask_id': next_subtask,
'settings_list': {
'setting_responses': [{
'key': 'user_identifier',
'response_data': {
'text_data': {
'result': username
}
}
}],
'link': 'next_link'
}
}))
elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
next_subtask = self._call_login_api(
'Submitting alternate identifier', headers,
data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
'one of username, phone number or email that was not used as --username'))))
elif next_subtask == 'LoginEnterPassword':
next_subtask = self._call_login_api(
'Submitting password', headers, data=build_login_json({
'subtask_id': next_subtask,
'enter_password': {
'password': password,
'link': 'next_link'
}
}))
elif next_subtask == 'AccountDuplicationCheck':
next_subtask = self._call_login_api(
'Submitting account duplication check', headers, data=build_login_json({
'subtask_id': next_subtask,
'check_logged_in_account': {
'link': 'AccountDuplicationCheck_false'
}
}))
elif next_subtask == 'LoginTwoFactorAuthChallenge':
next_subtask = self._call_login_api(
'Submitting 2FA token', headers, data=build_login_json(input_dict(
next_subtask, self._get_tfa_info('two-factor authentication token'))))
elif next_subtask == 'LoginAcid':
next_subtask = self._call_login_api(
'Submitting confirmation code', headers, data=build_login_json(input_dict(
next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
elif next_subtask == 'LoginSuccessSubtask':
raise ExtractorError('Twitter API did not grant auth token cookie')
else:
raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
self.report_login()
def _call_api(self, path, video_id, query={}, graphql=False):
headers = self._set_base_headers()
if self.is_logged_in:
headers.update({
'x-twitter-auth-type': 'OAuth2Session',
@@ -106,15 +294,10 @@ class TwitterBaseIE(InfoExtractor):
})
for first_attempt in (True, False):
if not self.is_logged_in and not self._guest_token:
headers.pop('x-guest-token', None)
self._guest_token = traverse_obj(self._download_json(
f'{self._API_BASE}guest/activate.json', video_id,
'Downloading guest token', data=b'', headers=headers), 'guest_token')
if self._guest_token:
if not self.is_logged_in:
if not self._guest_token:
self._fetch_guest_token(headers, video_id)
headers['x-guest-token'] = self._guest_token
elif not self.is_logged_in:
raise ExtractorError('Could not retrieve guest token')
allowed_status = {400, 401, 403, 404} if graphql else {403}
result = self._download_json(
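
The login flow added above is a server-driven state machine: each onboarding/task.json response names the next subtask_id and hands back a fresh flow_token to echo on the following call, until the auth cookie appears. A toy model of the loop's shape, with faked responses and no network:

def run_flow(responses):
    # stand-in for successive onboarding/task.json responses
    flow_token, logged_in = None, False
    for response in responses:
        flow_token = response['flow_token']  # must be echoed back on the next call
        subtask = response['subtasks'][0]['subtask_id']
        if subtask == 'LoginSuccessSubtask':
            logged_in = True
            break
    return flow_token, logged_in

responses = [
    {'flow_token': 'f1', 'subtasks': [{'subtask_id': 'LoginEnterUserIdentifierSSO'}]},
    {'flow_token': 'f2', 'subtasks': [{'subtask_id': 'LoginEnterPassword'}]},
    {'flow_token': 'f3', 'subtasks': [{'subtask_id': 'LoginSuccessSubtask'}]},
]
assert run_flow(responses) == ('f3', True)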

View File

@@ -112,18 +112,19 @@ class URPlayIE(InfoExtractor):
lang = ISO639Utils.short2long(lang)
return lang or None
for k, v in (urplayer_data['streamingInfo'].get('sweComplete') or {}).items():
if (k in ('sd', 'hd') or not isinstance(v, dict)):
continue
lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
if not sttl_url:
continue
lang = parse_lang_code(lang)
if not lang:
continue
sttl = subtitles.get(lang) or []
sttl.append({'ext': k, 'url': sttl_url, })
subtitles[lang] = sttl
for stream in urplayer_data['streamingInfo'].values():
for k, v in stream.items():
if (k in ('sd', 'hd') or not isinstance(v, dict)):
continue
lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
if not sttl_url:
continue
lang = parse_lang_code(lang)
if not lang:
continue
sttl = subtitles.get(lang) or []
sttl.append({'ext': k, 'url': sttl_url, })
subtitles[lang] = sttl
image = urplayer_data.get('image') or {}
thumbnails = []

View File

@@ -39,7 +39,7 @@ class VidioBaseIE(InfoExtractor):
login_post, login_post_urlh = self._download_webpage_handle(
self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
if login_post_urlh.status == 401:
if login_post_urlh.getcode() == 401:
if get_element_by_class('onboarding-content-register-popup__title', login_post):
raise ExtractorError(
'Unable to log in: The provided email has not registered yet.', expected=True)

View File

@@ -1,14 +1,86 @@
import json
import time
import urllib.error
import uuid
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
jwt_decode_hs256,
parse_age_limit,
traverse_obj,
try_call,
try_get,
unified_timestamp,
unified_strdate,
)
class VootIE(InfoExtractor):
class VootBaseIE(InfoExtractor):
_NETRC_MACHINE = 'voot'
_GEO_BYPASS = False
_LOGIN_HINT = 'Log in with "-u <email_address> -p <password>", or use "-u token -p <auth_token>" to log in with an auth token.'
_TOKEN = None
_EXPIRY = 0
_API_HEADERS = {'Origin': 'https://www.voot.com', 'Referer': 'https://www.voot.com/'}
def _perform_login(self, username, password):
if self._TOKEN and self._EXPIRY:
return
if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
VootBaseIE._TOKEN = password
VootBaseIE._EXPIRY = jwt_decode_hs256(password)['exp']
self.report_login()
# Mobile number as username is not supported
elif not username.isdigit():
check_username = self._download_json(
'https://userauth.voot.com/usersV3/v3/checkUser', None, data=json.dumps({
'type': 'email',
'email': username
}, separators=(',', ':')).encode(), headers={
**self._API_HEADERS,
'Content-Type': 'application/json;charset=utf-8',
}, note='Checking username', expected_status=403)
if not traverse_obj(check_username, ('isExist', {bool})):
if traverse_obj(check_username, ('status', 'code', {int})) == 9999:
self.raise_geo_restricted(countries=['IN'])
raise ExtractorError('Incorrect username', expected=True)
auth_token = traverse_obj(self._download_json(
'https://userauth.voot.com/usersV3/v3/login', None, data=json.dumps({
'type': 'traditional',
'deviceId': str(uuid.uuid4()),
'deviceBrand': 'PC/MAC',
'data': {
'email': username,
'password': password
}
}, separators=(',', ':')).encode(), headers={
**self._API_HEADERS,
'Content-Type': 'application/json;charset=utf-8',
}, note='Logging in', expected_status=400), ('data', 'authToken', {dict}))
if not auth_token:
raise ExtractorError('Incorrect password', expected=True)
VootBaseIE._TOKEN = auth_token['accessToken']
VootBaseIE._EXPIRY = auth_token['expirationTime']
else:
raise ExtractorError(self._LOGIN_HINT, expected=True)
def _check_token_expiry(self):
if int(time.time()) >= self._EXPIRY:
raise ExtractorError('Access token has expired', expected=True)
def _real_initialize(self):
if not self._TOKEN:
self.raise_login_required(self._LOGIN_HINT, method=None)
self._check_token_expiry()
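
Token login accepts a raw JWT; jwt_decode_hs256 merely base64-decodes the payload (no signature verification), which is all that is needed to read the exp claim that _check_token_expiry compares against. Sketch with a hand-built, unsigned stand-in token:

import base64
import json
import time

from yt_dlp.utils import jwt_decode_hs256

def b64url(obj):
    return base64.urlsafe_b64encode(json.dumps(obj).encode()).decode()

# unsigned stand-in token; real Voot tokens carry the same 'exp' claim
fake_jwt = '.'.join((b64url({'alg': 'HS256'}), b64url({'exp': 1893456000}), 'sig'))
assert jwt_decode_hs256(fake_jwt) == {'exp': 1893456000}  # 2030-01-01 UTC
assert int(time.time()) < 1893456000                      # i.e. not yet expired
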
class VootIE(VootBaseIE):
_VALID_URL = r'''(?x)
(?:
voot:|
@@ -20,27 +92,25 @@ class VootIE(InfoExtractor):
)
(?P<id>\d{3,})
'''
_GEO_COUNTRIES = ['IN']
_TESTS = [{
'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
'info_dict': {
'id': '0_8ledb18o',
'id': '441353',
'ext': 'mp4',
'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340',
'title': 'Is this the end of Kamini?',
'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
'timestamp': 1472162937,
'timestamp': 1472103000,
'upload_date': '20160825',
'series': 'Ishq Ka Rang Safed',
'season_number': 1,
'episode': 'Is this the end of Kamini?',
'episode_number': 340,
'view_count': int,
'like_count': int,
'release_date': '20160825',
'season': 'Season 1',
'age_limit': 13,
'duration': 1146.0,
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Failed to download m3u8 information'],
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
'only_matching': True,
@@ -55,59 +125,50 @@ class VootIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
media_info = self._download_json(
'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id,
query={
'platform': 'Web',
'pId': 2,
'mediaId': video_id,
})
'https://psapi.voot.com/jio/voot/v1/voot-web/content/query/asset-details', video_id,
query={'ids': f'include:{video_id}', 'responseType': 'common'}, headers={'accesstoken': self._TOKEN})
status_code = try_get(media_info, lambda x: x['status']['code'], int)
if status_code != 0:
raise ExtractorError(media_info['status']['message'], expected=True)
try:
m3u8_url = self._download_json(
'https://vootapi.media.jio.com/playback/v1/playbackrights', video_id,
'Downloading playback JSON', data=b'{}', headers={
**self.geo_verification_headers(),
**self._API_HEADERS,
'Content-Type': 'application/json;charset=utf-8',
'platform': 'androidwebdesktop',
'vootid': video_id,
'voottoken': self._TOKEN,
})['m3u8']
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 400:
self._check_token_expiry()
raise
media = media_info['assets']
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
self._remove_duplicate_formats(formats)
entry_id = media['EntryId']
title = media['MediaName']
formats = self._extract_m3u8_formats(
'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id,
video_id, 'mp4', m3u8_id='hls')
description, series, season_number, episode, episode_number = [None] * 5
for meta in try_get(media, lambda x: x['Metas'], list) or []:
key, value = meta.get('Key'), meta.get('Value')
if not key or not value:
continue
if key == 'ContentSynopsis':
description = value
elif key == 'RefSeriesTitle':
series = value
elif key == 'RefSeriesSeason':
season_number = int_or_none(value)
elif key == 'EpisodeMainTitle':
episode = value
elif key == 'EpisodeNo':
episode_number = int_or_none(value)
return {
'extractor_key': 'Kaltura',
'id': entry_id,
'title': title,
'description': description,
'series': series,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'timestamp': unified_timestamp(media.get('CreationDate')),
'duration': int_or_none(media.get('Duration')),
'view_count': int_or_none(media.get('ViewCounter')),
'like_count': int_or_none(media.get('like_counter')),
'formats': formats,
'id': video_id,
# '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
'formats': traverse_obj(formats, (
lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
'http_headers': self._API_HEADERS,
**traverse_obj(media_info, ('result', 0, {
'title': ('fullTitle', {str}),
'description': ('fullSynopsis', {str}),
'series': ('showName', {str}),
'season_number': ('season', {int_or_none}),
'episode': ('fullTitle', {str}),
'episode_number': ('episode', {int_or_none}),
'timestamp': ('uploadTime', {int_or_none}),
'release_date': ('telecastDate', {unified_strdate}),
'age_limit': ('ageNemonic', {parse_age_limit}),
'duration': ('duration', {float_or_none}),
})),
}
class VootSeriesIE(InfoExtractor):
class VootSeriesIE(VootBaseIE):
_VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})'
_TESTS = [{
'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002',

View File

@@ -41,7 +41,7 @@ class WrestleUniverseBaseIE(InfoExtractor):
token = try_call(lambda: self._get_cookies('https://www.wrestle-universe.com/')['token'].value)
if not token and not self._REFRESH_TOKEN:
self.raise_login_required()
self._REAL_TOKEN = token
self._TOKEN = token
if not self._REAL_TOKEN or self._TOKEN_EXPIRY <= int(time.time()):
if not self._REFRESH_TOKEN:

View File

@@ -158,7 +158,7 @@ class XimalayaAlbumIE(XimalayaBaseIE):
return self._download_json(
'https://www.ximalaya.com/revision/album/v1/getTracksList',
playlist_id, note=f'Downloading tracks list page {page_idx}',
query={'albumId': playlist_id, 'pageNum': page_idx, 'sort': 1})['data']
query={'albumId': playlist_id, 'pageNum': page_idx})['data']
def _get_entries(self, page_data):
for e in page_data['tracks']:

View File

@@ -1,9 +1,10 @@
from .common import InfoExtractor
from ..utils import (
OnDemandPagedList,
int_or_none,
traverse_obj,
unified_timestamp,
url_or_none
url_or_none,
)
@@ -97,3 +98,30 @@ class YappyIE(InfoExtractor):
'categories': traverse_obj(media_data, ('categories', ..., 'name')) or None,
'repost_count': int_or_none(media_data.get('sharingCount'))
}
class YappyProfileIE(InfoExtractor):
_VALID_URL = r'https?://yappy\.media/profile/(?P<id>\w+)'
_TESTS = [{
'url': 'https://yappy.media/profile/59a0c8c485e5410b9c43474bf4c6a373',
'info_dict': {
'id': '59a0c8c485e5410b9c43474bf4c6a373',
},
'playlist_mincount': 527,
}]
def _real_extract(self, url):
profile_id = self._match_id(url)
def fetch_page(page_num):
page_num += 1
videos = self._download_json(
f'https://yappy.media/api/video/list/{profile_id}?page={page_num}',
profile_id, f'Downloading profile page {page_num} JSON')
for video in traverse_obj(videos, ('results', lambda _, v: v['uuid'])):
yield self.url_result(
f'https://yappy.media/video/{video["uuid"]}', YappyIE,
video['uuid'], video.get('description'))
return self.playlist_result(OnDemandPagedList(fetch_page, 15), profile_id)
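
OnDemandPagedList turns the fetch_page callable into a lazily paged sequence: pages are requested with 0-based indices (hence the page_num += 1 for this API), at most once each, and only when their entries are actually needed, e.g. via --playlist-items. In isolation:

from yt_dlp.utils import OnDemandPagedList

def fetch_page(page_num):  # 0-based; the Yappy code adds 1 for its API
    start = page_num * 15
    yield from range(start, start + 15)

pages = OnDemandPagedList(fetch_page, 15)
assert pages.getslice(0, 3) == [0, 1, 2]  # only page 0 is ever fetched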

View File

@@ -258,7 +258,7 @@ def build_innertube_clients():
THIRD_PARTY = {
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
}
BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
@@ -292,6 +292,7 @@ class BadgeType(enum.Enum):
AVAILABILITY_PREMIUM = enum.auto()
AVAILABILITY_SUBSCRIPTION = enum.auto()
LIVE_NOW = enum.auto()
VERIFIED = enum.auto()
class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -791,17 +792,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_and_report_alerts(self, data, *args, **kwargs):
return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
def _extract_badges(self, renderer: dict):
privacy_icon_map = {
def _extract_badges(self, badge_list: list):
"""
Extract known BadgeTypes from a list of badge renderers.
@returns [{'type': BadgeType}]
"""
icon_type_map = {
'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
'CHECK': BadgeType.VERIFIED,
}
badge_style_map = {
'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
}
label_map = {
@@ -809,13 +819,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'private': BadgeType.AVAILABILITY_PRIVATE,
'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
'live': BadgeType.LIVE_NOW,
'premium': BadgeType.AVAILABILITY_PREMIUM
'premium': BadgeType.AVAILABILITY_PREMIUM,
'verified': BadgeType.VERIFIED,
'official artist channel': BadgeType.VERIFIED,
}
badges = []
for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
badge_type = (
privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
or badge_style_map.get(traverse_obj(badge, 'style'))
)
if badge_type:
@@ -823,11 +835,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
continue
# fallback, won't work in some languages
label = traverse_obj(badge, 'label', expected_type=str, default='')
label = traverse_obj(
badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
for match, label_badge_type in label_map.items():
if match in label.lower():
badges.append({'type': badge_type})
continue
badges.append({'type': label_badge_type})
break
return badges
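
The key-matching lambda is what lets one helper accept metadataBadgeRenderer, authorCommentBadgeRenderer and any other *badgeRenderer wrapper alike. In isolation, with invented renderer dicts:

import re
from yt_dlp.utils import traverse_obj

badge_list = [
    {'metadataBadgeRenderer': {'style': 'BADGE_STYLE_TYPE_VERIFIED'}},
    {'authorCommentBadgeRenderer': {'icon': {'iconType': 'CHECK_CIRCLE_THICK'}}},
]
# the callable filter receives (key, value) pairs and keeps values whose key matches
badges = traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
assert [list(b) for b in badges] == [['style'], ['icon']]
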
@@ -1020,8 +1033,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
overlay_style = traverse_obj(
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
get_all=False, expected_type=str)
badges = self._extract_badges(renderer)
badges = self._extract_badges(traverse_obj(renderer, 'badges'))
owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
expected_type=str)) or ''
@@ -1079,7 +1092,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
view_count_field: view_count,
'live_status': live_status
'live_status': live_status,
'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None
}
@@ -1332,6 +1346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Philipp Hagemeister',
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@@ -1415,6 +1430,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'The Witcher',
'uploader_url': 'https://www.youtube.com/@thewitcher',
'uploader_id': '@thewitcher',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
},
{
@@ -1444,6 +1462,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
'uploader_id': '@FlyingKitty900',
'comment_count': int,
'channel_is_verified': True,
},
},
{
@@ -1577,6 +1596,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Olympics',
'uploader_url': 'https://www.youtube.com/@Olympics',
'uploader_id': '@Olympics',
'channel_is_verified': True,
},
'params': {
'skip_download': 'requires avconv',
@@ -1894,6 +1914,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Bernie Sanders',
'uploader_url': 'https://www.youtube.com/@BernieSanders',
'uploader_id': '@BernieSanders',
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@@ -1955,6 +1977,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Vsauce',
'uploader_url': 'https://www.youtube.com/@Vsauce',
'uploader_id': '@Vsauce',
'comment_count': int,
'channel_is_verified': True,
},
'params': {
'skip_download': True,
@@ -2147,6 +2171,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'kudvenkat',
'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
'uploader_id': '@Csharp-video-tutorialsBlogspot',
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@@ -2227,6 +2253,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'CBS Mornings',
'uploader_url': 'https://www.youtube.com/@CBSMornings',
'uploader_id': '@CBSMornings',
'comment_count': int,
'channel_is_verified': True,
}
},
{
@@ -2297,6 +2325,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'colinfurze',
'uploader_url': 'https://www.youtube.com/@colinfurze',
'uploader_id': '@colinfurze',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'format': '17', # 3gp format available on android
@@ -2342,6 +2373,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'SciShow',
'uploader_url': 'https://www.youtube.com/@SciShow',
'uploader_id': '@SciShow',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
}, 'params': {'format': 'mhtml', 'skip_download': True}
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@@ -2370,6 +2404,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Leon Nguyen',
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
}
}, {
# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
@@ -2398,6 +2433,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Leon Nguyen',
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
},
'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
}, {
@@ -2428,6 +2464,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Quackity',
'uploader_id': '@Quackity',
'uploader_url': 'https://www.youtube.com/@Quackity',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
}
},
{ # continuous livestream. Microformat upload date should be preferred.
@@ -2594,6 +2633,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'MrBeast',
'uploader_url': 'https://www.youtube.com/@MrBeast',
'uploader_id': '@MrBeast',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}, {
@@ -2655,6 +2697,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'さなちゃんねる',
'uploader_url': 'https://www.youtube.com/@sana_natori',
'uploader_id': '@sana_natori',
'channel_is_verified': True,
'heatmap': 'count:100',
},
},
{
@@ -2684,6 +2728,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'thumbnail': r're:^https?://.*\.webp',
'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
'playable_in_embed': True,
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
@@ -2720,6 +2767,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Christopher Sykes',
'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
'uploader_id': '@ChristopherSykesDocumentaries',
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@@ -3121,7 +3169,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return funcname
return json.loads(js_to_json(self._search_regex(
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])[,;]', jscode,
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
@@ -3337,14 +3385,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['author_is_uploader'] = author_is_uploader
comment_abr = traverse_obj(
comment_renderer, ('actionsButtons', 'commentActionButtonsRenderer'), expected_type=dict)
comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
if comment_abr is not None:
info['is_favorited'] = 'creatorHeart' in comment_abr
comment_ab_icontype = traverse_obj(
comment_renderer, ('authorCommentBadge', 'authorCommentBadgeRenderer', 'icon', 'iconType'))
if comment_ab_icontype is not None:
info['author_is_verified'] = comment_ab_icontype in ('CHECK_CIRCLE_THICK', 'OFFICIAL_ARTIST_BADGE')
badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
if self._has_badge(badges, BadgeType.VERIFIED):
info['author_is_verified'] = True
is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
if is_pinned:
@@ -3581,7 +3628,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
_STORY_PLAYER_PARAMS = '8AEB'
_PLAYER_PARAMS = 'CgIQBg=='
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
@@ -3595,7 +3642,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'videoId': video_id,
}
if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
yt_query['params'] = self._STORY_PLAYER_PARAMS
yt_query['params'] = self._PLAYER_PARAMS
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
@@ -3607,7 +3654,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
default = ['android', 'web']
default = ['ios', 'android', 'web']
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
@@ -3830,6 +3877,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
fps = int_or_none(fmt.get('fps')) or 0
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
@@ -3837,16 +3886,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format_note': join_nonempty(
join_nonempty(audio_track.get('displayName'),
language_preference > 0 and ' (default)', delim=''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
fmt.get('isDrc') and 'DRC',
name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
throttled and 'THROTTLED', is_damaged and 'DAMAGED',
(self.get_param('verbose') or all_formats) and client_name,
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': -10 if throttled else -5 if itag == '22' else -1,
'fps': int_or_none(fmt.get('fps')) or None,
'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
+ (100 if 'Premium' in name else 0)),
'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
'audio_channels': fmt.get('audioChannels'),
'height': height,
'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
@@ -3915,11 +3964,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif itag:
f['format_id'] = itag
if itag in ('616', '235'):
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
f['source_preference'] = (f.get('source_preference') or -1) + 100
f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
if f['quality'] == -1 and f.get('height'):
f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
if self.get_param('verbose'):
if self.get_param('verbose') or all_formats:
f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
if f.get('fps') and f['fps'] <= 1:
del f['fps']
return True
subtitles = {}
@@ -3992,8 +4047,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
query = {'bpctr': '9999999999', 'has_verified': '1'}
if smuggled_data.get('is_story'):
query['pp'] = self._STORY_PLAYER_PARAMS
if smuggled_data.get('is_story'): # XXX: Deprecated
query['pp'] = self._PLAYER_PARAMS
webpage = self._download_webpage(
webpage_url, video_id, fatal=False, query=query)
@@ -4297,9 +4352,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
trans_code += f'-{lang_code}'
trans_name += format_field(lang_name, None, ' from %s')
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
if lang_code == f'a-{orig_trans_code}':
# Set audio language based on original subtitles
for f in formats:
if f.get('acodec') != 'none' and not f.get('language'):
f['language'] = orig_trans_code
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
process_language(
automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
# Setting tlang=lang returns damaged subtitles.
@@ -4319,15 +4378,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info[d_k] = parse_duration(query[k][0])
# Youtube Music Auto-generated description
if video_description:
if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
# XXX: Causes catastrophic backtracking if description has "·"
# E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
# Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
# reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
mobj = re.search(
r'''(?xs)
(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
(?P<album>[^\n]+)
(?=(?P<track>[^\n·]+))(?P=track)·
(?=(?P<artist>[^\n]+))(?P=artist)\n+
(?=(?P<album>[^\n]+))(?P=album)\n
(?:.+?\s*(?P<release_year>\d{4})(?!\d))?
(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
.+\nAuto-generated\ by\ YouTube\.\s*$
(.+?\nArtist\s*:\s*
(?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
)?.+\nAuto-generated\ by\ YouTube\.\s*$
''', video_description)
if mobj:
release_year = mobj.group('release_year')
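
On the atomic-group simulation above: the lookahead commits a capture, and the backreference then consumes exactly that text, so the engine cannot re-split the captured run while backtracking; on pathological descriptions full of '·' this shrinks the candidate space, though, as the comment notes, not completely. Both spellings agree on well-formed input:

import re

naive = re.compile(r'(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n')
atomic = re.compile(r'(?=(?P<track>[^\n·]+))(?P=track)·(?=(?P<artist>[^\n]+))(?P=artist)\n')

line = 'Some Track·Some Artist\n'
assert naive.match(line).group('track', 'artist') == ('Some Track', 'Some Artist')
assert atomic.match(line).group('track', 'artist') == ('Some Track', 'Some Artist')
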
@@ -4488,6 +4553,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['artist'] = mrr_contents_text
elif mrr_title == 'Song':
info['track'] = mrr_contents_text
owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
if self._has_badge(owner_badges, BadgeType.VERIFIED):
info['channel_is_verified'] = True
info.update({
'uploader': info.get('channel'),
@@ -4505,7 +4573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
):
upload_date = strftime_or_none(
self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
info['upload_date'] = upload_date
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
@@ -4513,7 +4581,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if v:
info[d_k] = v
badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
badges = self._extract_badges(traverse_obj(vpir, 'badges'))
is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
or get_first(video_details, 'isPrivate', expected_type=bool))
@@ -4586,13 +4654,14 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
channel_id = self.ucid_or_none(renderer['channelId'])
title = self._get_text(renderer, 'title')
channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
# As of 2023-03-01 YouTube doesn't use the channel handles on these renderers yet.
# However we can expect them to change that in the future.
channel_handle = self.handle_from_url(
traverse_obj(renderer, (
'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
('browseEndpoint', 'canonicalBaseUrl')),
{str}), get_all=False))
if not channel_handle:
# As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))
return {
'_type': 'url',
'url': channel_url,
@@ -4605,10 +4674,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
'title': title,
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
# See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
# However, in feed/channels this is set correctly to the subscriber count
'channel_follower_count': traverse_obj(
renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
'playlist_count': self._get_count(renderer, 'videoCountText'),
'playlist_count': (
# videoCountText may be the subscriber count
self._get_count(renderer, 'videoCountText')
if self._get_count(renderer, 'subscriberCountText') is not None else None),
'description': self._get_text(renderer, 'descriptionSnippet'),
'channel_is_verified': True if self._has_badge(
self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED) else None,
}
def _grid_entries(self, grid_renderer):
@@ -5024,6 +5101,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
})
channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
if self._has_badge(channel_badges, BadgeType.VERIFIED):
info['channel_is_verified'] = True
# Playlist stats is a text runs array containing [video count, view count, last updated].
# last updated or (view count and last updated) may be missing.
playlist_stats = get_first(
@@ -5032,7 +5113,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
last_updated_unix = self._parse_time_text(
self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')
info['modified_date'] = strftime_or_none(last_updated_unix)
info['view_count'] = self._get_count(playlist_stats, 1)
if info['view_count'] is None: # 0 is allowed
@@ -5132,7 +5213,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
player_header_privacy = playlist_header_renderer.get('privacy')
badges = self._extract_badges(sidebar_renderer)
badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))
# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
privacy_setting_icon = get_first(
@@ -5382,7 +5463,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader': '3Blue1Brown',
'tags': ['Mathematics'],
'channel_follower_count': int
'channel_follower_count': int,
'channel_is_verified': True,
},
}, {
'note': 'playlists, singlepage',
@@ -5559,6 +5641,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
'channel_is_verified': True,
},
}, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
@@ -5722,7 +5805,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
'id': 'AlTsmyW4auo', # This will keep changing
'id': 'hGkQjiJLjWQ', # This will keep changing
'ext': 'mp4',
'title': str,
'upload_date': r're:\d{8}',
@@ -5746,6 +5829,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@SkyNews',
'uploader_id': '@SkyNews',
'uploader': 'Sky News',
'channel_is_verified': True,
},
'params': {
'skip_download': True,
@@ -6234,7 +6318,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': str,
'uploader': str,
'uploader_url': str,
'uploader_id': str
'uploader_id': str,
'channel_is_verified': bool, # this will keep changing
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
@@ -6270,6 +6355,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'PewDiePie',
'uploader_url': 'https://www.youtube.com/@PewDiePie',
'uploader_id': '@PewDiePie',
'channel_is_verified': True,
}
}],
'params': {'extract_flat': True},
@@ -6288,6 +6374,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
'channel_is_verified': True,
},
'playlist_count': 0,
}, {
@@ -6322,6 +6409,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'description': 'I make music',
'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
'channel_follower_count': int,
'channel_is_verified': True,
},
'playlist_mincount': 10,
}]
@@ -6897,12 +6985,15 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
'title': 'Kurzgesagt In a Nutshell',
'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
'playlist_count': int, # XXX: should have a way of saying > 1
# No longer available for search as it is set to the handle.
# 'playlist_count': int,
'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'thumbnails': list,
'uploader_id': '@kurzgesagt',
'uploader_url': 'https://www.youtube.com/@kurzgesagt',
'uploader': 'Kurzgesagt In a Nutshell',
'channel_is_verified': True,
'channel_follower_count': int,
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
@@ -7166,6 +7257,8 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
'live_status': 'not_live',
'channel_follower_count': int,
'chapters': 'count:20',
'comment_count': int,
'heatmap': 'count:100',
}
}]
@@ -7226,6 +7319,8 @@ class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
'channel': 'さなちゃんねる',
'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
'uploader': 'さなちゃんねる',
'channel_is_verified': True,
'heatmap': 'count:100',
},
'add_ie': ['Youtube'],
'params': {'skip_download': 'Youtube'},

130
yt_dlp/extractor/zaiko.py Normal file
View File

@@ -0,0 +1,130 @@
import base64
from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
str_or_none,
traverse_obj,
try_call,
unescapeHTML,
url_or_none,
)
class ZaikoBaseIE(InfoExtractor):
def _download_real_webpage(self, url, video_id):
webpage, urlh = self._download_webpage_handle(url, video_id)
final_url = urlh.geturl()
if 'zaiko.io/login' in final_url:
self.raise_login_required()
elif '/_buy/' in final_url:
raise ExtractorError('Your account does not have tickets to this event', expected=True)
return webpage
def _parse_vue_element_attr(self, name, string, video_id):
page_elem = self._search_regex(rf'(<{name}[^>]+>)', string, name)
attrs = {}
for key, value in extract_attributes(page_elem).items():
if key.startswith(':'):
attrs[key[1:]] = self._parse_json(
value, video_id, transform_source=unescapeHTML, fatal=False)
return attrs
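
_parse_vue_element_attr collects the ':'-prefixed attributes Vue uses for bound, JSON-valued props. The same idea outside the extractor, with markup invented for the sketch:

import json
from yt_dlp.utils import extract_attributes, unescapeHTML

elem = '<stream-page :stream-access="{&quot;video_source&quot;: &quot;https://player.example/1&quot;}">'
attrs = {}
for key, value in extract_attributes(elem).items():
    if key.startswith(':'):  # Vue bound props carry JSON payloads
        attrs[key[1:]] = json.loads(unescapeHTML(value))
assert attrs['stream-access']['video_source'] == 'https://player.example/1'
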
class ZaikoIE(ZaikoBaseIE):
_VALID_URL = r'https?://(?:[\w-]+\.)?zaiko\.io/event/(?P<id>\d+)/stream(?:/\d+)+'
_TESTS = [{
'url': 'https://zaiko.io/event/324868/stream/20571/20571',
'info_dict': {
'id': '324868',
'ext': 'mp4',
'title': 'ZAIKO STREAMING TEST',
'alt_title': '[VOD] ZAIKO STREAMING TEST_20210603(Do Not Delete)',
'uploader_id': '454',
'uploader': 'ZAIKO ZERO',
'release_timestamp': 1583809200,
'thumbnail': r're:https://[a-z0-9]+\.cloudfront\.net/[a-z0-9_]+/[a-z0-9_]+',
'release_date': '20200310',
'categories': ['Tech House'],
'live_status': 'was_live',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_real_webpage(url, video_id)
stream_meta = self._parse_vue_element_attr('stream-page', webpage, video_id)
player_page = self._download_webpage(
stream_meta['stream-access']['video_source'], video_id,
'Downloading player page', headers={'referer': 'https://zaiko.io/'})
player_meta = self._parse_vue_element_attr('player', player_page, video_id)
status = traverse_obj(player_meta, ('initial_event_info', 'status', {str}))
live_status, msg, expected = {
'vod': ('was_live', 'No VOD stream URL was found', False),
'archiving': ('post_live', 'Event VOD is still being processed', True),
'deleting': ('post_live', 'This event has ended', True),
'deleted': ('post_live', 'This event has ended', True),
'error': ('post_live', 'This event has ended', True),
'disconnected': ('post_live', 'Stream has been disconnected', True),
'live_to_disconnected': ('post_live', 'Stream has been disconnected', True),
'live': ('is_live', 'No livestream URL was found', False),
'waiting': ('is_upcoming', 'Live event has not yet started', True),
'cancelled': ('not_live', 'Event has been cancelled', True),
}.get(status) or ('not_live', f'Unknown event status "{status}"', False)
stream_url = traverse_obj(player_meta, ('initial_event_info', 'endpoint', {url_or_none}))
formats = self._extract_m3u8_formats(
stream_url, video_id, live=True, fatal=False) if stream_url else []
if not formats:
self.raise_no_formats(msg, expected=expected)
return {
'id': video_id,
'formats': formats,
'live_status': live_status,
**traverse_obj(stream_meta, {
'title': ('event', 'name', {str}),
'uploader': ('profile', 'name', {str}),
'uploader_id': ('profile', 'id', {str_or_none}),
'release_timestamp': ('stream', 'start', 'timestamp', {int_or_none}),
'categories': ('event', 'genres', ..., {lambda x: x or None}),
}),
**traverse_obj(player_meta, ('initial_event_info', {
'alt_title': ('title', {str}),
'thumbnail': ('poster_url', {url_or_none}),
})),
}
class ZaikoETicketIE(ZaikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?zaiko\.io/account/eticket/(?P<id>[\w=-]{49})'
_TESTS = [{
'url': 'https://zaiko.io/account/eticket/TZjMwMzQ2Y2EzMXwyMDIzMDYwNzEyMTMyNXw1MDViOWU2Mw==',
'playlist_count': 1,
'info_dict': {
'id': 'f30346ca31-20230607121325-505b9e63',
'title': 'ZAIKO STREAMING TEST',
'thumbnail': 'https://media.zkocdn.net/pf_1/1_3wdyjcjyupseatkwid34u',
},
'skip': 'Only available with the ticketholding account',
}]
def _real_extract(self, url):
ticket_id = self._match_id(url)
ticket_id = try_call(
lambda: base64.urlsafe_b64decode(ticket_id[1:]).decode().replace('|', '-')) or ticket_id
webpage = self._download_real_webpage(url, ticket_id)
eticket = self._parse_vue_element_attr('eticket', webpage, ticket_id)
return self.playlist_result(
[self.url_result(stream, ZaikoIE) for stream in traverse_obj(eticket, ('streams', ..., 'url'))],
ticket_id, **traverse_obj(eticket, ('ticket-details', {
'title': 'event_name',
'thumbnail': 'event_img_url',
})))
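The eticket path encodes the playlist ID as a base64url blob: the leading character is discarded, and the '|' separators in the decoded payload are normalized to '-'. A worked decode of the test URL's ticket:
import base64

ticket_id = 'TZjMwMzQ2Y2EzMXwyMDIzMDYwNzEyMTMyNXw1MDViOWU2Mw=='
decoded = base64.urlsafe_b64decode(ticket_id[1:]).decode().replace('|', '-')
assert decoded == 'f30346ca31-20230607121325-505b9e63'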

View File

@@ -24,7 +24,7 @@ from ..utils import (
class ZDFBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['DE']
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'uhd')
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')
def _call_api(self, url, video_id, item, api_token=None, referrer=None):
headers = {}
@@ -61,6 +61,9 @@ class ZDFBaseIE(InfoExtractor):
elif mime_type == 'application/f4m+xml' or ext == 'f4m':
new_formats = self._extract_f4m_formats(
update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
elif ext == 'mpd':
new_formats = self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False)
else:
f = parse_codecs(meta.get('mimeCodec'))
if not f and meta.get('type'):
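The new branch adds DASH support: .mpd manifests now go through the MPD helper alongside the existing HLS/HDS paths, and fatal=False keeps a broken manifest from aborting extraction. A condensed sketch of the dispatch (within an InfoExtractor subclass; mime-type checks elided):
# condensed sketch; `self` is an InfoExtractor
if ext == 'm3u8':
    new_formats = self._extract_m3u8_formats(
        format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
elif ext == 'f4m':
    new_formats = self._extract_f4m_formats(
        update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
elif ext == 'mpd':  # newly handled
    new_formats = self._extract_mpd_formats(
        format_url, video_id, mpd_id='dash', fatal=False)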

View File

@@ -1,14 +1,16 @@
import json
import random
import string
import time
import uuid
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
jwt_decode_hs256,
parse_age_limit,
str_or_none,
try_call,
try_get,
unified_strdate,
unified_timestamp,
@@ -94,12 +96,12 @@ class Zee5IE(InfoExtractor):
'url': 'https://www.zee5.com/music-videos/details/adhento-gaani-vunnapaatuga-jersey-nani-shraddha-srinath/0-0-56973',
'only_matching': True
}]
_DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails/secure?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
_DEVICE_ID = ''.join(random.choices(string.ascii_letters + string.digits, k=20)).ljust(32, '0')
_DEVICE_ID = str(uuid.uuid4())
_USER_TOKEN = None
_LOGIN_HINT = 'Use "--username <mobile_number>" to log in using OTP, or "--username token" and "--password <user_token>" to log in using a user token.'
_NETRC_MACHINE = 'zee5'
_GEO_COUNTRIES = ['IN']
_USER_COUNTRY = None
def _perform_login(self, username, password):
if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
@@ -118,11 +120,16 @@ class Zee5IE(InfoExtractor):
self._USER_TOKEN = otp_verify_json.get('token')
if not self._USER_TOKEN:
raise ExtractorError(otp_request_json['message'], expected=True)
elif username.lower() == 'token' and len(password) > 1198:
elif username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
self._USER_TOKEN = password
else:
raise ExtractorError(self._LOGIN_HINT, expected=True)
token = jwt_decode_hs256(self._USER_TOKEN)
if token.get('exp', 0) <= int(time.time()):
raise ExtractorError('User token has expired', expected=True)
self._USER_COUNTRY = token.get('current_country')
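Instead of the brittle length check, the token is now validated by actually decoding it, and its "exp" claim is compared against the current time. A self-contained sketch that mirrors what jwt_decode_hs256 does (base64url-decode the middle segment without verifying the signature; the toy token below is built just for the demo):
import base64
import json
import time

def decode_jwt_payload(jwt):
    # excess '=' padding is tolerated by the base64 decoder
    return json.loads(base64.urlsafe_b64decode(jwt.split('.')[1] + '==='))

# toy token: header.payload.signature ('e30' is base64url for '{}')
payload = base64.urlsafe_b64encode(json.dumps({'exp': 0}).encode()).decode().rstrip('=')
toy_jwt = f'e30.{payload}.sig'

token = decode_jwt_payload(toy_jwt)
assert token.get('exp', 0) <= int(time.time())  # expired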
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
access_token_request = self._download_json(
@@ -137,8 +144,13 @@ class Zee5IE(InfoExtractor):
data['X-Z5-Guest-Token'] = self._DEVICE_ID
json_data = self._download_json(
self._DETAIL_API_URL.format(video_id, self._DEVICE_ID),
video_id, headers={'content-type': 'application/json'}, data=json.dumps(data).encode('utf-8'))
'https://spapi.zee5.com/singlePlayback/getDetails/secure', video_id, query={
'content_id': video_id,
'device_id': self._DEVICE_ID,
'platform_name': 'desktop_web',
'country': self._USER_COUNTRY or self.get_param('geo_bypass_country') or 'IN',
'check_parental_control': False,
}, headers={'content-type': 'application/json'}, data=json.dumps(data).encode('utf-8'))
asset_data = json_data['assetDetails']
show_data = json_data.get('showDetails', {})
if 'premium' in asset_data['business_type']:

View File

@@ -44,7 +44,7 @@ def _js_arith_op(op):
def _js_div(a, b):
if JS_Undefined in (a, b) or not (a and b):
if JS_Undefined in (a, b) or not (a or b):
return float('nan')
return (a or 0) / b if b else float('inf')
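The corrected guard returns NaN only when both operands are falsy (the 0/0 and undefined cases); the old `not (a and b)` wrongly produced NaN whenever either operand was 0. A truth-table check of the fixed semantics:
import math

def js_div(a, b):
    # fixed guard: NaN only when *both* operands are falsy;
    # JS_Undefined handling elided for brevity
    if not (a or b):
        return float('nan')
    return (a or 0) / b if b else float('inf')

assert js_div(0, 5) == 0.0            # old code wrongly returned NaN here
assert js_div(5, 0) == float('inf')
assert math.isnan(js_div(0, 0))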
@@ -779,7 +779,7 @@ class JSInterpreter:
obj = {}
obj_m = re.search(
r'''(?x)
(?<!this\.)%s\s*=\s*{\s*
(?<!\.)%s\s*=\s*{\s*
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
}\s*;
''' % (re.escape(objname), _FUNC_NAME_RE),
@@ -812,9 +812,9 @@ class JSInterpreter:
\((?P<args>[^)]*)\)\s*
(?P<code>{.+})''' % {'name': re.escape(funcname)},
self.code)
code, _ = self._separate_at_paren(func_m.group('code'))
if func_m is None:
raise self.Exception(f'Could not find JS function "{funcname}"')
code, _ = self._separate_at_paren(func_m.group('code'))
return [x.strip() for x in func_m.group('args').split(',')], code
def extract_function(self, funcname):

View File

@@ -474,15 +474,15 @@ def create_parser():
callback_kwargs={
'allowed_values': {
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter',
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
}, 'aliases': {
'youtube-dl': ['all', '-multistreams'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'],
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2022': ['no-external-downloader-progress'],
'2022': ['no-external-downloader-progress', 'playlist-match-filter'],
}
}, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
@@ -727,6 +727,10 @@ def create_parser():
'--netrc-location',
dest='netrc_location', metavar='PATH',
help='Location of .netrc authentication data; either the path or its containing directory. Defaults to ~/.netrc')
authentication.add_option(
'--netrc-cmd',
dest='netrc_cmd', metavar='NETRC_CMD',
help='Command to execute to get the credentials for an extractor.')
authentication.add_option(
'--video-password',
dest='videopassword', metavar='PASSWORD',
@@ -1015,8 +1019,9 @@ def create_parser():
'--download-sections',
metavar='REGEX', dest='download_ranges', action='append',
help=(
'Download only chapters whose title matches the given regular expression. '
'Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. '
'Download only chapters that match the regular expression. '
'A "*" prefix denotes a time range instead of a chapter. Negative timestamps are calculated from the end. '
'"*from-url" can be used to download between the "start_time" and "end_time" extracted from the URL. '
'Needs ffmpeg. This option can be used multiple times to download multiple sections, '
'e.g. --download-sections "*10:15-inf" --download-sections "intro"'))
downloader.add_option(
@@ -1417,8 +1422,7 @@ def create_parser():
'--clean-info-json', '--clean-infojson',
action='store_true', dest='clean_infojson', default=None,
help=(
'Remove some private fields such as filenames from the infojson. '
'Note that it could still contain some personal information (default)'))
'Remove some internal metadata such as filenames from the infojson (default)'))
filesystem.add_option(
'--no-clean-info-json', '--no-clean-infojson',
action='store_false', dest='clean_infojson',
@@ -1681,8 +1685,7 @@ def create_parser():
'Execute a command, optionally prefixed with when to execute it, separated by a ":". '
'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). '
'Same syntax as the output template can be used to pass any field as arguments to the command. '
'After download, an additional field "filepath" that contains the final path of the downloaded file '
'is also available, and if no fields are passed, %(filepath,_filename|)q is appended to the end of the command. '
'If no fields are passed, %(filepath,_filename|)q is appended to the end of the command. '
'This option can be used multiple times'))
postproc.add_option(
'--no-exec',

View File

@@ -187,7 +187,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
tmpl = progress_template.get('postprocess')
if tmpl:
self._downloader.to_screen(
self._downloader.evaluate_outtmpl(tmpl, progress_dict), skip_eol=True, quiet=False)
self._downloader.evaluate_outtmpl(tmpl, progress_dict), quiet=False)
self._downloader.to_console_title(self._downloader.evaluate_outtmpl(
progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s',

View File

@@ -149,7 +149,7 @@ class Updater:
f'You are switching to an {self.ydl._format_err("unofficial", "red")} executable '
f'from {self.ydl._format_err(self._target_repo, self.ydl.Styles.EMPHASIS)}. '
f'Run {self.ydl._format_err("at your own risk", "light red")}')
self.restart = self._blocked_restart
self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
else:
self._target_repo = UPDATE_SOURCES.get(self.target_channel)
if not self._target_repo:
@@ -294,6 +294,7 @@ class Updater:
if (_VERSION_RE.fullmatch(self.target_tag[5:])
and version_tuple(self.target_tag[5:]) < (2023, 3, 2)):
self.ydl.report_warning('You are downgrading to a version without --update-to')
self._block_restart('Cannot automatically restart to a version without --update-to')
directory = os.path.dirname(self.filename)
if not os.access(self.filename, os.W_OK):
@@ -381,11 +382,11 @@ class Updater:
_, _, returncode = Popen.run(self.cmd)
return returncode
def _blocked_restart(self):
self._report_error(
'Automatically restarting into custom builds is disabled for security reasons. '
'Restart yt-dlp to use the updated version', expected=True)
return self.ydl._download_retcode
def _block_restart(self, msg):
def wrapper():
self._report_error(f'{msg}. Restart yt-dlp to use the updated version', expected=True)
return self.ydl._download_retcode
self.restart = wrapper
def run_update(ydl):

View File

@@ -6,7 +6,7 @@ import sys
import urllib.parse
import zlib
from ._utils import decode_base_n, preferredencoding
from ._utils import Popen, decode_base_n, preferredencoding
from .traversal import traverse_obj
from ..dependencies import certifi, websockets
@@ -174,3 +174,7 @@ def handle_youtubedl_headers(headers):
del filtered_headers['Youtubedl-no-compression']
return filtered_headers
def process_communicate_or_kill(p, *args, **kwargs):
return Popen.communicate_or_kill(p, *args, **kwargs)

View File

@@ -25,6 +25,7 @@ import json
import locale
import math
import mimetypes
import netrc
import operator
import os
import platform
@@ -864,10 +865,11 @@ def escapeHTML(text):
)
def process_communicate_or_kill(p, *args, **kwargs):
deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
return Popen.communicate_or_kill(p, *args, **kwargs)
class netrc_from_content(netrc.netrc):
def __init__(self, content):
self.hosts, self.macros = {}, {}
with io.StringIO(content) as stream:
self._parse('-', stream, False)
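netrc_from_content parses netrc data held in memory instead of in a file; this is the piece that lets --netrc-cmd feed credentials from a command's stdout. A small usage sketch (host and credentials are illustrative):
content = 'machine example.com login alice password s3cret'
nrc = netrc_from_content(content)
login, _account, password = nrc.authenticators('example.com')
assert (login, password) == ('alice', 's3cret')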
class Popen(subprocess.Popen):
@@ -1654,7 +1656,7 @@ def unified_strdate(date_str, day_first=True):
def unified_timestamp(date_str, day_first=True, with_milliseconds=False):
if date_str is None:
if not isinstance(date_str, str):
return None
date_str = re.sub(r'\s+', ' ', re.sub(
@@ -2446,13 +2448,16 @@ def request_to_url(req):
return req
def strftime_or_none(timestamp, date_format, default=None):
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
datetime_object = None
try:
if isinstance(timestamp, (int, float)): # unix timestamp
# Using naive datetime here can break timestamp() in Windows
# Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
datetime_object = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
# Also, datetime.datetime.fromtimestamp breaks for negative timestamps
# Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
datetime_object = (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
+ datetime.timedelta(seconds=timestamp))
elif isinstance(timestamp, str): # assume YYYYMMDD
datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
date_format = re.sub( # Support %s on windows
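Anchoring at fromtimestamp(0, utc) and adding a timedelta avoids both the naive-datetime breakage on Windows and fromtimestamp's rejection of negative values. A quick check of the pre-epoch path:
import datetime

timestamp = -86400  # one day before the Unix epoch
dt = (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
      + datetime.timedelta(seconds=timestamp))
assert dt.strftime('%Y%m%d') == '19691231'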
@@ -3304,7 +3309,7 @@ STR_FORMAT_RE_TMPL = r'''(?x)
'''
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
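Registering 'a' makes Python's ascii() conversion available as an output-template format type, complementing the existing 'r' (repr) and 's' (str) types. What the conversion produces, in plain Python:
assert repr('München') == "'München'"
assert ascii('München') == "'M\\xfcnchen'"  # non-ASCII escaped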
def limit_length(s, length):
@@ -3507,7 +3512,8 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
},
}
sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
sanitize_codec = functools.partial(
try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
for ext in preferences or COMPATIBLE_CODECS.keys():
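Lowercasing makes codec-tag matching case-insensitive, and stripping '0' is what folds variants like 'av01' into the 'av1' family key used by COMPATIBLE_CODECS. A standalone version of the getter (without the try_get wrapper that returns None on failure):
def sanitize_codec(codecs):
    # first tag, drop the profile suffix, remove zeros, lowercase
    return codecs[0].split('.')[0].replace('0', '').lower()

assert sanitize_codec(['av01.0.12M.10']) == 'av1'
assert sanitize_codec(['AVC1.640028']) == 'avc1'  # case now normalized too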
@@ -3753,12 +3759,10 @@ def match_filter_func(filters, breaking_filters=None):
class download_range_func:
def __init__(self, chapters, ranges):
self.chapters, self.ranges = chapters, ranges
def __init__(self, chapters, ranges, from_info=False):
self.chapters, self.ranges, self.from_info = chapters, ranges, from_info
def __call__(self, info_dict, ydl):
if not self.ranges and not self.chapters:
yield {}
warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
else 'Cannot match chapters since chapter information is unavailable')
@@ -3770,7 +3774,23 @@ class download_range_func:
if self.chapters and warning:
ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
for start, end in self.ranges or []:
yield {
'start_time': self._handle_negative_timestamp(start, info_dict),
'end_time': self._handle_negative_timestamp(end, info_dict),
}
if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
yield {
'start_time': info_dict.get('start_time') or 0,
'end_time': info_dict.get('end_time') or float('inf'),
}
elif not self.ranges and not self.chapters:
yield {}
@staticmethod
def _handle_negative_timestamp(time, info):
return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time
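Negative section timestamps resolve against the video duration and clamp at zero; when the duration is unknown, the value passes through unchanged. A standalone check of the helper above:
def handle_negative_timestamp(time, info):
    return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time

assert handle_negative_timestamp(-120, {'duration': 600}) == 480  # starts 2 min from the end
assert handle_negative_timestamp(-120, {}) == -120                # no duration: passed through
assert handle_negative_timestamp(-900, {'duration': 600}) == 0    # clamped at the start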
def __eq__(self, other):
return (isinstance(other, download_range_func)
@@ -4152,6 +4172,7 @@ class ISO639Utils:
'or': 'ori',
'os': 'oss',
'pa': 'pan',
'pe': 'per',
'pi': 'pli',
'pl': 'pol',
'ps': 'pus',
@@ -5673,6 +5694,7 @@ def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None)
return orderedSet(requested)
# TODO: Rewrite
class FormatSorter:
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
@@ -5721,8 +5743,10 @@ class FormatSorter:
'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
'function': lambda it: next(filter(None, it), None)},
'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
'function': lambda it: next(filter(None, it), None)},
'ext': {'type': 'combined', 'field': ('vext', 'aext')},
'res': {'type': 'multiple', 'field': ('height', 'width'),
'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
@@ -5953,13 +5977,15 @@ class FormatSorter:
format['preference'] = -100
# Determine missing bitrates
if format.get('tbr') is None:
if format.get('vbr') is not None and format.get('abr') is not None:
format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
else:
if format.get('vcodec') != 'none' and format.get('vbr') is None:
format['vbr'] = format.get('tbr') - format.get('abr', 0)
if format.get('acodec') != 'none' and format.get('abr') is None:
format['abr'] = format.get('tbr') - format.get('vbr', 0)
if format.get('vcodec') == 'none':
format['vbr'] = 0
if format.get('acodec') == 'none':
format['abr'] = 0
if not format.get('vbr') and format.get('vcodec') != 'none':
format['vbr'] = try_call(lambda: format['tbr'] - format['abr']) or None
if not format.get('abr') and format.get('acodec') != 'none':
format['abr'] = try_call(lambda: format['tbr'] - format['vbr']) or None
if not format.get('tbr'):
format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
return tuple(self._calculate_field_preference(format, field) for field in self._order)
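The rewritten chain zeroes the bitrate of whichever stream is absent, then derives each missing value from the other two; try_call turns a subtraction on a still-missing operand into None instead of a TypeError. A worked run (the try_call stand-in mirrors utils.try_call):
def try_call(fn):
    # minimal stand-in: result of fn(), or None on any exception
    try:
        return fn()
    except Exception:
        return None

fmt = {'tbr': 1000, 'abr': 128, 'vcodec': 'avc1', 'acodec': 'mp4a.40.2'}
if fmt.get('vcodec') == 'none':
    fmt['vbr'] = 0
if fmt.get('acodec') == 'none':
    fmt['abr'] = 0
if not fmt.get('vbr') and fmt.get('vcodec') != 'none':
    fmt['vbr'] = try_call(lambda: fmt['tbr'] - fmt['abr']) or None
if not fmt.get('abr') and fmt.get('acodec') != 'none':
    fmt['abr'] = try_call(lambda: fmt['tbr'] - fmt['vbr']) or None
if not fmt.get('tbr'):
    fmt['tbr'] = try_call(lambda: fmt['vbr'] + fmt['abr']) or None

assert fmt['vbr'] == 872  # inferred as tbr - abr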

View File

@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
__version__ = '2023.03.04'
__version__ = '2023.06.22'
RELEASE_GIT_HEAD = '392389b7df7b818f794b231f14dc396d4875fbad'
RELEASE_GIT_HEAD = '812cdfa06c33a40e73a8e04b3e6f42c084666a43'
VARIANT = None