Merge remote-tracking branch 'origin' into yt-live-from-start-range
@@ -195,6 +195,7 @@ class YoutubeDL:
     ap_password:       Multiple-system operator account password.
     usenetrc:          Use netrc for authentication instead.
     netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
+    netrc_cmd:         Use a shell command to get credentials
     verbose:           Print additional info to stdout.
     quiet:             Do not print messages to stdout.
     no_warnings:       Do not print out anything for warnings.
@@ -263,7 +264,7 @@ class YoutubeDL:
     consoletitle:      Display progress in console window's titlebar.
     writedescription:  Write the video description to a .description file
     writeinfojson:     Write the video description to a .info.json file
-    clean_infojson:    Remove private fields from the infojson
+    clean_infojson:    Remove internal metadata from the infojson
     getcomments:       Extract video comments. This will not be written to disk
                        unless writeinfojson is also given
     writeannotations:  Write the video annotations to a .annotations.xml file
@@ -1291,17 +1292,17 @@ class YoutubeDL:
         if fmt == 's' and value is not None and key in field_size_compat_map.keys():
             fmt = f'0{field_size_compat_map[key]:d}d'

-        if value is None:
-            value = default
-        elif replacement is not None:
+        if None not in (value, replacement):
             try:
                 value = replacement_formatter.format(replacement, value)
             except ValueError:
-                value = na
+                value, default = None, na

         flags = outer_mobj.group('conversion') or ''
         str_fmt = f'{fmt[:-1]}s'
-        if fmt[-1] == 'l':  # list
+        if value is None:
+            value, fmt = default, 's'
+        elif fmt[-1] == 'l':  # list
             delim = '\n' if '#' in flags else ', '
             value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
         elif fmt[-1] == 'j':  # json
@@ -1332,17 +1333,19 @@ class YoutubeDL:
                 value = str(value)[0]
             else:
                 fmt = str_fmt
-        elif fmt[-1] not in 'rs':  # numeric
+        elif fmt[-1] not in 'rsa':  # numeric
             value = float_or_none(value)
             if value is None:
                 value, fmt = default, 's'

         if sanitize:
+            # If value is an object, sanitize might convert it to a string
+            # So we convert it to repr first
             if fmt[-1] == 'r':
-                # If value is an object, sanitize might convert it to a string
-                # So we convert it to repr first
                 value, fmt = repr(value), str_fmt
-            if fmt[-1] in 'csr':
+            elif fmt[-1] == 'a':
+                value, fmt = ascii(value), str_fmt
+            if fmt[-1] in 'csra':
                 value = sanitizer(initial_field, value)

         key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
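Note: the new `a` conversion mirrors the existing `r` one but goes through the `ascii()` builtin, so the value is coerced to a backslash-escaped, ASCII-only string before the usual `s`-formatting. A standalone illustration (made-up values, not part of the patch):

```python
# Illustrative only: what the new %(...)a conversion does to a value.
title = 'Röster ur det förflutna'
value, fmt = ascii(title), 's'
print(('%' + fmt) % value)  # 'R\xf6ster ur det f\xf6rflutna' (quotes included)
```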
@@ -1408,7 +1411,7 @@ class YoutubeDL:

     def _match_entry(self, info_dict, incomplete=False, silent=False):
         """Returns None if the file should be downloaded"""
-        _type = info_dict.get('_type', 'video')
+        _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
         assert incomplete or _type == 'video', 'Only video result can be considered complete'

         video_title = info_dict.get('title', info_dict.get('id', 'entry'))
@@ -1906,7 +1909,7 @@ class YoutubeDL:
                 continue

             entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
-            if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
+            if not lazy and 'playlist-index' in self.params['compat_opts']:
                 playlist_index = ie_result['requested_entries'][i]

             entry_copy = collections.ChainMap(entry, {
@@ -2668,7 +2671,8 @@ class YoutubeDL:
                 format['dynamic_range'] = 'SDR'
             if format.get('aspect_ratio') is None:
                 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
-            if (info_dict.get('duration') and format.get('tbr')
+            if (not format.get('manifest_url')  # For fragmented formats, "tbr" is often max bitrate and not average
+                    and info_dict.get('duration') and format.get('tbr')
                     and not format.get('filesize') and not format.get('filesize_approx')):
                 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
             format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
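For reference, the estimate treats `tbr` as kilobits (1024 bits) per second, so bytes ≈ duration × tbr × 1024⁄8. Worked through with made-up numbers:

```python
# Illustrative values: a 2766 s video at tbr = 128 Kbit/s.
duration = 2766  # seconds
tbr = 128        # total bitrate, Kbit/s
filesize_approx = int(duration * tbr * (1024 / 8))  # Kbit/s -> bytes/s, times seconds
print(filesize_approx)  # 45318144 bytes, about 43.2 MiB
```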
@@ -2807,11 +2811,13 @@ class YoutubeDL:
                 new_info.update(fmt)
             offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
             end_time = offset + min(chapter.get('end_time', duration), duration)
+            # duration may not be accurate. So allow deviations <1sec
+            if end_time == float('inf') or end_time > offset + duration + 1:
+                end_time = None
             if chapter or offset:
                 new_info.update({
                     'section_start': offset + chapter.get('start_time', 0),
-                    # duration may not be accurate. So allow deviations <1sec
-                    'section_end': end_time if end_time <= offset + duration + 1 else None,
+                    'section_end': end_time,
                     'section_title': chapter.get('title'),
                     'section_number': chapter.get('index'),
                 })
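In effect the section end is clamped to the declared duration and dropped entirely when it is infinite or overshoots by more than the 1-second tolerance. A standalone restatement (hypothetical helper, not part of the patch):

```python
import math

# Hypothetical helper restating the clamping logic above.
def section_end(offset, duration, chapter_end):
    end_time = offset + min(chapter_end, duration)
    # duration may be inaccurate, so tolerate <1 s overshoot; otherwise drop it
    if math.isinf(end_time) or end_time > offset + duration + 1:
        return None
    return end_time

print(section_end(10, 60, 55))    # 65
print(section_end(10, 60, 61.5))  # 70 -- min() clamps the end to the duration
```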
@@ -2963,8 +2969,7 @@ class YoutubeDL:
         print_field('url', 'urls')
         print_field('thumbnail', optional=True)
         print_field('description', optional=True)
-        if filename:
-            print_field('filename')
+        print_field('filename')
         if self.params.get('forceduration') and info_copy.get('duration') is not None:
             self.to_stdout(formatSeconds(info_copy['duration']))
         print_field('format')
@@ -3188,7 +3193,6 @@ class YoutubeDL:
             return

        if info_dict.get('requested_formats') is not None:
-            requested_formats = info_dict['requested_formats']
            old_ext = info_dict['ext']
            if self.params.get('merge_output_format') is None:
                if (info_dict['ext'] == 'webm'
@@ -3215,19 +3219,22 @@ class YoutubeDL:
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)

                info_dict['__real_download'] = False
+                # NOTE: Copy so that original format dicts are not modified
+                info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
+
                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
-                    for f in requested_formats if fd != FFmpegFD else []:
+                    for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
-                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
+                    info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
@@ -3251,7 +3258,7 @@ class YoutubeDL:
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                    fname = temp_filename
-                    for f in requested_formats:
+                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
@@ -3707,8 +3714,11 @@ class YoutubeDL:
                format_field(f, 'fps', '\t%d', func=round),
                format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                format_field(f, 'audio_channels', '\t%s'),
-                delim,
-                format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
+                delim, (
+                    format_field(f, 'filesize', ' \t%s', func=format_bytes)
+                    or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
+                    or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
+                                    None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
                format_field(f, 'tbr', '\t%dk', func=round),
                shorten_protocol_name(f.get('protocol', '')),
                delim,
@@ -4112,8 +4122,11 @@ class YoutubeDL:
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
+                    if isinstance(err, urllib.error.HTTPError) and err.code == 404:
+                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
+                    else:
+                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                    thumbnails.pop(idx)
-                    self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                if ret and not write_all:
                    break
        return ret
@@ -189,8 +189,8 @@ def validate_options(opts):
        raise ValueError(f'{max_name} "{max_val}" must be greater than or equal to {min_name} "{min_val}"')

    # Usernames and passwords
-    validate(not opts.usenetrc or (opts.username is None and opts.password is None),
-             '.netrc', msg='using {name} conflicts with giving username/password')
+    validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc',
+             msg='{name}, netrc command and username/password are mutually exclusive options')
    validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing')
    validate(opts.ap_password is None or opts.ap_username is not None,
             'TV Provider account username', msg='{name} missing')
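The new check simply counts truthy credential sources instead of comparing pairs. A tiny illustration with made-up values:

```python
# Illustrative only: at most one of the three credential sources may be set.
usenetrc, netrc_cmd, username = False, 'gpg --decrypt ~/.authinfo.gpg', None
assert sum(map(bool, (usenetrc, netrc_cmd, username))) <= 1  # passes: one source
```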
@@ -321,41 +321,56 @@ def validate_options(opts):
        opts.skip_download = None
        del opts.outtmpl['default']

-    def parse_chapters(name, value):
-        chapters, ranges = [], []
+    def parse_chapters(name, value, advanced=False):
        parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
+        TIMESTAMP_RE = r'''(?x)(?:
+            (?P<start_sign>-?)(?P<start>[^-]+)
+        )?\s*-\s*(?:
+            (?P<end_sign>-?)(?P<end>[^-]+)
+        )?'''

        current_time = time.time()

+        chapters, ranges, from_url = [], [], False
        for regex in value or []:
            if regex.startswith('*'):
-                for range_ in map(str.strip, regex[1:].split(',')):
-                    mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
-                    dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf'))
-                    if None in (dur or [None]):
-                        raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"')
-                    ranges.append(dur)
+                if advanced and regex == '*from-url':
+                    from_url = True
                    continue
-            elif regex.startswith('#'):
-                for range_ in map(str.strip, regex[1:].split(',')):
-                    mobj = range_ != '-' and re.fullmatch(r'(-?[^-]+)\s*-\s*(-?[^-]+)?', range_)
-                    if not mobj:
-                        raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "#start-end"')
-
-                    start_section = parse_timestamp(mobj.group(1) or '0')
-                    end_section = parse_timestamp(mobj.group(2) or 'inf')
-                    if start_section is None or end_section is None:
-                        raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "#start-end"')
-
-                    ranges.append((current_time + start_section, current_time + end_section))
            elif not regex.startswith('*') or not regex.startswith('#'):
+                try:
+                    chapters.append(re.compile(regex))
+                except re.error as err:
+                    raise ValueError(f'invalid {name} regex "{regex}" - {err}')
+                continue
-                try:
-                    chapters.append(re.compile(regex))
-                except re.error as err:
-                    raise ValueError(f'invalid {name} regex "{regex}" - {err}')
-        return chapters, ranges
-
-    opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
-    opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
+            for range_ in map(str.strip, regex[1:].split(',')):
+                mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_)
+                dur = mobj and [parse_timestamp(mobj.group('start') or '0'),
+                                parse_timestamp(mobj.group('end') or 'inf')]
+                signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign'))
+
+                err = None
+                if None in (dur or [None]):
+                    err = 'Must be of the form "*start-end"'
+                elif not advanced and any(signs):
+                    err = 'Negative timestamps are not allowed'
+                elif regex.startswith('*'):
+                    dur[0] *= -1 if signs[0] else 1
+                    dur[1] *= -1 if signs[1] else 1
+                    if dur[1] == float('-inf'):
+                        err = '"-inf" is not a valid end'
+                elif regex.startswith('#'):
+                    dur[0] = dur[0] * (-1 if signs[0] else 1) + current_time
+                    dur[1] = dur[1] * (-1 if signs[1] else 1) + current_time
+                    if dur[1] == float('-inf'):
+                        err = '"-inf" is not a valid end'
+                if err:
+                    raise ValueError(f'invalid {name} time range "{regex}". {err}')
+                ranges.append(dur)
+
+        return chapters, ranges, from_url
+
+    opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters)
+    opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True))
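A rough sketch of the new timestamp grammar (assuming plain seconds rather than `parse_duration`'s full `[HH:]MM:SS` forms; `parse_range` is a made-up stand-in): signed offsets survive parsing for `*`-ranges, while the `#start-end` form anchors both ends to the current epoch.

```python
import re
import time

# Same regex shape as TIMESTAMP_RE above, collapsed onto one line.
TIMESTAMP_RE = r'(?x)(?:(?P<start_sign>-?)(?P<start>[^-]+))?\s*-\s*(?:(?P<end_sign>-?)(?P<end>[^-]+))?'

def parse_range(range_, relative_to_now=False):
    mobj = re.fullmatch(TIMESTAMP_RE, range_.strip())
    start = float(mobj.group('start') or 0) * (-1 if mobj.group('start_sign') else 1)
    end = float(mobj.group('end') or 'inf') * (-1 if mobj.group('end_sign') else 1)
    if relative_to_now:  # the '#start-end' form anchors both ends to time.time()
        start, end = start + time.time(), end + time.time()
    return start, end

print(parse_range('-300--60'))        # (-300.0, -60.0): signed offsets kept as-is
print(parse_range('-300--60', True))  # both ends shifted by the current epoch
```

What the negative offsets mean downstream is up to the caller; here they merely survive validation instead of being rejected as in the old grammar.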

    # Cookies from browser
    if opts.cookiesfrombrowser:
@@ -757,6 +772,7 @@ def parse_options(argv=None):
    return ParsedOptions(parser, opts, urls, {
        'usenetrc': opts.usenetrc,
        'netrc_location': opts.netrc_location,
+        'netrc_cmd': opts.netrc_cmd,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,

yt_dlp/casefold.py (new file, 5 lines)
@@ -0,0 +1,5 @@
+import warnings
+
+warnings.warn(DeprecationWarning(f'{__name__} is deprecated'))
+
+casefold = str.casefold
@@ -705,11 +705,11 @@ class _LinuxKeyring(Enum):
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
    SelectedLinuxBackend
    """
-    KWALLET4 = auto()  # this value is just called KWALLET in the chromium source but it is for KDE4 only
+    KWALLET = auto()  # KDE4
    KWALLET5 = auto()
    KWALLET6 = auto()
-    GNOME_KEYRING = auto()
-    BASIC_TEXT = auto()
+    GNOMEKEYRING = auto()
+    BASICTEXT = auto()


SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
@@ -803,7 +803,7 @@ def _choose_linux_keyring(logger):
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')
    if desktop_environment == _LinuxDesktopEnvironment.KDE4:
-        linux_keyring = _LinuxKeyring.KWALLET4
+        linux_keyring = _LinuxKeyring.KWALLET
    elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
        linux_keyring = _LinuxKeyring.KWALLET5
    elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
@@ -811,9 +811,9 @@ def _choose_linux_keyring(logger):
    elif desktop_environment in (
        _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
    ):
-        linux_keyring = _LinuxKeyring.BASIC_TEXT
+        linux_keyring = _LinuxKeyring.BASICTEXT
    else:
-        linux_keyring = _LinuxKeyring.GNOME_KEYRING
+        linux_keyring = _LinuxKeyring.GNOMEKEYRING
    return linux_keyring


@@ -828,7 +828,7 @@ def _get_kwallet_network_wallet(keyring, logger):
    """
    default_wallet = 'kdewallet'
    try:
-        if keyring == _LinuxKeyring.KWALLET4:
+        if keyring == _LinuxKeyring.KWALLET:
            service_name = 'org.kde.kwalletd'
            wallet_path = '/modules/kwalletd'
        elif keyring == _LinuxKeyring.KWALLET5:
@@ -929,11 +929,11 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

-    if keyring in (_LinuxKeyring.KWALLET4, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
+    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
-    elif keyring == _LinuxKeyring.GNOME_KEYRING:
+    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
-    elif keyring == _LinuxKeyring.BASIC_TEXT:
+    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
@@ -1326,3 +1326,7 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')
+
+    def clear(self, *args, **kwargs):
+        with contextlib.suppress(KeyError):
+            return super().clear(*args, **kwargs)
@@ -49,7 +49,6 @@ class FileDownloader:
    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
-    continuedl:         Attempt to continue downloads if possible
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for expected network errors.
                        Default is 0 for API, but 10 for CLI
@@ -173,6 +173,9 @@ class FragmentFD(FileDownloader):
            **self.params,
            'noprogress': True,
            'test': False,
+            'sleep_interval': 0,
+            'max_sleep_interval': 0,
+            'sleep_interval_subtitles': 0,
        })
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
@@ -7,9 +7,9 @@ from .common import FileDownloader
from .external import FFmpegFD
from ..utils import (
    DownloadError,
-    str_or_none,
-    sanitized_Request,
    WebSocketsWrapper,
+    sanitized_Request,
+    str_or_none,
    try_get,
)
@@ -497,6 +497,7 @@ from .dplay import (
    DiscoveryPlusItalyIE,
    DiscoveryPlusItalyShowIE,
    DiscoveryPlusIndiaShowIE,
+    GlobalCyclingNetworkPlusIE,
)
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
@@ -520,6 +521,7 @@ from .deuxm import (
    DeuxMNewsIE
)
from .digitalconcerthall import DigitalConcertHallIE
+from .discogs import DiscogsReleasePlaylistIE
from .discovery import DiscoveryIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
@@ -578,6 +580,7 @@ from .espn import (
    ESPNCricInfoIE,
)
from .esri import EsriVideoIE
+from .ettutv import EttuTvIE
from .europa import EuropaIE, EuroParlWebstreamIE
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
@@ -664,6 +667,7 @@ from .funimation import (
    FunimationShowIE,
)
from .funk import FunkIE
+from .funker530 import Funker530IE
from .fusion import FusionIE
from .fuyintv import FuyinTVIE
from .gab import (
@@ -1116,7 +1120,8 @@ from .mojvideo import MojvideoIE
from .morningstar import MorningstarIE
from .motherless import (
    MotherlessIE,
-    MotherlessGroupIE
+    MotherlessGroupIE,
+    MotherlessGalleryIE,
)
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
@@ -1257,6 +1262,7 @@ from .nhk import (
    NhkForSchoolProgramListIE,
    NhkRadioNewsPageIE,
    NhkRadiruIE,
+    NhkRadiruLiveIE,
)
from .nhl import NHLIE
from .nick import (
@@ -1611,6 +1617,7 @@ from .rentv import (
from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
+from .rheinmaintv import RheinMainTVIE
from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
@@ -1625,6 +1632,7 @@ from .rottentomatoes import RottenTomatoesIE
from .rozhlas import (
    RozhlasIE,
    RozhlasVltavaIE,
+    MujRozhlasIE,
)
from .rte import RteIE, RteRadioIE
from .rtlnl import (
@@ -2422,7 +2430,10 @@ from .yandexvideo import (
    ZenYandexChannelIE,
)
from .yapfiles import YapFilesIE
-from .yappy import YappyIE
+from .yappy import (
+    YappyIE,
+    YappyProfileIE,
+)
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
from .yle_areena import YleAreenaIE
@@ -2440,6 +2451,10 @@ from .younow import (
from .youporn import YouPornIE
from .yourporn import YourPornIE
from .yourupload import YourUploadIE
+from .zaiko import (
+    ZaikoIE,
+    ZaikoETicketIE,
+)
from .zapiks import ZapiksIE
from .zattoo import (
    BBVTVIE,
@@ -40,28 +40,33 @@ class ACastBaseIE(InfoExtractor):

class ACastIE(ACastBaseIE):
    IE_NAME = 'acast'
-    _VALID_URL = r'''(?x)
+    _VALID_URL = r'''(?x:
                    https?://
                    (?:
                        (?:(?:embed|www)\.)?acast\.com/|
                        play\.acast\.com/s/
                    )
-                    (?P<channel>[^/]+)/(?P<id>[^/#?]+)
-                    '''
+                    (?P<channel>[^/]+)/(?P<id>[^/#?"]+)
+                )'''
+    _EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
        'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
        'info_dict': {
            'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
            'ext': 'mp3',
            'title': '2. Raggarmordet - Röster ur det förflutna',
-            'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
+            'description': 'md5:013959207e05011ad14a222cf22278cc',
            'timestamp': 1477346700,
            'upload_date': '20161024',
            'duration': 2766,
-            'creator': 'Anton Berg & Martin Johnson',
+            'creator': 'Third Ear Studio',
            'series': 'Spår',
            'episode': '2. Raggarmordet - Röster ur det förflutna',
+            'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
+            'episode_number': 2,
+            'display_id': '2.raggarmordet-rosterurdetforflutna',
+            'season_number': 4,
+            'season': 'Season 4',
        }
    }, {
        'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
@@ -73,6 +78,23 @@ class ACastIE(ACastBaseIE):
        'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
        'only_matching': True,
    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://ausi.anu.edu.au/news/democracy-sausage-episode-can-labor-be-long-form-government',
+        'info_dict': {
+            'id': '646c68fb21fbf20011e9c651',
+            'ext': 'mp3',
+            'creator': 'The Australian National University',
+            'display_id': 'can-labor-be-a-long-form-government',
+            'duration': 2618,
+            'thumbnail': 'https://assets.pippa.io/shows/6113e8578b4903809f16f7e5/1684821529295-515b9520db9ce53275b995eb302f941c.jpeg',
+            'title': 'Can Labor be a long-form government?',
+            'episode': 'Can Labor be a long-form government?',
+            'upload_date': '20230523',
+            'series': 'Democracy Sausage with Mark Kenny',
+            'timestamp': 1684826362,
+            'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
+        }
+    }]

    def _real_extract(self, url):
        channel, display_id = self._match_valid_url(url).groups()
@@ -76,59 +76,6 @@ class AfreecaTVIE(InfoExtractor):
            },
        }],
        'skip': 'Video is gone',
-    }, {
-        'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
-        'info_dict': {
-            'id': '18650793',
-            'ext': 'mp4',
-            'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': '윈아디',
-            'uploader_id': 'badkids',
-            'duration': 107,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
-        'info_dict': {
-            'id': '10481652',
-            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
-            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
-            'uploader': 'dailyapril',
-            'uploader_id': 'dailyapril',
-            'duration': 6492,
-        },
-        'playlist_count': 2,
-        'playlist': [{
-            'md5': 'd8b7c174568da61d774ef0203159bf97',
-            'info_dict': {
-                'id': '20160502_c4c62b9d_174361386_1',
-                'ext': 'mp4',
-                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
-                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
-                'uploader': 'dailyapril',
-                'uploader_id': 'dailyapril',
-                'upload_date': '20160502',
-                'duration': 3601,
-            },
-        }, {
-            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
-            'info_dict': {
-                'id': '20160502_39e739bb_174361386_2',
-                'ext': 'mp4',
-                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
-                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
-                'uploader': 'dailyapril',
-                'uploader_id': 'dailyapril',
-                'upload_date': '20160502',
-                'duration': 2891,
-            },
-        }],
-        'params': {
-            'skip_download': True,
-        },
    }, {
        # non standard key
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
@@ -146,8 +93,8 @@ class AfreecaTVIE(InfoExtractor):
            'skip_download': True,
        },
    }, {
-        # PARTIAL_ADULT
-        'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
+        # adult content
+        'url': 'https://vod.afreecatv.com/player/97267690',
        'info_dict': {
            'id': '20180327_27901457_202289533_1',
            'ext': 'mp4',
@@ -161,16 +108,25 @@ class AfreecaTVIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
-        'expected_warnings': ['adult content'],
+        'skip': 'The VOD does not exist',
    }, {
        'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
        'only_matching': True,
    }, {
-        'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
-        'only_matching': True,
-    }, {
-        'url': 'http://vod.afreecatv.com/player/15055030',
-        'only_matching': True,
+        'url': 'https://vod.afreecatv.com/player/96753363',
+        'info_dict': {
+            'id': '20230108_9FF5BEE1_244432674_1',
+            'ext': 'mp4',
+            'uploader_id': 'rlantnghks',
+            'uploader': '페이즈으',
+            'duration': 10840,
+            'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
+            'upload_date': '20230108',
+            'title': '젠지 페이즈',
+        },
+        'params': {
+            'skip_download': True,
+        },
    }]

    @staticmethod
@@ -223,26 +179,21 @@ class AfreecaTVIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)

-        webpage = self._download_webpage(url, video_id)
-
-        if re.search(r'alert\(["\']This video has been deleted', webpage):
-            raise ExtractorError(
-                'Video %s has been deleted' % video_id, expected=True)
-
-        station_id = self._search_regex(
-            r'nStationNo\s*=\s*(\d+)', webpage, 'station')
-        bbs_id = self._search_regex(
-            r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
-        video_id = self._search_regex(
-            r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
-
-        partial_view = False
+        adult_view = False
        for _ in range(2):
+            data = self._download_json(
+                'https://api.m.afreecatv.com/station/video/a/view',
+                video_id, headers={'Referer': url}, data=urlencode_postdata({
+                    'nTitleNo': video_id,
+                    'nApiLevel': 10,
+                }))['data']
+            if traverse_obj(data, ('code', {int})) == -6221:
+                raise ExtractorError('The VOD does not exist', expected=True)
            query = {
                'nTitleNo': video_id,
-                'nStationNo': station_id,
-                'nBbsNo': bbs_id,
+                'nStationNo': data['station_no'],
+                'nBbsNo': data['bbs_no'],
            }
            if partial_view:
                query['partialView'] = 'SKIP_ADULT'
@@ -5,6 +5,7 @@ from ..utils import (
    int_or_none,
    mimetype2ext,
    parse_iso8601,
+    strip_jsonp,
    unified_timestamp,
    url_or_none,
)
@@ -15,7 +16,7 @@ class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
    def _extract_feed_info(self, url):
        feed = self._download_json(
            url, None, 'Downloading Akamai AMP feed',
-            'Unable to download Akamai AMP feed')
+            'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
        item = feed.get('channel', {}).get('item')
        if not item:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
@@ -73,8 +74,10 @@ class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
                media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                video_id, f4m_id='hds', fatal=False))
        elif ext == 'm3u8':
-            formats.extend(self._extract_m3u8_formats(
-                media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
        else:
            formats.append({
                'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
@@ -49,7 +49,7 @@ class CiscoWebexIE(InfoExtractor):
        'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
        video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429))

-    if urlh.status == 403:
+    if urlh.getcode() == 403:
        if stream['code'] == 53004:
            self.raise_login_required()
        if stream['code'] == 53005:
@@ -59,7 +59,7 @@ class CiscoWebexIE(InfoExtractor):
            'This video is protected by a password, use the --video-password option', expected=True)
        raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True)

-    if urlh.status == 429:
+    if urlh.getcode() == 429:
        self.raise_login_required(
            f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and',
            method='cookies')
@@ -13,9 +13,11 @@ import netrc
import os
import random
import re
+import subprocess
import sys
import time
import types
+import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
@@ -34,6 +36,7 @@ from ..utils import (
    GeoUtils,
    HEADRequest,
    LenientJSONDecoder,
+    Popen,
    RegexNotFoundError,
    RetryManager,
    UnsupportedError,
@@ -56,6 +59,7 @@ from ..utils import (
    join_nonempty,
    js_to_json,
    mimetype2ext,
+    netrc_from_content,
    network_exceptions,
    orderedSet,
    parse_bitrate,
@@ -286,6 +290,7 @@ class InfoExtractor:
    channel_id:     Id of the channel.
    channel_url:    Full URL to a channel webpage.
    channel_follower_count: Number of followers of the channel.
+    channel_is_verified:    Whether the channel is verified on the platform.
    location:       Physical location where the video was filmed.
    subtitles:      The available subtitles as a dictionary in the format
                    {tag: subformats}. "tag" is usually a language code, and
@@ -470,8 +475,8 @@ class InfoExtractor:


    Subclasses of this should also be added to the list of extractors and
-    should define a _VALID_URL regexp and, re-define the _real_extract() and
-    (optionally) _real_initialize() methods.
+    should define _VALID_URL as a regexp or a Sequence of regexps, and
+    re-define the _real_extract() and (optionally) _real_initialize() methods.

    Subclasses may also override suitable() if necessary, but ensure the function
    signature is preserved and that this function imports everything it needs
@@ -534,7 +539,7 @@ class InfoExtractor:
    _EMBED_REGEX = []

    def _login_hint(self, method=NO_DEFAULT, netrc=None):
-        password_hint = f'--username and --password, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
+        password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
        return {
            None: '',
            'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
@@ -561,8 +566,8 @@ class InfoExtractor:
        # we have cached the regexp for *this* class, whereas getattr would also
        # match the superclass
        if '_VALID_URL_RE' not in cls.__dict__:
-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        return cls._VALID_URL_RE.match(url)
+            cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
+        return next(filter(None, (regex.match(url) for regex in cls._VALID_URL_RE)), None)
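With `_VALID_URL` now allowed to be a sequence, the first compiled pattern that matches wins. A self-contained sketch (hypothetical URLs and names):

```python
import re

# Hypothetical patterns mirroring the new tuple-of-regexes matching.
_VALID_URL = (r'https?://site\.example/watch/(?P<id>\d+)',
              r'https?://site\.example/v/(?P<id>\d+)')
_VALID_URL_RE = tuple(map(re.compile, _VALID_URL))

def match_valid_url(url):
    # first pattern that matches wins; None if nothing matches
    return next(filter(None, (regex.match(url) for regex in _VALID_URL_RE)), None)

print(match_valid_url('https://site.example/v/42').group('id'))  # '42'
```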

    @classmethod
    def suitable(cls, url):
@@ -1290,45 +1295,48 @@ class InfoExtractor:
        return clean_html(res)

    def _get_netrc_login_info(self, netrc_machine=None):
-        username = None
-        password = None
        netrc_machine = netrc_machine or self._NETRC_MACHINE

-        if self.get_param('usenetrc', False):
-            try:
-                netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
-                if os.path.isdir(netrc_file):
-                    netrc_file = os.path.join(netrc_file, '.netrc')
-                info = netrc.netrc(file=netrc_file).authenticators(netrc_machine)
-                if info is not None:
-                    username = info[0]
-                    password = info[2]
-                else:
-                    raise netrc.NetrcParseError(
-                        'No authenticators for %s' % netrc_machine)
-            except (OSError, netrc.NetrcParseError) as err:
-                self.report_warning(
-                    'parsing .netrc: %s' % error_to_compat_str(err))
+        cmd = self.get_param('netrc_cmd')
+        if cmd:
+            cmd = cmd.replace('{}', netrc_machine)
+            self.to_screen(f'Executing command: {cmd}')
+            stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
+            if ret != 0:
+                raise OSError(f'Command returned error code {ret}')
+            info = netrc_from_content(stdout).authenticators(netrc_machine)

-        return username, password
+        elif self.get_param('usenetrc', False):
+            netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
+            if os.path.isdir(netrc_file):
+                netrc_file = os.path.join(netrc_file, '.netrc')
+            info = netrc.netrc(netrc_file).authenticators(netrc_machine)

+        else:
+            return None, None
+        if not info:
+            raise netrc.NetrcParseError(f'No authenticators for {netrc_machine}')
+        return info[0], info[2]

    def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
        """
        Get the login info as (username, password)
        First look for the manually specified credentials using username_option
        and password_option as keys in params dictionary. If no such credentials
-        available look in the netrc file using the netrc_machine or _NETRC_MACHINE
-        value.
+        are available try the netrc_cmd if it is defined or look in the
+        netrc file using the netrc_machine or _NETRC_MACHINE value.
        If there's no info available, return (None, None)
        """

        # Attempt to use provided username and password or .netrc data
        username = self.get_param(username_option)
        if username is not None:
            password = self.get_param(password_option)
        else:
-            username, password = self._get_netrc_login_info(netrc_machine)
+            try:
+                username, password = self._get_netrc_login_info(netrc_machine)
+            except (OSError, netrc.NetrcParseError) as err:
+                self.report_warning(f'Failed to parse .netrc: {err}')
+                return None, None
        return username, password
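Under the new flow, `--netrc-cmd` runs a shell command whose stdout must be netrc-formatted, with `{}` in the command replaced by the machine name. A rough standalone sketch using plain `subprocess` in place of the internal `Popen.run` (the `netrc_from_content` helper is the one added in this diff; the machine name and command are made up):

```python
import subprocess
from yt_dlp.utils import netrc_from_content  # helper added in this diff

machine = 'example'  # hypothetical machine name
cmd = 'cat ~/.netrc'.replace('{}', machine)  # '{}' is substituted with the machine
proc = subprocess.run(cmd, shell=True, text=True, capture_output=True)
info = netrc_from_content(proc.stdout).authenticators(machine)
username, password = (info[0], info[2]) if info else (None, None)
```

A typical CLI invocation would then look like `yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' URL`, keeping credentials out of plaintext files.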

    def _get_tfa_info(self, note='two-factor verification code'):

yt_dlp/extractor/discogs.py (new file, 35 lines)
@@ -0,0 +1,35 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import traverse_obj
+
+
+class DiscogsReleasePlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?discogs\.com/(?P<type>release|master)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.discogs.com/release/1-The-Persuader-Stockholm',
+        'info_dict': {
+            'id': 'release1',
+            'title': 'Stockholm',
+        },
+        'playlist_mincount': 7,
+    }, {
+        'url': 'https://www.discogs.com/master/113-Vince-Watson-Moments-In-Time',
+        'info_dict': {
+            'id': 'master113',
+            'title': 'Moments In Time',
+        },
+        'playlist_mincount': 53,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
+
+        display_id = f'{playlist_type}{playlist_id}'
+        response = self._download_json(
+            f'https://api.discogs.com/{playlist_type}s/{playlist_id}', display_id)
+
+        entries = [
+            self.url_result(video['uri'], YoutubeIE, video_title=video.get('title'))
+            for video in traverse_obj(response, ('videos', lambda _, v: YoutubeIE.suitable(v['uri'])))]
+
+        return self.playlist_result(entries, display_id, response.get('title'))
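The `traverse_obj` path here uses a branching lambda as a filter over the list. A minimal standalone demo with a simplified predicate standing in for `YoutubeIE.suitable` (data is made up):

```python
from yt_dlp.utils import traverse_obj

response = {'videos': [
    {'uri': 'https://www.youtube.com/watch?v=abc123xyz00', 'title': 'A1'},
    {'uri': 'https://vimeo.com/1', 'title': 'B1'},
]}
# The lambda receives (index, value) and keeps only matching entries.
youtube_only = traverse_obj(response, ('videos', lambda _, v: 'youtube.com' in v['uri']))
print([v['title'] for v in youtube_only])  # ['A1']
```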
@@ -65,6 +65,7 @@ class DPlayBaseIE(InfoExtractor):
        return streaming_list

    def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''):
+        country = self.get_param('geo_bypass_country') or country
        geo_countries = [country.upper()]
        self._initialize_geo_bypass({
            'countries': geo_countries,
@@ -1001,3 +1002,39 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
    _SHOW_STR = 'show'
    _INDEX = 4
    _VIDEO_IE = DiscoveryPlusIndiaIE
+
+
+class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE):
+    _VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://plus.globalcyclingnetwork.com/watch/1397691',
+        'info_dict': {
+            'id': '1397691',
+            'ext': 'mp4',
+            'title': 'The Athertons: Mountain Biking\'s Fastest Family',
+            'description': 'md5:75a81937fcd8b989eec6083a709cd837',
+            'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png',
+            'series': 'gcn',
+            'creator': 'Gcn',
+            'upload_date': '20210309',
+            'timestamp': 1615248000,
+            'duration': 2531.0,
+            'tags': [],
+        },
+        'skip': 'Subscription required',
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    _PRODUCT = 'web'
+    _DISCO_API_PARAMS = {
+        'disco_host': 'disco-api-prod.globalcyclingnetwork.com',
+        'realm': 'gcn',
+        'country': 'us',
+    }
+
+    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+        headers.update({
+            'x-disco-params': f'realm={realm}',
+            'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2',
+            'Authorization': self._get_auth(disco_base, display_id, realm),
+        })

@@ -1,13 +1,17 @@
+import functools
+
from .common import InfoExtractor
from .vimeo import VHXEmbedIE
from ..utils import (
    ExtractorError,
+    OnDemandPagedList,
    clean_html,
+    extract_attributes,
    get_element_by_class,
    get_element_by_id,
-    get_elements_by_class,
+    get_elements_html_by_class,
    int_or_none,
-    join_nonempty,
+    traverse_obj,
    unified_strdate,
    urlencode_postdata,
)
@@ -162,12 +166,13 @@ class DropoutIE(InfoExtractor):


class DropoutSeasonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)'
+    _PAGE_SIZE = 24
+    _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)'
    _TESTS = [
        {
            'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
            'note': 'Multi-season series with the season in the url',
-            'playlist_count': 17,
+            'playlist_count': 24,
            'info_dict': {
                'id': 'dimension-20-fantasy-high-season-1',
                'title': 'Dimension 20 Fantasy High - Season 1'
@@ -176,7 +181,7 @@ class DropoutSeasonIE(InfoExtractor):
        {
            'url': 'https://www.dropout.tv/dimension-20-fantasy-high',
            'note': 'Multi-season series with the season not in the url',
-            'playlist_count': 17,
+            'playlist_count': 24,
            'info_dict': {
                'id': 'dimension-20-fantasy-high-season-1',
                'title': 'Dimension 20 Fantasy High - Season 1'
@@ -190,29 +195,30 @@ class DropoutSeasonIE(InfoExtractor):
                'id': 'dimension-20-shriek-week-season-1',
                'title': 'Dimension 20 Shriek Week - Season 1'
            }
-        }
+        },
+        {
+            'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3',
+            'note': 'Multi-season series with season in the url that requires pagination',
+            'playlist_count': 25,
+            'info_dict': {
+                'id': 'breaking-news-no-laugh-newsroom-season-3',
+                'title': 'Breaking News No Laugh Newsroom - Season 3'
+            }
+        }
    ]

+    def _fetch_page(self, url, season_id, page):
+        page += 1
+        webpage = self._download_webpage(
+            f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400})
+        yield from [self.url_result(item_url, DropoutIE) for item_url in traverse_obj(
+            get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))]
+
    def _real_extract(self, url):
        season_id = self._match_id(url)
+        season_num = self._match_valid_url(url).group('season') or 1
        season_title = season_id.replace('-', ' ').title()
-        webpage = self._download_webpage(url, season_id)

-        entries = [
-            self.url_result(
-                url=self._search_regex(r'<a href=["\'](.+?)["\'] class=["\']browse-item-link["\']',
-                                       item, 'item_url'),
-                ie=DropoutIE.ie_key()
-            ) for item in get_elements_by_class('js-collection-item', webpage)
-        ]
-
-        seasons = (get_element_by_class('select-dropdown-wrapper', webpage) or '').strip().replace('\n', '')
-        current_season = self._search_regex(r'<option[^>]+selected>([^<]+)</option>',
-                                            seasons, 'current_season', default='').strip()
-
-        return {
-            '_type': 'playlist',
-            'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')),
-            'title': join_nonempty(season_title, current_season, delim=' - '),
-            'entries': entries
-        }
+        return self.playlist_result(
+            OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE),
+            f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')
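`OnDemandPagedList` calls the page function lazily with a 0-based index, which is why `_fetch_page` increments it for the site's 1-based pagination. A sketch with a hypothetical fetcher and URLs:

```python
import functools
from yt_dlp.utils import OnDemandPagedList

# Hypothetical fetcher: receives a 0-based page index and yields entry URLs.
def fetch_page(base_url, page):
    page += 1  # the site paginates from 1, as in _fetch_page above
    yield from (f'{base_url}?page={page}&item={n}' for n in range(3))

playlist = OnDemandPagedList(functools.partial(fetch_page, 'https://example.com/season'), 3)
print(playlist.getslice(0, 2))  # only the first page is actually fetched
```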

@@ -1,12 +1,17 @@
from .common import InfoExtractor
from ..utils import (
+    determine_ext,
    int_or_none,
    qualities,
)


class DumpertIE(InfoExtractor):
-    _VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
+    _VALID_URL = r'''(?x)
+        (?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
+            /(?:mediabase|embed|item)/|
+            (?:/toppers|/latest|/?)\?selectedId=
+        )(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
    _TESTS = [{
        'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
        'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
@@ -16,6 +21,9 @@ class DumpertIE(InfoExtractor):
            'title': 'Ik heb nieuws voor je',
            'description': 'Niet schrikken hoor',
            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 9,
+            'view_count': int,
+            'like_count': int,
        }
    }, {
        'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
@@ -26,6 +34,28 @@ class DumpertIE(InfoExtractor):
    }, {
        'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
        'only_matching': True,
+    }, {
+        'url': 'https://www.dumpert.nl/item/100031688_b317a185',
+        'info_dict': {
+            'id': '100031688/b317a185',
+            'ext': 'mp4',
+            'title': 'Epic schijnbeweging',
+            'description': '<p>Die zag je niet eh</p>',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+            'duration': 12,
+            'view_count': int,
+            'like_count': int,
+        },
+        'params': {'skip_download': 'm3u8'}
+    }, {
+        'url': 'https://www.dumpert.nl/toppers?selectedId=100031688_b317a185',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.dumpert.nl/latest?selectedId=100031688_b317a185',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -36,18 +66,23 @@ class DumpertIE(InfoExtractor):
        title = item['title']
        media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')

-        quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+        quality = qualities(['flv', 'mobile', 'tablet', '720p', '1080p'])
        formats = []
        for variant in media.get('variants', []):
            uri = variant.get('uri')
            if not uri:
                continue
            version = variant.get('version')
-            formats.append({
-                'url': uri,
-                'format_id': version,
-                'quality': quality(version),
-            })
+            preference = quality(version)
+            if determine_ext(uri) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    uri, video_id, 'mp4', m3u8_id=version, quality=preference))
+            else:
+                formats.append({
+                    'url': uri,
+                    'format_id': version,
+                    'quality': preference,
+                })

        thumbnails = []
        stills = item.get('stills') or {}

yt_dlp/extractor/ettutv.py (new file, 60 lines)
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+from ..utils import bool_or_none, traverse_obj, unified_timestamp, url_or_none
+
+
+class EttuTvIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ettu\.tv/[^?#]+/playerpage/(?P<id>[0-9]+)'
+
+    _TESTS = [{
+        'url': 'https://www.ettu.tv/en-int/playerpage/1573849',
+        'md5': '5874b7639a2aa866d1f6c3a4037c7c09',
+        'info_dict': {
+            'id': '1573849',
+            'title': 'Ni Xia Lian - Shao Jieni',
+            'description': 'ITTF Europe Top 16 Cup',
+            'timestamp': 1677348600,
+            'upload_date': '20230225',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'ext': 'mp4',
+        },
+    }, {
+        'url': 'https://www.ettu.tv/en-int/playerpage/1573753',
+        'md5': '1fc094bf96cf2d5ec0f434d3a6dec9aa',
+        'info_dict': {
+            'id': '1573753',
+            'title': 'Qiu Dang - Jorgic Darko',
+            'description': 'ITTF Europe Top 16 Cup',
+            'timestamp': 1677423600,
+            'upload_date': '20230226',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'ext': 'mp4',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        player_settings = self._download_json(
+            f'https://www.ettu.tv/api/v3/contents/{video_id}/player-settings', video_id, query={
+                'language': 'en',
+                'showTitle': 'true',
+                'device': 'desktop',
+            })
+
+        stream_response = self._download_json(player_settings['streamAccess'], video_id, data=b'')
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            stream_response['data']['stream'], video_id, 'mp4')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(player_settings, {
+                'title': 'title',
+                'description': ('metaInformation', 'competition'),
+                'thumbnail': ('image', {url_or_none}),
+                'timestamp': ('date', {unified_timestamp}),
+                'is_live': ('isLivestream', {bool_or_none}),
+            })
+        }
@@ -7,8 +7,37 @@ from .common import InfoExtractor
class FoxNewsIE(AMPIE):
    IE_NAME = 'foxnews'
    IE_DESC = 'Fox News and Fox Business Video'
-    _VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
+    _VALID_URL = r'https?://video\.(?:insider\.)?fox(?:news|business)\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
    _TESTS = [
+        {
+            'url': 'https://video.foxnews.com/v/6320653836112',
+            'info_dict': {
+                'id': '6320653836112',
+                'ext': 'mp4',
+                'title': 'Tucker Carlson joins \'Gutfeld!\' to discuss his new documentary',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'duration': 404,
+                'upload_date': '20230217',
+                'description': 'md5:858a8a36f59e9ca897d758855bcdfa02',
+                'timestamp': 1676611344.0,
+            },
+            'params': {'skip_download': 'm3u8'},
+        },
+        {
+            # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
+            'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
+            'info_dict': {
+                'id': '5099377331001',
+                'ext': 'mp4',
+                'title': '82416_censoring',
+                'description': '82416_censoring',
+                'upload_date': '20160826',
+                'timestamp': 1472169708.0,
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'duration': 521,
+            },
+            'params': {'skip_download': 'm3u8'},
+        },
        {
            'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
            'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
@@ -22,6 +51,7 @@ class FoxNewsIE(AMPIE):
            'upload_date': '20110503',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
+            'skip': '404 page',
        },
        {
            'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
@@ -36,10 +66,7 @@ class FoxNewsIE(AMPIE):
            'upload_date': '20141204',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
+            'skip': 'm3u8 HTTP error 400 in web browser',
        },
        {
            'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
@@ -49,11 +76,6 @@ class FoxNewsIE(AMPIE):
            'url': 'http://video.foxbusiness.com/v/4442309889001',
            'only_matching': True,
        },
-        {
-            # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
-            'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
-            'only_matching': True,
-        },
    ]

    @classmethod
@@ -67,10 +89,10 @@ class FoxNewsIE(AMPIE):
            yield f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}'

    def _real_extract(self, url):
-        host, video_id = self._match_valid_url(url).groups()
+        video_id = self._match_id(url)

        info = self._extract_feed_info(
-            'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
+            f'https://api.foxnews.com/v3/video-player/{video_id}?callback=uid_{video_id}')
        info['id'] = video_id
        return info
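The new endpoint responds with JSONP (a `uid_<id>(...)` callback wrapper), which is why `AMPIE._extract_feed_info` now applies `strip_jsonp` in the amp.py hunk above. Roughly (the callback name here is made up):

```python
from yt_dlp.utils import strip_jsonp

# e.g. the endpoint responds with: uid_6320653836112({...});
print(strip_jsonp('uid_123({"id": "123"});'))  # {"id": "123"}
```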
|
||||
|
||||
@@ -78,6 +100,19 @@ class FoxNewsIE(AMPIE):
|
||||
class FoxNewsVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.foxnews.com/video/6328632286112',
|
||||
'info_dict': {
|
||||
'id': '6328632286112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Review: 2023 Toyota Prius Prime',
|
||||
'duration': 155,
|
||||
'thumbnail': r're:^https://.+\.jpg$',
|
||||
'timestamp': 1685720177.0,
|
||||
'upload_date': '20230602',
|
||||
'description': 'md5:b69aafb125b41c1402e9744f53d6edc4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.foxnews.com/video/6313058664112',
|
||||
'info_dict': {
|
||||
'id': '6313058664112',
|
||||
@@ -89,8 +124,7 @@ class FoxNewsVideoIE(InfoExtractor):
|
||||
'title': 'Gutfeld! - Thursday, September 29',
|
||||
'timestamp': 1664527538,
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': '404 page',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -104,19 +138,22 @@ class FoxNewsArticleIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
# data-video-id
|
||||
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
|
||||
'md5': '83d44e1aff1433e7a29a7b537d1700b5',
|
||||
'url': 'https://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
|
||||
'md5': 'd2dd6ce809cedeefa96460e964821437',
|
||||
'info_dict': {
|
||||
'id': '5116295019001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trump and Clinton asked to defend positions on Iraq War',
|
||||
'description': 'Veterans react on \'The Kelly File\'',
|
||||
'description': 'Veterans and Fox News host Dana Perino react on \'The Kelly File\' to NBC\'s presidential forum',
|
||||
'timestamp': 1473301045,
|
||||
'upload_date': '20160908',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 426,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
|
||||
'url': 'https://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
|
||||
'info_dict': {
|
||||
'id': '5748266721001',
|
||||
'ext': 'flv',
|
||||
@@ -127,9 +164,7 @@ class FoxNewsArticleIE(InfoExtractor):
|
||||
'timestamp': 1520594670,
|
||||
'upload_date': '20180309',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 page',
|
||||
}, {
|
||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'only_matching': True,
|
||||
|
||||
79
yt_dlp/extractor/funker530.py
Normal file
79
yt_dlp/extractor/funker530.py
Normal file
@@ -0,0 +1,79 @@
from .common import InfoExtractor
from .rumble import RumbleEmbedIE
from .youtube import YoutubeIE
from ..utils import ExtractorError, clean_html, get_element_by_class, strip_or_none


class Funker530IE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?funker530\.com/video/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://funker530.com/video/azov-patrol-caught-in-open-under-automatic-grenade-launcher-fire/',
        'md5': '085f50fea27523a388bbc22e123e09c8',
        'info_dict': {
            'id': 'v2qbmu4',
            'ext': 'mp4',
            'title': 'Azov Patrol Caught In Open Under Automatic Grenade Launcher Fire',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Funker530',
            'channel': 'Funker530',
            'channel_url': 'https://rumble.com/c/c-1199543',
            'width': 1280,
            'height': 720,
            'fps': 25,
            'duration': 27,
            'upload_date': '20230608',
            'timestamp': 1686241321,
            'live_status': 'not_live',
            'description': 'md5:bea2e1f458095414e04b5ac189c2f980',
        }
    }, {
        'url': 'https://funker530.com/video/my-friends-joined-the-russians-civdiv/',
        'md5': 'a42c2933391210662e93e867d7124b70',
        'info_dict': {
            'id': 'k-pk4bOvoac',
            'ext': 'mp4',
            'view_count': int,
            'channel': 'Civ Div',
            'comment_count': int,
            'channel_follower_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/k-pk4bOvoac/maxresdefault.jpg',
            'uploader_id': '@CivDiv',
            'duration': 357,
            'channel_url': 'https://www.youtube.com/channel/UCgsCiwJ88up-YyMHo7hL5-A',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@CivDiv',
            'channel_id': 'UCgsCiwJ88up-YyMHo7hL5-A',
            'like_count': int,
            'description': 'md5:aef75ec3f59c07a0e39400f609b24429',
            'live_status': 'not_live',
            'age_limit': 0,
            'uploader': 'Civ Div',
            'categories': ['People & Blogs'],
            'title': 'My “Friends” joined the Russians.',
            'availability': 'public',
            'upload_date': '20230608',
            'playable_in_embed': True,
            'heatmap': 'count:100',
        }
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        info = {}  # default so the fallback check below cannot raise NameError when neither embed is found
        rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
        if rumble_url:
            info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
        else:
            youtube_url = list(YoutubeIE._extract_embed_urls(url, webpage))
            if youtube_url:
                info = {'url': youtube_url[0], 'ie_key': YoutubeIE.ie_key()}
        if not info:
            raise ExtractorError('No videos found on webpage', expected=True)

        return {
            **info,
            '_type': 'url_transparent',
            'description': strip_or_none(self._search_regex(
                r'(?s)(.+)About the Author', clean_html(get_element_by_class('video-desc-paragraph', webpage)),
                'description', default=None))
        }
yt_dlp/extractor/globalplayer.py (mode changed: Executable file → Normal file, no content changes)

yt_dlp/extractor/hotstar.py
@@ -83,7 +83,7 @@ class HotStarIE(HotStarBaseIE):
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
        (?:
            (?P<type>movies|sports|episode|(?P<tv>tv))/
            (?P<type>movies|sports|episode|(?P<tv>tv|shows))/
            (?(tv)(?:[^/?#]+/){2}|[^?#]*)
        )?
        [^/?#]+/
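# The (?(tv)...) conditional group above requires two extra path segments only
# when the <tv> group ("tv" or "shows") took part in the match. A minimal
# standalone sketch of just that conditional (sample paths made up for
# illustration):
import re

_PATH_RE = re.compile(r'''(?x)
    (?P<type>movies|sports|episode|(?P<tv>tv|shows))/
    (?(tv)(?:[^/?#]+/){2}|[^?#]*)
''')

assert _PATH_RE.match('shows/anupama/1260022017/')  # tv-style: 2 segments required
assert _PATH_RE.match('movies/radha-gopalam/')      # non-tv: free-form remainder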
@@ -122,6 +122,25 @@ class HotStarIE(HotStarBaseIE):
            'episode': 'Janhvi Targets Suman',
            'episode_number': 8,
        }
    }, {
        'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843',
        'info_dict': {
            'id': '1000282843',
            'ext': 'mp4',
            'title': 'Anupama, Anuj Share a Moment',
            'season': 'Chapter 1',
            'description': 'md5:8d74ed2248423b8b06d5c8add4d7a0c0',
            'timestamp': 1678149000,
            'channel': 'StarPlus',
            'series': 'Anupama',
            'season_number': 1,
            'season_id': 7399,
            'upload_date': '20230307',
            'episode': 'Anupama, Anuj Share a Moment',
            'episode_number': 853,
            'duration': 1272,
            'channel_id': 3,
        },
    }, {
        'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
        'only_matching': True,
@@ -139,6 +158,7 @@ class HotStarIE(HotStarBaseIE):
        'sports': 'match',
        'episode': 'episode',
        'tv': 'episode',
        'shows': 'episode',
        None: 'content',
    }

@@ -304,13 +324,16 @@ class HotStarPrefixIE(InfoExtractor):

class HotStarPlaylistIE(HotStarBaseIE):
    IE_NAME = 'hotstar:playlist'
    _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/tv(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
    _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
        'info_dict': {
            'id': '3_2_26',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'https://www.hotstar.com/shows/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
        'only_matching': True,
    }, {
        'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
        'only_matching': True,
@@ -327,7 +350,7 @@ class HotStarPlaylistIE(HotStarBaseIE):

class HotStarSeasonIE(HotStarBaseIE):
    IE_NAME = 'hotstar:season'
    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028',
        'info_dict': {
@@ -346,6 +369,9 @@ class HotStarSeasonIE(HotStarBaseIE):
            'id': '8208',
        },
        'playlist_mincount': 19,
    }, {
        'url': 'https://www.hotstar.com/in/shows/bigg-boss/14714/seasons/season-4/ss-8208/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -356,7 +382,7 @@ class HotStarSeasonIE(HotStarBaseIE):

class HotStarSeriesIE(HotStarBaseIE):
    IE_NAME = 'hotstar:series'
    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
        'info_dict': {
@@ -375,6 +401,12 @@ class HotStarSeriesIE(HotStarBaseIE):
            'id': '435',
        },
        'playlist_mincount': 267,
    }, {
        'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/',
        'info_dict': {
            'id': '1260022017',
        },
        'playlist_mincount': 940,
    }]

    def _real_extract(self, url):

yt_dlp/extractor/iwara.py
@@ -1,68 +1,83 @@
import functools
import urllib.parse
import urllib.error
import hashlib
import json
import time

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    int_or_none,
    jwt_decode_hs256,
    mimetype2ext,
    qualities,
    traverse_obj,
    try_call,
    unified_timestamp,
)


# https://github.com/yt-dlp/yt-dlp/issues/6671
class IwaraBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'iwara'
    _USERTOKEN = None
    _MEDIATOKEN = None
    _NETRC_MACHINE = 'iwara'

    def _get_user_token(self, invalidate=False):
        if not invalidate and self._USERTOKEN:
            return self._USERTOKEN
    def _is_token_expired(self, token, token_type):
        # User token TTL == ~3 weeks, Media token TTL == ~1 hour
        if (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 120):
            self.to_screen(f'{token_type} token has expired')
            return True

    def _get_user_token(self):
        username, password = self._get_login_info()
        IwaraBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
        if not IwaraBaseIE._USERTOKEN or invalidate:
            IwaraBaseIE._USERTOKEN = self._download_json(
        if not username or not password:
            return

        user_token = IwaraBaseIE._USERTOKEN or self.cache.load(self._NETRC_MACHINE, username)
        if not user_token or self._is_token_expired(user_token, 'User'):
            response = self._download_json(
                'https://api.iwara.tv/user/login', None, note='Logging in',
                data=json.dumps({
                headers={'Content-Type': 'application/json'}, data=json.dumps({
                    'email': username,
                    'password': password
                }).encode('utf-8'),
                headers={
                }).encode(), expected_status=lambda x: True)
            user_token = traverse_obj(response, ('token', {str}))
            if not user_token:
                error = traverse_obj(response, ('message', {str}))
                if 'invalidLogin' in error:
                    raise ExtractorError('Invalid login credentials', expected=True)
                else:
                    raise ExtractorError(f'Iwara API said: {error or "nothing"}')

            self.cache.store(self._NETRC_MACHINE, username, user_token)

        IwaraBaseIE._USERTOKEN = user_token

    def _get_media_token(self):
        self._get_user_token()
        if not IwaraBaseIE._USERTOKEN:
            return  # user has not passed credentials

        if not IwaraBaseIE._MEDIATOKEN or self._is_token_expired(IwaraBaseIE._MEDIATOKEN, 'Media'):
            IwaraBaseIE._MEDIATOKEN = self._download_json(
                'https://api.iwara.tv/user/token', None, note='Fetching media token',
                data=b'', headers={
                    'Authorization': f'Bearer {IwaraBaseIE._USERTOKEN}',
                    'Content-Type': 'application/json'
                })['token']
                })['accessToken']

        self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)
        return {'Authorization': f'Bearer {IwaraBaseIE._MEDIATOKEN}'}

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False):
        if not invalidate and self._MEDIATOKEN:
            return self._MEDIATOKEN

        IwaraBaseIE._MEDIATOKEN = self._download_json(
            'https://api.iwara.tv/user/token', None, note='Fetching media token',
            data=b'',  # Need to have some data here, even if it's empty
            headers={
                'Authorization': f'Bearer {self._get_user_token()}',
                'Content-Type': 'application/json'
            })['accessToken']

        return self._MEDIATOKEN
    def _perform_login(self, username, password):
        self._get_media_token()

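# The expiry check in _is_token_expired() above reads the JWT "exp" claim via
# yt_dlp.utils.jwt_decode_hs256 (which decodes the payload without verifying
# the signature) and only treats a token as expired once it is ~120 seconds
# past its "exp" time. A minimal standalone sketch of the same idea, with a
# hand-rolled decoder (token values are made up for illustration):
import base64
import json
import time

def _jwt_exp(token):
    payload = token.split('.')[1]
    payload += '=' * (-len(payload) % 4)  # restore stripped base64 padding
    return json.loads(base64.urlsafe_b64decode(payload)).get('exp') or 0

def _is_expired(token, margin=120):
    return _jwt_exp(token) <= int(time.time() - margin)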

class IwaraIE(IwaraBaseIE):
    IE_NAME = 'iwara'
    _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        # this video cannot be played because of migration
        'only_matching': True,
        'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
        'info_dict': {
            'id': 'k2ayoueezfkx6gvq',
@@ -79,25 +94,29 @@ class IwaraIE(IwaraBaseIE):
            'timestamp': 1677843869,
            'modified_timestamp': 1679056362,
        },
        'skip': 'this video cannot be played because of migration',
    }, {
        'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
        'md5': '20691ce1473ec2766c0788e14c60ce66',
        'md5': '7645f966f069b8ec9210efd9130c9aad',
        'info_dict': {
            'id': '1ywe1sbkqwumpdxz5',
            'ext': 'mp4',
            'age_limit': 18,
            'title': 'Aponia 阿波尼亚SEX Party Tonight 手动脱衣 大奶 裸腿',
            'description': 'md5:0c4c310f2e0592d68b9f771d348329ca',
            'uploader': '龙也zZZ',
            'title': 'Aponia アポニア SEX Party Tonight 手の脱衣 巨乳 ',
            'description': 'md5:3f60016fff22060eef1ef26d430b1f67',
            'uploader': 'Lyu ya',
            'uploader_id': 'user792540',
            'tags': [
                'uncategorized'
            ],
            'like_count': 1809,
            'view_count': 25156,
            'comment_count': 1,
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'timestamp': 1678732213,
            'modified_timestamp': 1679110271,
            'modified_timestamp': int,
            'thumbnail': 'https://files.iwara.tv/image/thumbnail/581d12b5-46f4-4f15-beb2-cfe2cde5d13d/thumbnail-00.jpg',
            'modified_date': '20230614',
            'upload_date': '20230313',
        },
    }, {
        'url': 'https://iwara.tv/video/blggmfno8ghl725bg',
@@ -112,12 +131,15 @@ class IwaraIE(IwaraBaseIE):
            'tags': [
                'pee'
            ],
            'like_count': 192,
            'view_count': 12119,
            'comment_count': 0,
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'timestamp': 1598880567,
            'modified_timestamp': 1598908995,
            'availability': 'needs_auth',
            'modified_timestamp': int,
            'upload_date': '20200831',
            'modified_date': '20230605',
            'thumbnail': 'https://files.iwara.tv/image/thumbnail/7693e881-d302-42a4-a780-f16d66b5dadd/thumbnail-00.jpg',
            # 'availability': 'needs_auth',
        },
    }]

@@ -142,17 +164,16 @@ class IwaraIE(IwaraBaseIE):

    def _real_extract(self, url):
        video_id = self._match_id(url)
        username, password = self._get_login_info()
        headers = {
            'Authorization': f'Bearer {self._get_media_token()}',
        } if username and password else None
        video_data = self._download_json(f'https://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True, headers=headers)
        username, _ = self._get_login_info()
        video_data = self._download_json(
            f'https://api.iwara.tv/video/{video_id}', video_id,
            expected_status=lambda x: True, headers=self._get_media_token())
        errmsg = video_data.get('message')
        # at this point we can actually get uploaded user info, but do we need it?
        if errmsg == 'errors.privateVideo':
            self.raise_login_required('Private video. Login if you have permissions to watch')
            self.raise_login_required('Private video. Login if you have permissions to watch', method='password')
        elif errmsg == 'errors.notFound' and not username:
            self.raise_login_required('Video may need login to view')
            self.raise_login_required('Video may need login to view', method='password')
        elif errmsg:  # None if success
            raise ExtractorError(f'Iwara says: {errmsg}')

@@ -181,15 +202,6 @@ class IwaraIE(IwaraBaseIE):
            'formats': list(self._extract_formats(video_id, video_data.get('fileUrl'))),
        }

    def _perform_login(self, username, password):
        if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
            self.write_debug('Skipping logging in')
            return

        IwaraBaseIE._USERTOKEN = self._get_user_token(True)
        self._get_media_token(True)
        self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)


class IwaraUserIE(IwaraBaseIE):
    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
@@ -200,12 +212,14 @@ class IwaraUserIE(IwaraBaseIE):
        'url': 'https://iwara.tv/profile/user792540/videos',
        'info_dict': {
            'id': 'user792540',
            'title': 'Lyu ya',
        },
        'playlist_mincount': 80,
        'playlist_mincount': 70,
    }, {
        'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
        'info_dict': {
            'id': 'theblackbirdcalls',
            'title': 'TheBlackbirdCalls',
        },
        'playlist_mincount': 723,
    }, {
@@ -214,6 +228,13 @@ class IwaraUserIE(IwaraBaseIE):
    }, {
        'url': 'https://iwara.tv/profile/theblackbirdcalls',
        'only_matching': True,
    }, {
        'url': 'https://www.iwara.tv/profile/lumymmd',
        'info_dict': {
            'id': 'lumymmd',
            'title': 'Lumy MMD',
        },
        'playlist_mincount': 1,
    }]

    def _entries(self, playlist_id, user_id, page):
@@ -225,7 +246,7 @@ class IwaraUserIE(IwaraBaseIE):
                'sort': 'date',
                'user': user_id,
                'limit': self._PER_PAGE,
            })
            }, headers=self._get_media_token())
        for x in traverse_obj(videos, ('results', ..., 'id')):
            yield self.url_result(f'https://iwara.tv/video/{x}')

@@ -244,7 +265,6 @@ class IwaraUserIE(IwaraBaseIE):


class IwaraPlaylistIE(IwaraBaseIE):
    # the ID is an UUID but I don't think it's necessary to write concrete regex
    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
    IE_NAME = 'iwara:playlist'
    _PER_PAGE = 32
@@ -260,7 +280,8 @@ class IwaraPlaylistIE(IwaraBaseIE):
    def _entries(self, playlist_id, first_page, page):
        videos = self._download_json(
            'https://api.iwara.tv/videos', playlist_id, f'Downloading page {page}',
            query={'page': page, 'limit': self._PER_PAGE}) if page else first_page
            query={'page': page, 'limit': self._PER_PAGE},
            headers=self._get_media_token()) if page else first_page
        for x in traverse_obj(videos, ('results', ..., 'id')):
            yield self.url_result(f'https://iwara.tv/video/{x}')

@@ -268,7 +289,7 @@ class IwaraPlaylistIE(IwaraBaseIE):
        playlist_id = self._match_id(url)
        page_0 = self._download_json(
            f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
            note='Requesting playlist info')
            note='Requesting playlist info', headers=self._get_media_token())

        return self.playlist_result(
            OnDemandPagedList(

yt_dlp/extractor/lbry.py
@@ -1,8 +1,8 @@
import functools
import json
import urllib.parse

from .common import InfoExtractor
from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import (
    ExtractorError,
    HEADRequest,
@@ -12,7 +12,10 @@ from ..utils import (
    int_or_none,
    mimetype2ext,
    parse_qs,
    traverse_obj,
    try_get,
    url_or_none,
    urlhandle_detect_ext,
    urljoin,
)

@@ -52,38 +55,25 @@ class LBRYBaseIE(InfoExtractor):
            '/%s:%s' % (claim_name, claim_id))

    def _parse_stream(self, stream, url):
        stream_value = stream.get('value') or {}
        stream_type = stream_value.get('stream_type')
        source = stream_value.get('source') or {}
        media = stream_value.get(stream_type) or {}
        signing_channel = stream.get('signing_channel') or {}
        channel_name = signing_channel.get('name')
        channel_claim_id = signing_channel.get('claim_id')
        channel_url = None
        if channel_name and channel_claim_id:
            channel_url = self._permanent_url(url, channel_name, channel_claim_id)
        stream_type = traverse_obj(stream, ('value', 'stream_type', {str}))

        info = traverse_obj(stream, {
            'title': ('value', 'title', {str}),
            'thumbnail': ('value', 'thumbnail', 'url', {url_or_none}),
            'description': ('value', 'description', {str}),
            'license': ('value', 'license', {str}),
            'timestamp': ('timestamp', {int_or_none}),
            'release_timestamp': ('value', 'release_time', {int_or_none}),
            'tags': ('value', 'tags', ..., {lambda x: x or None}),
            'duration': ('value', stream_type, 'duration', {int_or_none}),
            'channel': ('signing_channel', 'value', 'title', {str}),
            'channel_id': ('signing_channel', 'claim_id', {str}),
        })

        channel_name = traverse_obj(stream, ('signing_channel', 'name', {str}))
        if channel_name and info.get('channel_id'):
            info['channel_url'] = self._permanent_url(url, channel_name, info['channel_id'])

        info = {
            'thumbnail': try_get(stream_value, lambda x: x['thumbnail']['url'], compat_str),
            'description': stream_value.get('description'),
            'license': stream_value.get('license'),
            'timestamp': int_or_none(stream.get('timestamp')),
            'release_timestamp': int_or_none(stream_value.get('release_time')),
            'tags': stream_value.get('tags'),
            'duration': int_or_none(media.get('duration')),
            'channel': try_get(signing_channel, lambda x: x['value']['title']),
            'channel_id': channel_claim_id,
            'channel_url': channel_url,
            'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
            'filesize': int_or_none(source.get('size')),
        }
        if stream_type == 'audio':
            info['vcodec'] = 'none'
        else:
            info.update({
                'width': int_or_none(media.get('width')),
                'height': int_or_none(media.get('height')),
            })
        return info

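# The rewrite above replaces chains of .get() calls with declarative
# traverse_obj paths: tuples walk into nested dicts, a {type} item filters by
# isinstance, a {func} item applies a transform, and dict-template keys whose
# values are missing are dropped. A toy run on made-up data:
from yt_dlp.utils import int_or_none, traverse_obj

_stream = {'value': {'title': 'Demo', 'release_time': '1685617473'},
           'signing_channel': {'claim_id': 'abc123'}}
assert traverse_obj(_stream, {
    'title': ('value', 'title', {str}),
    'release_timestamp': ('value', 'release_time', {int_or_none}),
    'channel_id': ('signing_channel', 'claim_id', {str}),
}) == {'title': 'Demo', 'release_timestamp': 1685617473, 'channel_id': 'abc123'}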

@@ -186,6 +176,28 @@ class LBRYIE(LBRYBaseIE):
            'license': 'None',
        },
        'params': {'skip_download': True}
    }, {
        # original quality format w/higher resolution than HLS formats
        'url': 'https://odysee.com/@wickedtruths:2/Biotechnological-Invasion-of-Skin-(April-2023):4',
        'md5': '305b0b3b369bde1b984961f005b67193',
        'info_dict': {
            'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634',
            'ext': 'mp4',
            'title': 'Biotechnological Invasion of Skin (April 2023)',
            'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c',
            'channel': 'Wicked Truths',
            'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
            'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
            'timestamp': 1685790036,
            'upload_date': '20230603',
            'release_timestamp': 1685617473,
            'release_date': '20230601',
            'duration': 1063,
            'thumbnail': 'https://thumbs.odycdn.com/4e6d39da4df0cfdad45f64e253a15959.webp',
            'tags': ['smart skin surveillance', 'biotechnology invasion of skin', 'morgellons'],
            'license': 'None',
            'protocol': 'https',  # test for direct mp4 download
        },
    }, {
        'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
        'only_matching': True,
@@ -221,41 +233,64 @@ class LBRYIE(LBRYBaseIE):
            display_id = display_id.split('/', 2)[-1].replace('/', ':')
        else:
            display_id = display_id.replace(':', '#')
        display_id = compat_urllib_parse_unquote(display_id)
        display_id = urllib.parse.unquote(display_id)
        uri = 'lbry://' + display_id
        result = self._resolve_url(uri, display_id, 'stream')
        headers = {'Referer': 'https://odysee.com/'}
        if result['value'].get('stream_type') in self._SUPPORTED_STREAM_TYPES:

        formats = []
        stream_type = traverse_obj(result, ('value', 'stream_type', {str}))

        if stream_type in self._SUPPORTED_STREAM_TYPES:
            claim_id, is_live = result['claim_id'], False
            streaming_url = self._call_api_proxy(
                'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']

            # GET request returns original video/audio file if available
            ext = urlhandle_detect_ext(self._request_webpage(
                streaming_url, display_id, 'Checking for original quality', headers=headers))
            if ext != 'm3u8':
                formats.append({
                    'url': streaming_url,
                    'format_id': 'original',
                    'quality': 1,
                    **traverse_obj(result, ('value', {
                        'ext': ('source', (('name', {determine_ext}), ('media_type', {mimetype2ext}))),
                        'filesize': ('source', 'size', {int_or_none}),
                        'width': ('video', 'width', {int_or_none}),
                        'height': ('video', 'height', {int_or_none}),
                    }), get_all=False),
                    'vcodec': 'none' if stream_type == 'audio' else None,
                })

            # HEAD request returns redirect response to m3u8 URL if available
            final_url = self._request_webpage(
                HEADRequest(streaming_url), display_id, headers=headers,
                note='Downloading streaming redirect url info').geturl()

        elif result.get('value_type') == 'stream':
            claim_id, is_live = result['signing_channel']['claim_id'], True
            live_data = self._download_json(
                'https://api.odysee.live/livestream/is_live', claim_id,
                query={'channel_claim_id': claim_id},
                note='Downloading livestream JSON metadata')['data']
            streaming_url = final_url = live_data.get('VideoURL')
            final_url = live_data.get('VideoURL')
            # Upcoming videos may still give VideoURL
            if not live_data.get('Live'):
                streaming_url = final_url = None
                final_url = None
                self.raise_no_formats('This stream is not live', True, claim_id)

        else:
            raise UnsupportedError(url)

        info = self._parse_stream(result, url)
        if determine_ext(final_url) == 'm3u8':
            info['formats'] = self._extract_m3u8_formats(
                final_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', live=is_live, headers=headers)
        else:
            info['url'] = streaming_url
            formats.extend(self._extract_m3u8_formats(
                final_url, display_id, 'mp4', m3u8_id='hls', live=is_live, headers=headers))

        return {
            **info,
            **self._parse_stream(result, url),
            'id': claim_id,
            'title': result['value']['title'],
            'formats': formats,
            'is_live': is_live,
            'http_headers': headers,
        }
@@ -299,14 +334,12 @@ class LBRYChannelIE(LBRYBaseIE):
            if not (stream_claim_name and stream_claim_id):
                continue

            info = self._parse_stream(item, url)
            info.update({
            yield {
                **self._parse_stream(item, url),
                '_type': 'url',
                'id': stream_claim_id,
                'title': try_get(item, lambda x: x['value']['title']),
                'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
            })
            yield info
            }

    def _real_extract(self, url):
        display_id = self._match_id(url).replace(':', '#')

yt_dlp/extractor/mgtv.py
@@ -1,17 +1,17 @@
import base64
import time
import urllib.error
import uuid

from .common import InfoExtractor
from ..compat import (
    compat_HTTPError,
    compat_str,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_resolution,
    traverse_obj,
    try_get,
    url_or_none,
    urljoin,
)


@@ -30,16 +30,18 @@ class MGTVIE(InfoExtractor):
        'duration': 7461,
        'thumbnail': r're:^https?://.*\.jpg$',
    },
    'params': {'skip_download': 'm3u8'},
}, {
    'url': 'https://w.mgtv.com/b/427837/15588271.html',
    'info_dict': {
        'id': '15588271',
        'ext': 'mp4',
        'title': '春日迟迟再出发 沉浸版',
        'title': '春日迟迟再出发 沉浸版第1期:陆莹结婚半年查出肾炎被离婚 吴雅婷把一半票根退给前夫',
        'description': 'md5:a7a05a05b1aa87bd50cae619b19bbca6',
        'thumbnail': r're:^https?://.+\.jpg',
        'duration': 4026,
    },
    'params': {'skip_download': 'm3u8'},
}, {
    'url': 'https://w.mgtv.com/b/333652/7329822.html',
    'info_dict': {
@@ -50,6 +52,7 @@ class MGTVIE(InfoExtractor):
        'thumbnail': r're:^https?://.+\.jpg',
        'duration': 2656,
    },
    'params': {'skip_download': 'm3u8'},
}, {
    'url': 'https://w.mgtv.com/b/427837/15591647.html',
    'only_matching': True,
@@ -64,6 +67,13 @@ class MGTVIE(InfoExtractor):
        'only_matching': True,
    }]

    _RESOLUTIONS = {
        '标清': ('480p', '854x480'),
        '高清': ('540p', '960x540'),
        '超清': ('720p', '1280x720'),
        '蓝光': ('1080p', '1920x1080'),
    }

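# How the extraction below uses this table: the stream's display name selects
# the row, and the aspect-ratio flag selects the column (index 1, the WxH
# string, for 16:9 streams; that string is later fed to parse_resolution).
# A made-up stream dict for illustration:
_stream = {'name': '蓝光', 'scale': '16:9'}
_row = {'蓝光': ('1080p', '1920x1080')}[_stream['name']]
assert _row[1 if _stream.get('scale') == '16:9' else 0] == '1920x1080'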
    def _real_extract(self, url):
        video_id = self._match_id(url)
        tk2 = base64.urlsafe_b64encode(
@@ -76,55 +86,60 @@ class MGTVIE(InfoExtractor):
                'type': 'pch5'
            }, headers=self.geo_verification_headers())['data']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
            if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
                error = self._parse_json(e.cause.read().decode(), None)
                if error.get('code') == 40005:
                    self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                raise ExtractorError(error['msg'], expected=True)
            raise
        info = api_data['info']
        title = info['title'].strip()

        stream_data = self._download_json(
            'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
                'pm2': api_data['atc']['pm2'],
                'tk2': tk2,
                'pm2': api_data['atc']['pm2'],
                'video_id': video_id,
                'type': 'pch5',
                'src': 'intelmgtv',
            }, headers=self.geo_verification_headers())['data']
        stream_domain = stream_data['stream_domain'][0]
        stream_domain = traverse_obj(stream_data, ('stream_domain', ..., {url_or_none}), get_all=False)

        formats = []
        for idx, stream in enumerate(stream_data['stream']):
            stream_path = stream.get('url')
            if not stream_path:
                continue
            format_data = self._download_json(
                stream_domain + stream_path, video_id,
                note=f'Download video info for format #{idx}')
            format_url = format_data.get('info')
        for idx, stream in enumerate(traverse_obj(stream_data, ('stream', lambda _, v: v['url']))):
            stream_name = traverse_obj(stream, 'name', 'standardName', 'barName', expected_type=str)
            resolution = traverse_obj(
                self._RESOLUTIONS, (stream_name, 1 if stream.get('scale') == '16:9' else 0))
            format_url = traverse_obj(self._download_json(
                urljoin(stream_domain, stream['url']), video_id, fatal=False,
                note=f'Downloading video info for format {resolution or stream_name}'),
                ('info', {url_or_none}))
            if not format_url:
                continue
            tbr = int_or_none(stream.get('filebitrate') or self._search_regex(
                r'_(\d+)_mp4/', format_url, 'tbr', default=None))
            formats.append({
                'format_id': compat_str(tbr or idx),
                'url': url_or_none(format_url),
                'format_id': str(tbr or idx),
                'url': format_url,
                'ext': 'mp4',
                'tbr': tbr,
                'vcodec': stream.get('videoFormat'),
                'acodec': stream.get('audioFormat'),
                **parse_resolution(resolution),
                'protocol': 'm3u8_native',
                'http_headers': {
                    'Referer': url,
                },
                'format_note': stream.get('name'),
                'format_note': stream_name,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': info.get('desc'),
            'duration': int_or_none(info.get('duration')),
            'thumbnail': info.get('thumb'),
            **traverse_obj(api_data, ('info', {
                'title': ('title', {str.strip}),
                'description': ('desc', {str}),
                'duration': ('duration', {int_or_none}),
                'thumbnail': ('thumb', {url_or_none}),
            })),
            'subtitles': self.extract_subtitles(video_id, stream_domain),
        }

yt_dlp/extractor/motherless.py
@@ -1,32 +1,39 @@
import datetime
import re
import urllib.parse

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    ExtractorError,
    InAdvancePagedList,
    orderedSet,
    OnDemandPagedList,
    remove_end,
    str_to_int,
    unified_strdate,
)


class MotherlessIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/|G[VIG]?[A-F0-9]+/)?(?P<id>[A-F0-9]+)'
    _TESTS = [{
        'url': 'http://motherless.com/AC3FFE1',
        'md5': '310f62e325a9fafe64f68c0bccb6e75f',
        'url': 'http://motherless.com/EE97006',
        'md5': 'cb5e7438f7a3c4e886b7bccc1292a3bc',
        'info_dict': {
            'id': 'AC3FFE1',
            'id': 'EE97006',
            'ext': 'mp4',
            'title': 'Fucked in the ass while playing PS3',
            'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
            'upload_date': '20100913',
            'uploader_id': 'famouslyfuckedup',
            'title': 'Dogging blond Brit getting glazed (comp)',
            'categories': ['UK', 'slag', 'whore', 'dogging', 'cunt', 'cumhound', 'big tits', 'Pearl Necklace'],
            'upload_date': '20230519',
            'uploader_id': 'deathbird',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        }
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            # Incomplete cert chains
            'nocheckcertificate': True,
        },
    }, {
        'url': 'http://motherless.com/532291B',
        'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
@@ -49,16 +56,36 @@ class MotherlessIE(InfoExtractor):
            'id': '633979F',
            'ext': 'mp4',
            'title': 'Turtlette',
            'categories': ['superheroine heroine superher'],
            'upload_date': '20140827',
            'uploader_id': 'shade0230',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        }
            'like_count': int,
            'comment_count': int,
            'view_count': int,
        },
        'params': {
            'nocheckcertificate': True,
        },
    }, {
        # no keywords
        'url': 'http://motherless.com/8B4BBC1',
        'only_matching': True,
        'info_dict': {
            'id': '8B4BBC1',
            'ext': 'mp4',
            'title': 'VIDEO00441.mp4',
            'categories': [],
            'upload_date': '20160214',
            'uploader_id': 'NMWildGirl',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
            'like_count': int,
            'comment_count': int,
            'view_count': int,
        },
        'params': {
            'nocheckcertificate': True,
        },
    }, {
        # see https://motherless.com/videos/recent for recent videos with
        # uploaded date in "ago" format
@@ -72,9 +99,12 @@ class MotherlessIE(InfoExtractor):
            'uploader_id': 'anonymous',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
            'like_count': int,
            'comment_count': int,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
            'nocheckcertificate': True,
        },
    }]

@@ -128,10 +158,8 @@ class MotherlessIE(InfoExtractor):
            (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
             r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
            webpage, 'uploader_id', fatal=False)

        categories = self._html_search_meta('keywords', webpage, default=None)
        if categories:
            categories = [cat.strip() for cat in categories.split(',')]
        categories = self._html_search_meta('keywords', webpage, default='')
        categories = [cat.strip() for cat in categories.split(',') if cat.strip()]

        return {
            'id': video_id,
@@ -148,102 +176,97 @@ class MotherlessIE(InfoExtractor):
        }


class MotherlessGroupIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
class MotherlessPaginatedIE(InfoExtractor):
    _PAGE_SIZE = 60

    def _correct_path(self, url, item_id):
        raise NotImplementedError('This method must be implemented by subclasses')

    def _extract_entries(self, webpage, base):
        for mobj in re.finditer(r'href="[^"]*(?P<href>/[A-F0-9]+)"\s+title="(?P<title>[^"]+)',
                                webpage):
            video_url = urllib.parse.urljoin(base, mobj.group('href'))
            video_id = MotherlessIE.get_temp_id(video_url)

            if video_id:
                yield self.url_result(video_url, MotherlessIE, video_id, mobj.group('title'))

    def _real_extract(self, url):
        item_id = self._match_id(url)
        real_url = self._correct_path(url, item_id)
        webpage = self._download_webpage(real_url, item_id, 'Downloading page 1')

        def get_page(idx):
            page = idx + 1
            current_page = webpage if not idx else self._download_webpage(
                real_url, item_id, note=f'Downloading page {page}', query={'page': page})
            yield from self._extract_entries(current_page, real_url)

        return self.playlist_result(
            OnDemandPagedList(get_page, self._PAGE_SIZE), item_id,
            remove_end(self._html_extract_title(webpage), ' | MOTHERLESS.COM ™'))

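# The paginated base class above downloads page 1 eagerly (it also supplies
# the playlist title) and hands OnDemandPagedList a zero-based page getter
# that reuses that first response. A plain-Python sketch of the convention,
# with fetch() standing in for _download_webpage():
def _make_get_page(first_page, fetch):
    def get_page(idx):
        page = idx + 1  # site pages are 1-indexed in the ?page= query
        return first_page if not idx else fetch(page)
    return get_page

_get = _make_get_page('<page 1>', lambda n: f'<page {n}>')
assert _get(0) == '<page 1>'  # no second download for the first page
assert _get(2) == '<page 3>'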

class MotherlessGroupIE(MotherlessPaginatedIE):
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/g[vifm]?/(?P<id>[a-z0-9_]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'http://motherless.com/g/movie_scenes',
        'url': 'http://motherless.com/gv/movie_scenes',
        'info_dict': {
            'id': 'movie_scenes',
            'title': 'Movie Scenes',
            'description': 'Hot and sexy scenes from "regular" movies... '
                           'Beautiful actresses fully nude... A looot of '
                           'skin! :)Enjoy!',
        },
        'playlist_mincount': 662,
        'playlist_mincount': 540,
    }, {
        'url': 'http://motherless.com/gv/sex_must_be_funny',
        'url': 'http://motherless.com/g/sex_must_be_funny',
        'info_dict': {
            'id': 'sex_must_be_funny',
            'title': 'Sex must be funny',
            'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
                           'any kind!'
        },
        'playlist_mincount': 0,
        'expected_warnings': [
            'This group has no videos.',
        ]
        'playlist_count': 0,
    }, {
        'url': 'https://motherless.com/g/beautiful_cock',
        'url': 'https://motherless.com/gv/beautiful_cock',
        'info_dict': {
            'id': 'beautiful_cock',
            'title': 'Beautiful Cock',
            'description': 'Group for lovely cocks yours, mine, a friends anything human',
        },
        'playlist_mincount': 2500,
        'playlist_mincount': 2040,
    }]

    @classmethod
    def suitable(cls, url):
        return (False if MotherlessIE.suitable(url)
                else super(MotherlessGroupIE, cls).suitable(url))
    def _correct_path(self, url, item_id):
        return urllib.parse.urljoin(url, f'/gv/{item_id}')

    def _extract_entries(self, webpage, base):
        entries = []
        for mobj in re.finditer(
                r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
                webpage):
            video_url = compat_urlparse.urljoin(base, mobj.group('href'))
            if not MotherlessIE.suitable(video_url):
                continue
            video_id = MotherlessIE._match_id(video_url)
            title = mobj.group('title')
            entries.append(self.url_result(
                video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
                video_title=title))
        # Alternative fallback
        if not entries:
            entries = [
                self.url_result(
                    compat_urlparse.urljoin(base, '/' + entry_id),
                    ie=MotherlessIE.ie_key(), video_id=entry_id)
                for entry_id in orderedSet(re.findall(
                    r'data-codename=["\']([A-Z0-9]+)', webpage))]
        return entries

    def _real_extract(self, url):
        group_id = self._match_id(url)
        page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
        webpage = self._download_webpage(page_url, group_id)
        title = self._search_regex(
            r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
        description = self._html_search_meta(
            'description', webpage, fatal=False)
        page_count = str_to_int(self._search_regex(
            r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
            webpage, 'page_count', default=0))
        if not page_count:
            message = self._search_regex(
                r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
                webpage, 'error_msg', default=None) or 'This group has no videos.'
            self.report_warning(message, group_id)
            page_count = 1
        PAGE_SIZE = 80
class MotherlessGalleryIE(MotherlessPaginatedIE):
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/G[VIG]?(?P<id>[A-F0-9]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://motherless.com/GV338999F',
        'info_dict': {
            'id': '338999F',
            'title': 'Random',
        },
        'playlist_mincount': 190,
    }, {
        'url': 'https://motherless.com/GVABD6213',
        'info_dict': {
            'id': 'ABD6213',
            'title': 'Cuties',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://motherless.com/GVBCF7622',
        'info_dict': {
            'id': 'BCF7622',
            'title': 'Vintage',
        },
        'playlist_count': 0,
    }, {
        'url': 'https://motherless.com/G035DE2F',
        'info_dict': {
            'id': '035DE2F',
            'title': 'General',
        },
        'playlist_mincount': 420,
    }]

        def _get_page(idx):
            if idx > 0:
                webpage = self._download_webpage(
                    page_url, group_id, query={'page': idx + 1},
                    note='Downloading page %d/%d' % (idx + 1, page_count)
                )
            for entry in self._extract_entries(webpage, url):
                yield entry

        playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': group_id,
            'title': title,
            'description': description,
            'entries': playlist
        }
    def _correct_path(self, url, item_id):
        return urllib.parse.urljoin(url, f'/GV{item_id}')

yt_dlp/extractor/nebula.py
@@ -3,7 +3,7 @@ import json
import urllib.error

from .common import InfoExtractor
from ..utils import ExtractorError, parse_iso8601
from ..utils import ExtractorError, make_archive_id, parse_iso8601, remove_start

_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'

@@ -65,19 +65,20 @@ class NebulaBaseIE(InfoExtractor):
        return response['token']

    def _fetch_video_formats(self, slug):
        stream_info = self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/stream/',
        stream_info = self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/stream/',
                                            video_id=slug,
                                            auth_type='bearer',
                                            note='Fetching video stream info')
        manifest_url = stream_info['manifest']
        return self._extract_m3u8_formats_and_subtitles(manifest_url, slug)
        return self._extract_m3u8_formats_and_subtitles(manifest_url, slug, 'mp4')

    def _build_video_info(self, episode):
        fmts, subs = self._fetch_video_formats(episode['slug'])
        channel_slug = episode['channel_slug']
        channel_title = episode['channel_title']
        zype_id = episode.get('zype_id')
        return {
            'id': episode['zype_id'],
            'id': remove_start(episode['id'], 'video_episode:'),
            'display_id': episode['slug'],
            'formats': fmts,
            'subtitles': subs,
@@ -99,6 +100,9 @@ class NebulaBaseIE(InfoExtractor):
            'uploader_url': f'https://nebula.tv/{channel_slug}',
            'series': channel_title,
            'creator': channel_title,
            'extractor_key': NebulaIE.ie_key(),
            'extractor': NebulaIE.IE_NAME,
            '_old_archive_ids': [make_archive_id(NebulaIE, zype_id)] if zype_id else None,
        }

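# _old_archive_ids above keeps existing --download-archive files working
# across the switch from Zype IDs to the new UUIDs: make_archive_id()
# produces the "<lowercased ie key> <id>" line format that archive entries
# use. A quick sketch (the zype_id value is made up for illustration):
from yt_dlp.utils import make_archive_id

# expected: 'nebula 5c271b40b13fd613090034fd' (lowercased IE key + space + id)
print(make_archive_id('Nebula', '5c271b40b13fd613090034fd'))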
    def _perform_login(self, username=None, password=None):
@@ -113,7 +117,7 @@ class NebulaIE(NebulaBaseIE):
            'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
            'md5': '14944cfee8c7beeea106320c47560efc',
            'info_dict': {
                'id': '5c271b40b13fd613090034fd',
                'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
                'ext': 'mp4',
                'title': 'That Time Disney Remade Beauty and the Beast',
                'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
@@ -137,22 +141,22 @@ class NebulaIE(NebulaBaseIE):
            'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'md5': 'd05739cf6c38c09322422f696b569c23',
            'info_dict': {
                'id': '5e7e78171aaf320001fbd6be',
                'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
                'ext': 'mp4',
                'title': 'Landing Craft - How The Allies Got Ashore',
                'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
                'upload_date': '20200327',
                'timestamp': 1585348140,
                'channel': 'Real Engineering',
                'channel_id': 'realengineering',
                'uploader': 'Real Engineering',
                'uploader_id': 'realengineering',
                'series': 'Real Engineering',
                'channel': 'Real Engineering — The Logistics of D-Day',
                'channel_id': 'd-day',
                'uploader': 'Real Engineering — The Logistics of D-Day',
                'uploader_id': 'd-day',
                'series': 'Real Engineering — The Logistics of D-Day',
                'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
                'creator': 'Real Engineering',
                'creator': 'Real Engineering — The Logistics of D-Day',
                'duration': 841,
                'channel_url': 'https://nebula.tv/realengineering',
                'uploader_url': 'https://nebula.tv/realengineering',
                'channel_url': 'https://nebula.tv/d-day',
                'uploader_url': 'https://nebula.tv/d-day',
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            },
        },
@@ -160,7 +164,7 @@ class NebulaIE(NebulaBaseIE):
            'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
            'md5': 'ebe28a7ad822b9ee172387d860487868',
            'info_dict': {
                'id': '5e779ebdd157bc0001d1c75a',
                'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
                'ext': 'mp4',
                'title': 'Episode 1: The Draw',
                'description': r'contains:There’s free money on offer… if the players can all work together.',
@@ -190,7 +194,7 @@ class NebulaIE(NebulaBaseIE):
    ]

    def _fetch_video_metadata(self, slug):
        return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
        return self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/',
                                     video_id=slug,
                                     auth_type='bearer',
                                     note='Fetching video meta data')

yt_dlp/extractor/nhk.py
@@ -2,12 +2,15 @@ import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    join_nonempty,
    parse_duration,
    traverse_obj,
    unescapeHTML,
    unified_timestamp,
    url_or_none,
    urljoin,
    url_or_none
)


@@ -492,3 +495,73 @@ class NhkRadioNewsPageIE(InfoExtractor):

    def _real_extract(self, url):
        return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)


class NhkRadiruLiveIE(InfoExtractor):
    _GEO_COUNTRIES = ['JP']
    _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/player/\?ch=(?P<id>r[12]|fm)'
    _TESTS = [{
        # radio 1, no area specified
        'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
        'info_dict': {
            'id': 'r1-tokyo',
            'title': 're:^NHKネットラジオ第1 東京.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
            'live_status': 'is_live',
        },
    }, {
        # radio 2, area specified
        # (the area doesnt actually matter, r2 is national)
        'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
        'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
        'info_dict': {
            'id': 'r2-fukuoka',
            'title': 're:^NHKネットラジオ第2 福岡.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
            'live_status': 'is_live',
        },
    }, {
        # fm, area specified
        'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
        'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
        'info_dict': {
            'id': 'fm-sapporo',
            'title': 're:^NHKネットラジオFM 札幌.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
            'live_status': 'is_live',
        }
    }]

    _NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}

    def _real_extract(self, url):
        station = self._match_id(url)
        area = self._configuration_arg('area', ['tokyo'])[0]

        config = self._download_xml(
            'https://www.nhk.or.jp/radio/config/config_web.xml', station, 'Downloading area information')
        data = config.find(f'.//data//area[.="{area}"]/..')

        if not data:
            raise ExtractorError('Invalid area. Valid areas are: %s' % ', '.join(
                [i.text for i in config.findall('.//data//area')]), expected=True)

        noa_info = self._download_json(
            f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
            station, note=f'Downloading {area} station metadata')
        present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))

        return {
            'title': ' '.join(traverse_obj(present_info, (('service', 'area',), 'name', {str}))),
            'id': join_nonempty(station, area),
            'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
                'url': 'url',
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            })),
            'formats': self._extract_m3u8_formats(data.find(f'{station}hls').text, station),
            'is_live': True,
        }

yt_dlp/extractor/odnoklassniki.py
@@ -1,3 +1,5 @@
import urllib.parse

from .common import InfoExtractor
from ..compat import (
    compat_etree_fromstring,
@@ -7,6 +9,7 @@ from ..compat import (
)
from ..utils import (
    ExtractorError,
    HEADRequest,
    float_or_none,
    int_or_none,
    qualities,
@@ -15,6 +18,7 @@ from ..utils import (
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    url_or_none,
    urlencode_postdata,
)

@@ -41,7 +45,7 @@ class OdnoklassnikiIE(InfoExtractor):
            'ext': 'mp4',
            'timestamp': 1545580896,
            'view_count': int,
            'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
            'thumbnail': r're:^https?://.*\.jpg$',
            'title': 'Народная забава',
            'uploader': 'Nevata',
            'upload_date': '20181223',
@@ -65,13 +69,14 @@ class OdnoklassnikiIE(InfoExtractor):
            'title': str,
            'uploader': str,
        },
        'skip': 'vk extractor error',
    }, {
        # metadata in JSON
        # metadata in JSON, webm_dash with Firefox UA
        'url': 'http://ok.ru/video/20079905452',
        'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
        'md5': '8f477d8931c531374a3e36daec617b2c',
        'info_dict': {
            'id': '20079905452',
            'ext': 'mp4',
            'ext': 'webm',
            'title': 'Культура меняет нас (прекрасный ролик!))',
            'thumbnail': str,
            'duration': 100,
@@ -81,10 +86,14 @@ class OdnoklassnikiIE(InfoExtractor):
            'like_count': int,
            'age_limit': 0,
        },
        'params': {
            'format': 'bv[ext=webm]',
            'http_headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0'},
        },
    }, {
        # metadataUrl
        'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
        'md5': 'f8c951122516af72e6e6ffdd3c41103b',
        'md5': '2bae2f58eefe1b3d26f3926c4a64d2f3',
        'info_dict': {
            'id': '63567059965189-0',
            'ext': 'mp4',
@@ -98,10 +107,11 @@ class OdnoklassnikiIE(InfoExtractor):
            'age_limit': 0,
            'start_time': 5,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
        'url': 'https://ok.ru/video/3952212382174',
        'md5': '91749d0bd20763a28d083fa335bbd37a',
        'md5': '5fb5f83ce16cb212d6bf887282b5da53',
        'info_dict': {
            'id': '5axVgHHDBvU',
            'ext': 'mp4',
@@ -116,7 +126,7 @@ class OdnoklassnikiIE(InfoExtractor):
            'live_status': 'not_live',
            'view_count': int,
            'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
            'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
            'uploader_url': 'https://www.youtube.com/@MrKewlkid94',
            'channel_follower_count': int,
            'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
            'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
@@ -145,7 +155,6 @@ class OdnoklassnikiIE(InfoExtractor):
        },
        'skip': 'Video has not been found',
    }, {
        # TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
        'note': 'Only available in mobile webpage',
        'url': 'https://m.ok.ru/video/2361249957145',
        'info_dict': {
@@ -153,8 +162,8 @@ class OdnoklassnikiIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Быковское крещение',
            'duration': 3038.181,
            'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
        },
        'skip': 'HTTP Error 400',
    }, {
        'note': 'subtitles',
        'url': 'https://ok.ru/video/4249587550747',
@@ -226,6 +235,12 @@ class OdnoklassnikiIE(InfoExtractor):
        'skip': 'Site no longer embeds',
    }]

    def _clear_cookies(self, cdn_url):
        # Direct http downloads will fail if CDN cookies are set
        # so we need to reset them after each format extraction
        self.cookiejar.clear(domain='.mycdn.me')
        self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname)
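# Why _clear_cookies() above clears two domains: manifests can be served both
# from the shared .mycdn.me domain and from the specific CDN host of the last
# requested URL, and (per the comment above) direct HTTP downloads fail while
# those CDN cookies are set. The host is taken straight from the manifest URL,
# e.g. (URL made up for illustration):
import urllib.parse

assert urllib.parse.urlparse('https://vd331.mycdn.me/expires/video.m3u8').hostname == 'vd331.mycdn.me'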

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        for x in super()._extract_embed_urls(url, webpage):
@@ -364,14 +379,22 @@ class OdnoklassnikiIE(InfoExtractor):
            formats = [{
                'url': f['url'],
                'ext': 'mp4',
                'format_id': f['name'],
            } for f in metadata['videos']]
                'format_id': f.get('name'),
            } for f in traverse_obj(metadata, ('videos', lambda _, v: url_or_none(v['url'])))]

            m3u8_url = metadata.get('hlsManifestUrl')
            m3u8_url = traverse_obj(metadata, 'hlsManifestUrl', 'ondemandHls')
            if m3u8_url:
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
                self._clear_cookies(m3u8_url)

            for mpd_id, mpd_key in [('dash', 'ondemandDash'), ('webm', 'metadataWebmUrl')]:
                mpd_url = metadata.get(mpd_key)
                if mpd_url:
                    formats.extend(self._extract_mpd_formats(
                        mpd_url, video_id, mpd_id=mpd_id, fatal=False))
                    self._clear_cookies(mpd_url)

            dash_manifest = metadata.get('metadataEmbedded')
            if dash_manifest:
@@ -390,6 +413,7 @@ class OdnoklassnikiIE(InfoExtractor):
            if m3u8_url:
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
                self._clear_cookies(m3u8_url)
            rtmp_url = metadata.get('rtmpUrl')
            if rtmp_url:
                formats.append({
@@ -423,6 +447,10 @@ class OdnoklassnikiIE(InfoExtractor):
            r'data-video="(.+?)"', webpage, 'json data')
        json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}

        redirect_url = self._request_webpage(HEADRequest(
            json_data['videoSrc']), video_id, 'Requesting download URL').geturl()
        self._clear_cookies(redirect_url)

        return {
            'id': video_id,
            'title': json_data.get('videoName'),
@@ -430,7 +458,7 @@ class OdnoklassnikiIE(InfoExtractor):
            'thumbnail': json_data.get('videoPosterSrc'),
            'formats': [{
                'format_id': 'mobile',
                'url': json_data.get('videoSrc'),
                'url': redirect_url,
                'ext': 'mp4',
            }]
        }

yt_dlp/extractor/rheinmaintv.py (new file, 94 lines)
@@ -0,0 +1,94 @@
|
from .common import InfoExtractor
from ..utils import extract_attributes, merge_dicts, remove_end


class RheinMainTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rheinmaintv\.de/sendungen/(?:[\w-]+/)*(?P<video_id>(?P<display_id>[\w-]+)/vom-\d{2}\.\d{2}\.\d{4}(?:/\d+)?)'
_TESTS = [{
'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/auf-dem-weg-zur-deutschen-meisterschaft/vom-07.11.2022/',
'info_dict': {
'id': 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022',
'ext': 'ismv', # ismv+isma will be merged into mp4
'alt_title': 'Auf dem Weg zur Deutschen Meisterschaft',
'title': 'Auf dem Weg zur Deutschen Meisterschaft',
'upload_date': '20221108',
'view_count': int,
'display_id': 'auf-dem-weg-zur-deutschen-meisterschaft',
'thumbnail': r're:^https://.+\.jpg',
'description': 'md5:48c59b74192bc819a9b34af1d5ed1eb9',
'timestamp': 1667933057,
'duration': 243.0,
},
'params': {'skip_download': 'ism'},
}, {
'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
'info_dict': {
'id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften-vom-14.11.2022',
'ext': 'ismv',
'title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
'timestamp': 1668526214,
'display_id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften',
'alt_title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
'view_count': int,
'thumbnail': r're:^https://.+\.jpg',
'duration': 345.0,
'description': 'md5:9370ba29526984006c2cba1372e5c5a0',
'upload_date': '20221115',
},
'params': {'skip_download': 'ism'},
}, {
'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/casino-mainz-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
'info_dict': {
'id': 'casino-mainz-bei-den-deutschen-meisterschaften-vom-14.11.2022',
'ext': 'ismv',
'title': 'Casino Mainz bei den Deutschen Meisterschaften',
'view_count': int,
'timestamp': 1668527402,
'alt_title': 'Casino Mainz bei den Deutschen Meisterschaften',
'upload_date': '20221115',
'display_id': 'casino-mainz-bei-den-deutschen-meisterschaften',
'duration': 348.0,
'thumbnail': r're:^https://.+\.jpg',
'description': 'md5:70fc1660eeba96da17199e5bdff4c0aa',
},
'params': {'skip_download': 'ism'},
}, {
'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/bricks4kids/vom-22.06.2022/',
'only_matching': True,
}]

def _real_extract(self, url):
mobj = self._match_valid_url(url)
display_id = mobj.group('display_id')
video_id = mobj.group('video_id').replace('/', '-')
webpage = self._download_webpage(url, video_id)

source, img = self._search_regex(r'(?s)(?P<source><source[^>]*>)(?P<img><img[^>]*>)',
webpage, 'video', group=('source', 'img'))
source = extract_attributes(source)
img = extract_attributes(img)

raw_json_ld = list(self._yield_json_ld(webpage, video_id))
json_ld = self._json_ld(raw_json_ld, video_id)
json_ld.pop('url', None)

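# Prefer the <source> tag's src; otherwise fall back to the embedUrl of the first VideoObject in the JSON-LD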
ism_manifest_url = (
source.get('src')
or next(json_ld.get('embedUrl') for json_ld in raw_json_ld if json_ld.get('@type') == 'VideoObject')
)
formats, subtitles = self._extract_ism_formats_and_subtitles(ism_manifest_url, video_id)

return merge_dicts({
'id': video_id,
'display_id': display_id,
'title':
self._html_search_regex(r'<h1><span class="title">([^<]*)</span>',
webpage, 'headline', default=None)
or img.get('title') or json_ld.get('title') or self._og_search_title(webpage)
or remove_end(self._html_extract_title(webpage), ' -'),
'alt_title': img.get('alt'),
'description': json_ld.get('description') or self._og_search_description(webpage),
'formats': formats,
'subtitles': subtitles,
'thumbnails': [{'url': img['src']}] if 'src' in img else json_ld.get('thumbnails'),
}, json_ld)
@@ -1,10 +1,15 @@
import itertools
import urllib.error

from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
remove_start,
str_or_none,
traverse_obj,
unified_timestamp,
url_or_none,
)

@@ -51,7 +56,40 @@ class RozhlasIE(InfoExtractor):
}


class RozhlasVltavaIE(InfoExtractor):
class RozhlasBaseIE(InfoExtractor):
def _extract_formats(self, entry, audio_id):
formats = []
for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))):
ext = audio.get('variant')
for retry in self.RetryManager():
if retry.attempt > 1:
self._sleep(1, audio_id)
try:
if ext == 'dash':
formats.extend(self._extract_mpd_formats(
audio['url'], audio_id, mpd_id=ext))
elif ext == 'hls':
formats.extend(self._extract_m3u8_formats(
audio['url'], audio_id, 'm4a', m3u8_id=ext))
else:
formats.append({
'url': audio['url'],
'ext': ext,
'format_id': ext,
'abr': int_or_none(audio.get('bitrate')),
'acodec': ext,
'vcodec': 'none',
})
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 429:
retry.error = e.cause
else:
self.report_warning(e.msg)

return formats


class RozhlasVltavaIE(RozhlasBaseIE):
_VALID_URL = r'https?://(?:\w+\.rozhlas|english\.radio)\.cz/[\w-]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
@@ -168,33 +206,14 @@ class RozhlasVltavaIE(InfoExtractor):
}]

def _extract_video(self, entry):
formats = []
audio_id = entry['meta']['ga']['contentId']
for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))):
ext = audio.get('variant')
if ext == 'dash':
formats.extend(self._extract_mpd_formats(
audio['url'], audio_id, mpd_id=ext, fatal=False))
elif ext == 'hls':
formats.extend(self._extract_m3u8_formats(
audio['url'], audio_id, 'm4a', m3u8_id=ext, fatal=False))
else:
formats.append({
'url': audio['url'],
'ext': ext,
'format_id': ext,
'abr': int_or_none(audio.get('bitrate')),
'acodec': ext,
'vcodec': 'none',
})

chapter_number = traverse_obj(entry, ('meta', 'ga', 'contentSerialPart', {int_or_none}))

return {
'id': audio_id,
'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None,
'chapter_number': chapter_number,
'formats': formats,
'formats': self._extract_formats(entry, audio_id),
**traverse_obj(entry, {
'title': ('meta', 'ga', 'contentName'),
'description': 'title',
@@ -219,3 +238,106 @@ class RozhlasVltavaIE(InfoExtractor):
'title': traverse_obj(data, ('series', 'title')),
'entries': map(self._extract_video, data['playlist']),
}


class MujRozhlasIE(RozhlasBaseIE):
_VALID_URL = r'https?://(?:www\.)?mujrozhlas\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
# single episode extraction
'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
'md5': '6f8fd68663e64936623e67c152a669e0',
'info_dict': {
'id': '10739193',
'ext': 'mp3',
'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
'timestamp': 1684915200,
'modified_timestamp': 1684922446,
'series': 'Vykopávky',
'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
'channel_id': 'radio-wave',
'upload_date': '20230524',
'modified_date': '20230524',
},
}, {
# serial extraction
'url': 'https://www.mujrozhlas.cz/radiokniha/jaroslava-janackova-pribeh-tajemneho-psani-o-pramenech-genezi-babicky',
'playlist_mincount': 7,
'info_dict': {
'id': 'bb2b5f4e-ffb4-35a6-a34a-046aa62d6f6b',
'title': 'Jaroslava Janáčková: Příběh tajemného psaní. O pramenech a genezi Babičky',
'description': 'md5:7434d8fac39ac9fee6df098e11dfb1be',
},
}, {
# show extraction
'url': 'https://www.mujrozhlas.cz/nespavci',
'playlist_mincount': 14,
'info_dict': {
'id': '09db9b37-d0f4-368c-986a-d3439f741f08',
'title': 'Nespavci',
'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
},
}]

def _call_api(self, path, item_id, msg='API JSON'):
return self._download_json(
f'https://api.mujrozhlas.cz/{path}/{item_id}', item_id,
note=f'Downloading {msg}', errnote=f'Failed to download {msg}')['data']

def _extract_audio_entry(self, entry):
audio_id = entry['meta']['ga']['contentId']

return {
'id': audio_id,
'formats': self._extract_formats(entry['attributes'], audio_id),
**traverse_obj(entry, {
'title': ('attributes', 'title'),
'description': ('attributes', 'description'),
'episode_number': ('attributes', 'part'),
'series': ('attributes', 'mirroredShow', 'title'),
'chapter': ('attributes', 'mirroredSerial', 'title'),
'artist': ('meta', 'ga', 'contentAuthor'),
'channel_id': ('meta', 'ga', 'contentCreator'),
'timestamp': ('attributes', 'since', {unified_timestamp}),
'modified_timestamp': ('attributes', 'updated', {unified_timestamp}),
'thumbnail': ('attributes', 'asset', 'url', {url_or_none}),
})
}

def _entries(self, api_url, playlist_id):
for page in itertools.count(1):
episodes = self._download_json(
api_url, playlist_id, note=f'Downloading episodes page {page}',
errnote=f'Failed to download episodes page {page}', fatal=False)
for episode in traverse_obj(episodes, ('data', lambda _, v: v['meta']['ga']['contentId'])):
yield self._extract_audio_entry(episode)
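# The API paginates via a 'links.next' URL; stop once no next page is returned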
api_url = traverse_obj(episodes, ('links', 'next', {url_or_none}))
if not api_url:
break

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
info = self._search_json(r'\bvar\s+dl\s*=', webpage, 'info json', display_id)

entity = info['siteEntityBundle']

if entity == 'episode':
return self._extract_audio_entry(self._call_api(
'episodes', info['contentId'], 'episode info API JSON'))

elif entity in ('show', 'serial'):
playlist_id = info['contentShow'].split(':')[0] if entity == 'show' else info['contentId']
data = self._call_api(f'{entity}s', playlist_id, f'{entity} playlist JSON')
api_url = data['relationships']['episodes']['links']['related']
return self.playlist_result(
self._entries(api_url, playlist_id), playlist_id,
**traverse_obj(data, ('attributes', {
'title': 'title',
'description': 'description',
})))

else:
# `entity == 'person'` not implemented yet by API, ref:
# https://api.mujrozhlas.cz/persons/8367e456-2a57-379a-91bb-e699619bea49/participation
raise ExtractorError(f'Unsupported entity type "{entity}"')

@@ -144,7 +144,7 @@ class RumbleEmbedIE(InfoExtractor):
if embeds:
return embeds
return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{\s*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]

def _real_extract(self, url):
video_id = self._match_id(url)

@@ -73,7 +73,10 @@ class ShemarooMeIE(InfoExtractor):
key = bytes_to_intlist(compat_b64decode(data_json['key']))
iv = [0] * 16
m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii')
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
headers = {'stream_key': data_json['stream_key']}
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers=headers)
for fmt in formats:
fmt['http_headers'] = headers

release_date = self._html_search_regex(
(r'itemprop="uploadDate">\s*([\d-]+)', r'id="release_date" value="([\d-]+)'),

@@ -10,6 +10,8 @@ from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
jwt_decode_hs256,
try_call,
try_get,
)

@@ -77,8 +79,10 @@ class SonyLIVIE(InfoExtractor):
self._HEADERS['device_id'] = self._get_device_id()
self._HEADERS['content-type'] = 'application/json'

if username.lower() == 'token' and len(password) > 1198:
if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
self._HEADERS['authorization'] = password
self.report_login()
return
elif len(username) != 10 or not username.isdigit():
raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}')


@@ -42,14 +42,13 @@ class StripchatIE(InfoExtractor):
elif not traverse_obj(data, ('viewCam', 'model', 'isLive'), expected_type=bool):
raise UserNotLive(video_id=video_id)

server = traverse_obj(data, ('viewCam', 'viewServers', 'flashphoner-hls'), expected_type=str)
model_id = traverse_obj(data, ('viewCam', 'model', 'id'), expected_type=int)

formats = []
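# Try each configured HLS host (including hlsFallback domains) until one yields formats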
for host in traverse_obj(data, ('config', 'data', (
(('features', 'featuresV2'), 'hlsFallback', 'fallbackDomains', ...), 'hlsStreamHost'))):
formats = self._extract_m3u8_formats(
f'https://b-{server}.{host}/hls/{model_id}/master/{model_id}_auto.m3u8',
f'https://edge-hls.{host}/hls/{model_id}/master/{model_id}_auto.m3u8',
video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True)
if formats:
break

@@ -2,7 +2,7 @@ import re
import urllib.parse

from .common import InfoExtractor
from ..utils import str_or_none, traverse_obj
from ..utils import js_to_json, str_or_none, traverse_obj


class SubstackIE(InfoExtractor):
@@ -14,7 +14,7 @@ class SubstackIE(InfoExtractor):
'id': '47660949',
'ext': 'mp4',
'title': 'I MADE A VLOG',
'description': 'md5:10c01ff93439a62e70ce963b2aa0b7f6',
'description': 'md5:9248af9a759321e1027226f988f54d96',
'thumbnail': 'md5:bec758a34d8ee9142d43bcebdf33af18',
'uploader': 'Maybe Baby',
'uploader_id': '33628',
@@ -77,7 +77,9 @@ class SubstackIE(InfoExtractor):
display_id, username = self._match_valid_url(url).group('id', 'username')
webpage = self._download_webpage(url, display_id)

webpage_info = self._search_json(r'<script[^>]*>\s*window\._preloads\s*=', webpage, 'preloads', display_id)
webpage_info = self._parse_json(self._search_json(
r'window\._preloads\s*=\s*JSON\.parse\(', webpage, 'json string',
display_id, transform_source=js_to_json, contains_pattern=r'"{(?s:.+)}"'), display_id)

post_type = webpage_info['post']['type']
formats, subtitles = [], {}

@@ -1,8 +1,13 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
extract_attributes,
get_element_by_id,
get_element_html_by_class,
int_or_none,
str_or_none,
traverse_obj,
url_or_none,
)


@@ -21,7 +26,15 @@ class SverigesRadioBaseIE(InfoExtractor):
}

def _real_extract(self, url):
audio_id = self._match_id(url)
audio_id, display_id = self._match_valid_url(url).group('id', 'slug')
if not audio_id:
webpage = self._download_webpage(url, display_id)
audio_id = (
traverse_obj(
get_element_html_by_class('audio-button', webpage),
({extract_attributes}, ('data-audio-id', 'data-publication-id')), get_all=False)
or self._parse_json(get_element_by_id('gtm-metadata', webpage), display_id)['pageId'])

query = {
'id': audio_id,
'type': self._AUDIO_TYPE,
@@ -30,7 +43,6 @@ class SverigesRadioBaseIE(InfoExtractor):
item = self._download_json(
self._BASE_URL + 'audiometadata', audio_id,
'Downloading audio JSON metadata', query=query)['items'][0]
title = item['subtitle']

query['format'] = 'iis'
urls = []
@@ -61,18 +73,20 @@ class SverigesRadioBaseIE(InfoExtractor):

return {
'id': audio_id,
'title': title,
'formats': formats,
'series': item.get('title'),
'duration': int_or_none(item.get('duration')),
'thumbnail': item.get('displayimageurl'),
'description': item.get('description'),
**traverse_obj(item, {
'title': 'subtitle',
'series': 'title',
'duration': ('duration', {int_or_none}),
'thumbnail': ('displayimageurl', {url_or_none}),
'description': 'description',
}),
}


class SverigesRadioPublicationIE(SverigesRadioBaseIE):
IE_NAME = 'sverigesradio:publication'
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*?\bartikel=(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?(?:artikel|gruppsida)(?:\.aspx\?.*?\bartikel=(?P<id>[0-9]+)|/(?P<slug>[\w-]+))'
_TESTS = [{
'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546',
'md5': '6a4917e1923fccb080e5a206a5afa542',
@@ -85,6 +99,18 @@ class SverigesRadioPublicationIE(SverigesRadioBaseIE):
'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df',
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'https://sverigesradio.se/artikel/tysk-fotbollsfeber-bayern-munchens-10-ariga-segersvit-kan-brytas',
'md5': 'f8a914ad50f491bb74eed403ab4bfef6',
'info_dict': {
'id': '8360345',
'ext': 'm4a',
'title': 'Tysk fotbollsfeber när Bayern Münchens 10-åriga segersvit kan brytas',
'series': 'Radiosporten',
'description': 'md5:5254610e20ce527ecb3a6102a06dcc5f',
'duration': 72,
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887',
'only_matching': True,
@@ -94,8 +120,8 @@ class SverigesRadioPublicationIE(SverigesRadioBaseIE):

class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
IE_NAME = 'sverigesradio:episode'
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?:(?P<id>\d+)|(?P<slug>[\w-]+))(?:$|[#?])'
_TESTS = [{
'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300',
'md5': '20dc4d8db24228f846be390b0c59a07c',
'info_dict': {
@@ -106,6 +132,18 @@ class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
'title': 'Metoo och valen',
'description': 'md5:fcb5c1f667f00badcc702b196f10a27e',
'thumbnail': r're:^https?://.*\.jpg',
}
}
},
}, {
'url': 'https://sverigesradio.se/avsnitt/p4-live-med-first-aid-kit-scandinavium-mars-2023',
'md5': 'ce17fb82520a8033dbb846993d5589fe',
'info_dict': {
'id': '2160416',
'ext': 'm4a',
'title': 'P4 Live med First Aid Kit',
'description': 'md5:6d5b78eed3d2b65f6de04daa45e9285d',
'thumbnail': r're:^https?://.*\.jpg',
'series': 'P4 Live',
'duration': 5640,
},
}]
_AUDIO_TYPE = 'episode'

@@ -2,10 +2,12 @@ import re

from .common import InfoExtractor
from ..utils import (
js_to_json,
UnsupportedError,
extract_attributes,
try_get,
int_or_none,
js_to_json,
parse_iso8601,
try_get,
)


@@ -14,36 +16,38 @@ class TagesschauIE(InfoExtractor):

_TESTS = [{
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
'md5': '7a7287612fa881a1ae1d087df45c2fd6',
'md5': 'ccb9359bf8c4795836e43759f3408a93',
'info_dict': {
'id': 'video-102143-1',
'ext': 'mp4',
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
'duration': 138,
},
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
'md5': '3c54c1f6243d279b706bde660ceec633',
'md5': '5c15e8f3da049e48829ec9786d835536',
'info_dict': {
'id': 'ts-5727-1',
'ext': 'mp4',
'title': 'Ganze Sendung',
'duration': 932,
},
}, {
# exclusive audio
'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
'md5': '4cf22023c285f35e99c24d290ba58cc9',
'md5': '4bff8f23504df56a0d86ed312d654182',
'info_dict': {
'id': 'audio-29417-1',
'ext': 'mp3',
'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt',
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
},
}, {
'url': 'http://www.tagesschau.de/inland/bnd-303.html',
'md5': '12cfb212d9325b5ba0d52b625f1aa61c',
'md5': 'f049fa1698d7564e9ca4c3325108f034',
'info_dict': {
'id': 'bnd-303-1',
'ext': 'mp4',
'title': 'SPD-Gruppenbild mit Bärbel Bas nach der Fraktionssitzung | dpa',
'ext': 'mp3',
'title': 'Das Siegel des Bundesnachrichtendienstes | dpa',
},
}, {
'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
@@ -51,13 +55,24 @@ class TagesschauIE(InfoExtractor):
'id': 'afd-parteitag-135',
'title': 'AfD',
},
'playlist_count': 20,
'playlist_mincount': 15,
}, {
'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
'info_dict': {
'id': 'audio-29417-1',
'ext': 'mp3',
'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt',
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
},
}, {
'url': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-327.html',
'info_dict': {
'id': 'podcast-11km-327',
'ext': 'mp3',
'title': 'Gewalt in der Kita – Wenn Erzieher:innen schweigen',
'upload_date': '20230322',
'timestamp': 1679482808,
'thumbnail': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-329~_v-original.jpg',
'description': 'md5:dad059931fe4b3693e3656e93a249848',
},
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
@@ -117,7 +132,7 @@ class TagesschauIE(InfoExtractor):
formats = []
if media_url.endswith('master.m3u8'):
formats = self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls')
elif media_url.endswith('.hi.mp3') and media_url.startswith('https://download'):
elif media_url.endswith('.mp3'):
formats = [{
'url': media_url,
'vcodec': 'none',
@@ -130,20 +145,19 @@ class TagesschauIE(InfoExtractor):
'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])),
'formats': formats
})

if not entries:
raise UnsupportedError(url)

if len(entries) > 1:
return self.playlist_result(entries, display_id, title)
formats = entries[0]['formats']
video_info = self._search_json_ld(webpage, video_id)
description = video_info.get('description')
thumbnail = self._og_search_thumbnail(webpage) or video_info.get('thumbnail')
timestamp = video_info.get('timestamp')
title = title or video_info.get('description')

return {
'id': display_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
'timestamp': timestamp,
'description': description,
'thumbnail': self._og_search_thumbnail(webpage),
'formats': entries[0]['formats'],
'timestamp': parse_iso8601(self._html_search_meta('date', webpage)),
'description': self._og_search_description(webpage),
'duration': entries[0]['duration'],
}

@@ -163,11 +163,9 @@ class VQQBaseIE(TencentBaseIE):
_REFERER = 'v.qq.com'

def _get_webpage_metadata(self, webpage, video_id):
return self._parse_json(
self._search_regex(
r'(?s)<script[^>]*>[^<]*window\.__pinia\s*=\s*([^<]+)</script>',
webpage, 'pinia data', fatal=False),
video_id, transform_source=js_to_json, fatal=False)
return self._search_json(
r'<script[^>]*>[^<]*window\.__(?:pinia|PINIA__)\s*=',
webpage, 'pinia data', video_id, transform_source=js_to_json, fatal=False)


class VQQVideoIE(VQQBaseIE):
@@ -176,7 +174,7 @@ class VQQVideoIE(VQQBaseIE):

_TESTS = [{
'url': 'https://v.qq.com/x/page/q326831cny0.html',
'md5': '84568b3722e15e9cd023b5594558c4a7',
'md5': 'b11c9cb781df710d686b950376676e2a',
'info_dict': {
'id': 'q326831cny0',
'ext': 'mp4',
@@ -187,7 +185,7 @@ class VQQVideoIE(VQQBaseIE):
},
}, {
'url': 'https://v.qq.com/x/page/o3013za7cse.html',
'md5': 'cc431c4f9114a55643893c2c8ebf5592',
'md5': 'a1bcf42c6d28c189bd2fe2d468abb287',
'info_dict': {
'id': 'o3013za7cse',
'ext': 'mp4',
@@ -208,6 +206,7 @@ class VQQVideoIE(VQQBaseIE):
'series': '鸡毛飞上天',
'format_id': r're:^shd',
},
'skip': '404',
}, {
'url': 'https://v.qq.com/x/cover/mzc00200p29k31e/s0043cwsgj0.html',
'md5': 'fadd10bf88aec3420f06f19ee1d24c5b',
@@ -220,6 +219,7 @@ class VQQVideoIE(VQQBaseIE):
'series': '青年理工工作者生活研究所',
'format_id': r're:^shd',
},
'params': {'skip_download': 'm3u8'},
}, {
# Geo-restricted to China
'url': 'https://v.qq.com/x/cover/mcv8hkc8zk8lnov/x0036x5qqsr.html',

@@ -8,7 +8,7 @@ class TestURLIE(InfoExtractor):
""" Allows addressing of the test cases as test:yout.*be_1 """

IE_DESC = False # Do not list
_VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>[0-9]+))?$'
_VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>\d+|all))?$'

def _real_extract(self, url):
from . import gen_extractor_classes
@@ -36,6 +36,10 @@ class TestURLIE(InfoExtractor):
extractor = matching_extractors[0]

testcases = tuple(extractor.get_testcases(True))
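# 'test:<extractor>_all' returns every test case of the extractor as a playlist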
if num == 'all':
return self.playlist_result(
[self.url_result(tc['url'], extractor) for tc in testcases],
url, f'{extractor.IE_NAME} tests')
try:
tc = testcases[int(num or 0)]
except IndexError:
@@ -43,4 +47,4 @@ class TestURLIE(InfoExtractor):
f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True)

self.to_screen(f'Test URL: {tc["url"]}')
return self.url_result(tc['url'])
return self.url_result(tc['url'], extractor)

@@ -62,7 +62,7 @@ class TikTokBaseIE(InfoExtractor):
return self._download_json(
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
fatal=fatal, note=note, errnote=errnote, headers={
'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)',
'Accept': 'application/json',
}, query=query)

@@ -79,11 +79,11 @@ class TikTokBaseIE(InfoExtractor):
'_rticket': int(time.time() * 1000),
'ts': int(time.time()),
'device_brand': 'Google',
'device_type': 'Pixel 4',
'device_type': 'Pixel 7',
'device_platform': 'android',
'resolution': '1080*1920',
'resolution': '1080*2400',
'dpi': 420,
'os_version': '10',
'os_version': '13',
'os_api': '29',
'carrier_region': 'US',
'sys_region': 'US',
@@ -218,8 +218,8 @@ class TikTokBaseIE(InfoExtractor):
def extract_addr(addr, add_meta={}):
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
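# Cache width/height per resolution key so addrs lacking dimensions can reuse values seen earlier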
if res:
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height'))
known_resolutions[res].setdefault('width', add_meta.get('width'))
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height') or addr.get('height'))
known_resolutions[res].setdefault('width', add_meta.get('width') or addr.get('width'))
parsed_meta.update(known_resolutions.get(res, {}))
add_meta.setdefault('height', int_or_none(res[:-1]))
return [{
@@ -624,6 +624,32 @@ class TikTokIE(TikTokBaseIE):
'thumbnails': 'count:3',
},
'expected_warnings': ['Unable to find video in feed'],
}, {
# 1080p format
'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830',
'md5': '982512017a8a917124d5a08c8ae79621',
'info_dict': {
'id': '7107337212743830830',
'ext': 'mp4',
'title': 'new music video 4 don’t come backkkk🧸🖤 i hope u enjoy !! @musicontiktok',
'description': 'new music video 4 don’t come backkkk🧸🖤 i hope u enjoy !! @musicontiktok',
'uploader': 'tatemcrae',
'uploader_id': '86328792343818240',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'creator': 't8',
'artist': 't8',
'track': 'original sound',
'upload_date': '20220609',
'timestamp': 1654805899,
'duration': 150,
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
'thumbnail': r're:^https://.+\.webp',
},
'params': {'format': 'bytevc1_1080p_808907-0'},
}, {
# Auto-captions available
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',

@@ -2,8 +2,11 @@ import re

from .common import InfoExtractor
from ..utils import (
bool_or_none,
int_or_none,
parse_iso8601,
traverse_obj,
url_or_none,
)


@@ -20,19 +23,25 @@ class TV4IE(InfoExtractor):
sport/|
)
)(?P<id>[0-9]+)'''
_GEO_COUNTRIES = ['SE']
_GEO_BYPASS = False
_TESTS = [
{
# not geo-restricted
'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
'md5': 'cb837212f342d77cec06e6dad190e96d',
'info_dict': {
'id': '2491650',
'ext': 'mp4',
'title': 'Kalla Fakta 5 (english subtitles)',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': int,
'description': '2491650',
'series': 'Kalla fakta',
'duration': 1335,
'thumbnail': r're:^https?://[^/?#]+/api/v2/img/',
'timestamp': 1385373240,
'upload_date': '20131125',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Unable to download f4m manifest'],
},
{
'url': 'http://www.tv4play.se/iframe/video/3054113',
@@ -46,6 +55,7 @@ class TV4IE(InfoExtractor):
'timestamp': int,
'upload_date': '20150130',
},
'skip': '404 Not Found',
},
{
'url': 'http://www.tv4play.se/sport/3060959',
@@ -69,29 +79,28 @@ class TV4IE(InfoExtractor):
}
]

def _real_extract(self, url):
video_id = self._match_id(url)

info = self._download_json(
'https://playback-api.b17g.net/asset/%s' % video_id,
video_id, 'Downloading video info JSON', query={
'service': 'tv4',
'device': 'browser',
'protocol': 'hls,dash',
'drm': 'widevine',
})['metadata']

title = info['title']

manifest_url = self._download_json(
'https://playback-api.b17g.net/media/' + video_id,
video_id, query={
def _call_api(self, endpoint, video_id, headers=None, query={}):
return self._download_json(
f'https://playback2.a2d.tv/{endpoint}/{video_id}', video_id,
f'Downloading {endpoint} API JSON', headers=headers, query={
'service': 'tv4',
'device': 'browser',
'protocol': 'hls',
})['playbackItem']['manifestUrl']
formats = []
subtitles = {}
**query,
})

def _real_extract(self, url):
video_id = self._match_id(url)

info = traverse_obj(self._call_api('asset', video_id, query={
'protocol': 'hls,dash',
'drm': 'widevine',
}), ('metadata', {dict})) or {}

manifest_url = self._call_api(
'play', video_id, headers=self.geo_verification_headers())['playbackItem']['manifestUrl']

formats, subtitles = [], {}

fmts, subs = self._extract_m3u8_formats_and_subtitles(
manifest_url, video_id, 'mp4',
@@ -117,20 +126,24 @@ class TV4IE(InfoExtractor):
subtitles = self._merge_subtitles(subtitles, subs)

if not formats and info.get('is_geo_restricted'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
self.raise_geo_restricted(
'This video is not available from your location due to geo-restriction, or not being authenticated',
countries=['SE'])

return {
'id': video_id,
'title': title,
'formats': formats,
'subtitles': subtitles,
'description': info.get('description'),
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('image'),
'is_live': info.get('isLive') is True,
'series': info.get('seriesTitle'),
'season_number': int_or_none(info.get('seasonNumber')),
'episode': info.get('episodeTitle'),
'episode_number': int_or_none(info.get('episodeNumber')),
**traverse_obj(info, {
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': (('broadcast_date_time', 'broadcastDateTime'), {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('image', {url_or_none}),
'is_live': ('isLive', {bool_or_none}),
'series': ('seriesTitle', {str}),
'season_number': ('seasonNumber', {int_or_none}),
'episode': ('episodeTitle', {str}),
'episode_number': ('episodeNumber', {int_or_none}),
}, get_all=False),
}

@@ -488,9 +488,9 @@ class TVPVODBaseIE(InfoExtractor):
f'{self._API_BASE_URL}/{resource}', video_id,
query={'lang': 'pl', 'platform': 'BROWSER', **query},
expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs)
if is_valid(urlh.status):
if is_valid(urlh.getcode()):
return document
raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})')
raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.getcode()})')

def _parse_video(self, video, with_url=True):
info_dict = traverse_obj(video, {

@@ -60,7 +60,7 @@ class TwitchBaseIE(InfoExtractor):
@property
def _CLIENT_ID(self):
return self._configuration_arg(
'client_id', ['ue6666qo983tsx6so1t0vnawi233wa'], ie_key=TwitchStreamIE, casesense=True)[0]
'client_id', ['ue6666qo983tsx6so1t0vnawi233wa'], ie_key='Twitch', casesense=True)[0]

def _perform_login(self, username, password):
def fail(message):

@@ -3,7 +3,6 @@ import re

from .common import InfoExtractor
from .periscope import PeriscopeBaseIE, PeriscopeIE
from ..compat import functools # isort: split
from ..compat import (
compat_parse_qs,
compat_urllib_parse_unquote,
@@ -30,11 +29,67 @@ from ..utils import (


class TwitterBaseIE(InfoExtractor):
_NETRC_MACHINE = 'twitter'
_API_BASE = 'https://api.twitter.com/1.1/'
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
_AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
_guest_token = None
_flow_token = None

_LOGIN_INIT_DATA = json.dumps({
'input_flow_data': {
'flow_context': {
'debug_overrides': {},
'start_location': {
'location': 'unknown'
}
}
},
'subtask_versions': {
'action_list': 2,
'alert_dialog': 1,
'app_download_cta': 1,
'check_logged_in_account': 1,
'choice_selection': 3,
'contacts_live_sync_permission_prompt': 0,
'cta': 7,
'email_verification': 2,
'end_flow': 1,
'enter_date': 1,
'enter_email': 2,
'enter_password': 5,
'enter_phone': 2,
'enter_recaptcha': 1,
'enter_text': 5,
'enter_username': 2,
'generic_urt': 3,
'in_app_notification': 1,
'interest_picker': 3,
'js_instrumentation': 1,
'menu_dialog': 1,
'notifications_permission_prompt': 2,
'open_account': 2,
'open_home_timeline': 1,
'open_link': 1,
'phone_verification': 4,
'privacy_options': 1,
'security_key': 3,
'select_avatar': 4,
'select_banner': 2,
'settings_list': 7,
'show_code': 1,
'sign_up': 2,
'sign_up_review': 4,
'tweet_selection_urt': 1,
'update_users': 1,
'upload_media': 1,
'user_recommendations_list': 4,
'user_recommendations_urt': 1,
'wait_spinner': 3,
'web_modal': 1
}
}, separators=(',', ':')).encode()

def _extract_variant_formats(self, variant, video_id):
variant_url = variant.get('url')
@@ -86,18 +141,151 @@ class TwitterBaseIE(InfoExtractor):
'height': int(m.group('height')),
})

@functools.cached_property
@property
def is_logged_in(self):
return bool(self._get_cookies(self._API_BASE).get('auth_token'))

def _call_api(self, path, video_id, query={}, graphql=False):
cookies = self._get_cookies(self._API_BASE)
def _fetch_guest_token(self, headers, display_id):
headers.pop('x-guest-token', None)
self._guest_token = traverse_obj(self._download_json(
f'{self._API_BASE}guest/activate.json', display_id,
'Downloading guest token', data=b'', headers=headers), 'guest_token')
if not self._guest_token:
raise ExtractorError('Could not retrieve guest token')

def _set_base_headers(self):
headers = self._AUTH.copy()
csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
if csrf_token:
headers['x-csrf-token'] = csrf_token
return headers

csrf_cookie = cookies.get('ct0')
if csrf_cookie:
headers['x-csrf-token'] = csrf_cookie.value
def _call_login_api(self, note, headers, query={}, data=None):
response = self._download_json(
f'{self._API_BASE}onboarding/task.json', None, note,
headers=headers, query=query, data=data, expected_status=400)
error = traverse_obj(response, ('errors', 0, 'message', {str}))
if error:
raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
elif traverse_obj(response, 'status') != 'success':
raise ExtractorError('Login was unsuccessful')

subtask = traverse_obj(
response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
if not subtask:
raise ExtractorError('Twitter API did not return next login subtask')

self._flow_token = response['flow_token']

return subtask

def _perform_login(self, username, password):
if self.is_logged_in:
return

self._request_webpage('https://twitter.com/', None, 'Requesting cookies')
headers = self._set_base_headers()
self._fetch_guest_token(headers, None)
headers.update({
'content-type': 'application/json',
'x-guest-token': self._guest_token,
'x-twitter-client-language': 'en',
'x-twitter-active-user': 'yes',
'Referer': 'https://twitter.com/',
'Origin': 'https://twitter.com',
})

def build_login_json(*subtask_inputs):
return json.dumps({
'flow_token': self._flow_token,
'subtask_inputs': subtask_inputs
}, separators=(',', ':')).encode()

def input_dict(subtask_id, text):
return {
'subtask_id': subtask_id,
'enter_text': {
'text': text,
'link': 'next_link'
}
}

next_subtask = self._call_login_api(
'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

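# Twitter's onboarding flow hands back one subtask at a time; keep answering until the auth_token cookie is set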
while not self.is_logged_in:
|
||||
if next_subtask == 'LoginJsInstrumentationSubtask':
|
||||
next_subtask = self._call_login_api(
|
||||
'Submitting JS instrumentation response', headers, data=build_login_json({
|
||||
'subtask_id': next_subtask,
|
||||
'js_instrumentation': {
|
||||
'response': '{}',
|
||||
'link': 'next_link'
|
||||
}
|
||||
}))
|
||||
|
||||
elif next_subtask == 'LoginEnterUserIdentifierSSO':
|
||||
next_subtask = self._call_login_api(
|
||||
'Submitting username', headers, data=build_login_json({
|
||||
'subtask_id': next_subtask,
|
||||
'settings_list': {
|
||||
'setting_responses': [{
|
||||
'key': 'user_identifier',
|
||||
'response_data': {
|
||||
'text_data': {
|
||||
'result': username
|
||||
}
|
||||
}
|
||||
}],
|
||||
'link': 'next_link'
|
||||
}
|
||||
}))
|
||||
|
||||
elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
|
||||
next_subtask = self._call_login_api(
|
||||
'Submitting alternate identifier', headers,
|
||||
data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
|
||||
'one of username, phone number or email that was not used as --username'))))
|
||||
|
||||
elif next_subtask == 'LoginEnterPassword':
|
||||
next_subtask = self._call_login_api(
|
||||
'Submitting password', headers, data=build_login_json({
|
||||
'subtask_id': next_subtask,
|
||||
'enter_password': {
|
||||
'password': password,
|
||||
'link': 'next_link'
|
||||
}
|
||||
}))
|
||||
|
||||
elif next_subtask == 'AccountDuplicationCheck':
|
||||
next_subtask = self._call_login_api(
|
||||
'Submitting account duplication check', headers, data=build_login_json({
|
||||
'subtask_id': next_subtask,
|
||||
'check_logged_in_account': {
|
||||
'link': 'AccountDuplicationCheck_false'
|
||||
}
|
||||
}))
|
||||
|
||||
elif next_subtask == 'LoginTwoFactorAuthChallenge':
|
||||
next_subtask = self._call_login_api(
|
||||
'Submitting 2FA token', headers, data=build_login_json(input_dict(
|
||||
next_subtask, self._get_tfa_info('two-factor authentication token'))))
|
||||
|
||||
elif next_subtask == 'LoginAcid':
|
||||
next_subtask = self._call_login_api(
|
||||
'Submitting confirmation code', headers, data=build_login_json(input_dict(
|
||||
next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
|
||||
|
||||
elif next_subtask == 'LoginSuccessSubtask':
|
||||
raise ExtractorError('Twitter API did not grant auth token cookie')
|
||||
|
||||
else:
|
||||
raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
|
||||
|
||||
self.report_login()
|
||||
|
||||
def _call_api(self, path, video_id, query={}, graphql=False):
|
||||
headers = self._set_base_headers()
|
||||
if self.is_logged_in:
|
||||
headers.update({
|
||||
'x-twitter-auth-type': 'OAuth2Session',
|
||||
@@ -106,15 +294,10 @@ class TwitterBaseIE(InfoExtractor):
|
||||
})
|
||||
|
||||
for first_attempt in (True, False):
|
||||
if not self.is_logged_in and not self._guest_token:
|
||||
headers.pop('x-guest-token', None)
|
||||
self._guest_token = traverse_obj(self._download_json(
|
||||
f'{self._API_BASE}guest/activate.json', video_id,
|
||||
'Downloading guest token', data=b'', headers=headers), 'guest_token')
|
||||
if self._guest_token:
|
||||
if not self.is_logged_in:
|
||||
if not self._guest_token:
|
||||
self._fetch_guest_token(headers, video_id)
|
||||
headers['x-guest-token'] = self._guest_token
|
||||
elif not self.is_logged_in:
|
||||
raise ExtractorError('Could not retrieve guest token')
|
||||
|
||||
allowed_status = {400, 401, 403, 404} if graphql else {403}
|
||||
result = self._download_json(
|
||||
|
||||
@@ -112,18 +112,19 @@ class URPlayIE(InfoExtractor):
|
||||
lang = ISO639Utils.short2long(lang)
|
||||
return lang or None
|
||||
|
||||
for k, v in (urplayer_data['streamingInfo'].get('sweComplete') or {}).items():
|
||||
if (k in ('sd', 'hd') or not isinstance(v, dict)):
|
||||
continue
|
||||
lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
|
||||
if not sttl_url:
|
||||
continue
|
||||
lang = parse_lang_code(lang)
|
||||
if not lang:
|
||||
continue
|
||||
sttl = subtitles.get(lang) or []
|
||||
sttl.append({'ext': k, 'url': sttl_url, })
|
||||
subtitles[lang] = sttl
|
||||
for stream in urplayer_data['streamingInfo'].values():
|
||||
for k, v in stream.items():
|
||||
if (k in ('sd', 'hd') or not isinstance(v, dict)):
|
||||
continue
|
||||
lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
|
||||
if not sttl_url:
|
||||
continue
|
||||
lang = parse_lang_code(lang)
|
||||
if not lang:
|
||||
continue
|
||||
sttl = subtitles.get(lang) or []
|
||||
sttl.append({'ext': k, 'url': sttl_url, })
|
||||
subtitles[lang] = sttl
|
||||
|
||||
image = urplayer_data.get('image') or {}
|
||||
thumbnails = []
|
||||
|
||||
@@ -39,7 +39,7 @@ class VidioBaseIE(InfoExtractor):
|
||||
login_post, login_post_urlh = self._download_webpage_handle(
|
||||
self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
|
||||
|
||||
if login_post_urlh.status == 401:
|
||||
if login_post_urlh.getcode() == 401:
|
||||
if get_element_by_class('onboarding-content-register-popup__title', login_post):
|
||||
raise ExtractorError(
|
||||
'Unable to log in: The provided email has not registered yet.', expected=True)
|
||||
|
||||
@@ -1,14 +1,86 @@
|
||||
import json
|
||||
import time
|
||||
import urllib.error
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
jwt_decode_hs256,
|
||||
parse_age_limit,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class VootIE(InfoExtractor):
|
||||
class VootBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'voot'
|
||||
_GEO_BYPASS = False
|
||||
_LOGIN_HINT = 'Log in with "-u <email_address> -p <password>", or use "-u token -p <auth_token>" to login with auth token.'
|
||||
_TOKEN = None
|
||||
_EXPIRY = 0
|
||||
_API_HEADERS = {'Origin': 'https://www.voot.com', 'Referer': 'https://www.voot.com/'}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self._TOKEN and self._EXPIRY:
|
||||
return
|
||||
|
||||
if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
|
||||
VootBaseIE._TOKEN = password
|
||||
VootBaseIE._EXPIRY = jwt_decode_hs256(password)['exp']
|
||||
self.report_login()
|
||||
|
||||
# Mobile number as username is not supported
|
||||
elif not username.isdigit():
|
||||
check_username = self._download_json(
|
||||
'https://userauth.voot.com/usersV3/v3/checkUser', None, data=json.dumps({
|
||||
'type': 'email',
|
||||
'email': username
|
||||
}, separators=(',', ':')).encode(), headers={
|
||||
**self._API_HEADERS,
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
}, note='Checking username', expected_status=403)
|
||||
if not traverse_obj(check_username, ('isExist', {bool})):
|
||||
if traverse_obj(check_username, ('status', 'code', {int})) == 9999:
|
||||
self.raise_geo_restricted(countries=['IN'])
|
||||
raise ExtractorError('Incorrect username', expected=True)
|
||||
auth_token = traverse_obj(self._download_json(
|
||||
'https://userauth.voot.com/usersV3/v3/login', None, data=json.dumps({
|
||||
'type': 'traditional',
|
||||
'deviceId': str(uuid.uuid4()),
|
||||
'deviceBrand': 'PC/MAC',
|
||||
'data': {
|
||||
'email': username,
|
||||
'password': password
|
||||
}
|
||||
}, separators=(',', ':')).encode(), headers={
|
||||
**self._API_HEADERS,
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
}, note='Logging in', expected_status=400), ('data', 'authToken', {dict}))
|
||||
if not auth_token:
|
||||
raise ExtractorError('Incorrect password', expected=True)
|
||||
VootBaseIE._TOKEN = auth_token['accessToken']
|
||||
VootBaseIE._EXPIRY = auth_token['expirationTime']
|
||||
|
||||
else:
|
||||
raise ExtractorError(self._LOGIN_HINT, expected=True)
|
||||
|
||||
def _check_token_expiry(self):
|
||||
if int(time.time()) >= self._EXPIRY:
|
||||
raise ExtractorError('Access token has expired', expected=True)
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._TOKEN:
|
||||
self.raise_login_required(self._LOGIN_HINT, method=None)
|
||||
self._check_token_expiry()
|
||||
|
||||
|
||||
class VootIE(VootBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
voot:|
|
||||
@@ -20,27 +92,25 @@ class VootIE(InfoExtractor):
|
||||
)
|
||||
(?P<id>\d{3,})
|
||||
'''
|
||||
_GEO_COUNTRIES = ['IN']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
|
||||
'info_dict': {
|
||||
'id': '0_8ledb18o',
|
||||
'id': '441353',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340',
|
||||
'title': 'Is this the end of Kamini?',
|
||||
'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
|
||||
'timestamp': 1472162937,
|
||||
'timestamp': 1472103000,
|
||||
'upload_date': '20160825',
|
||||
'series': 'Ishq Ka Rang Safed',
|
||||
'season_number': 1,
|
||||
'episode': 'Is this the end of Kamini?',
|
||||
'episode_number': 340,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'release_date': '20160825',
|
||||
'season': 'Season 1',
|
||||
'age_limit': 13,
|
||||
'duration': 1146.0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
|
||||
'only_matching': True,
|
||||
@@ -55,59 +125,50 @@ class VootIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media_info = self._download_json(
|
||||
'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id,
|
||||
query={
|
||||
'platform': 'Web',
|
||||
'pId': 2,
|
||||
'mediaId': video_id,
|
||||
})
|
||||
'https://psapi.voot.com/jio/voot/v1/voot-web/content/query/asset-details', video_id,
|
||||
query={'ids': f'include:{video_id}', 'responseType': 'common'}, headers={'accesstoken': self._TOKEN})
|
||||
|
||||
status_code = try_get(media_info, lambda x: x['status']['code'], int)
|
||||
if status_code != 0:
|
||||
raise ExtractorError(media_info['status']['message'], expected=True)
|
||||
try:
|
||||
m3u8_url = self._download_json(
|
||||
'https://vootapi.media.jio.com/playback/v1/playbackrights', video_id,
|
||||
'Downloading playback JSON', data=b'{}', headers={
|
||||
**self.geo_verification_headers(),
|
||||
**self._API_HEADERS,
'Content-Type': 'application/json;charset=utf-8',
'platform': 'androidwebdesktop',
'vootid': video_id,
'voottoken': self._TOKEN,
})['m3u8']
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 400:
self._check_token_expiry()
raise

media = media_info['assets']
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
self._remove_duplicate_formats(formats)

entry_id = media['EntryId']
title = media['MediaName']
formats = self._extract_m3u8_formats(
'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id,
video_id, 'mp4', m3u8_id='hls')

description, series, season_number, episode, episode_number = [None] * 5

for meta in try_get(media, lambda x: x['Metas'], list) or []:
key, value = meta.get('Key'), meta.get('Value')
if not key or not value:
continue
if key == 'ContentSynopsis':
description = value
elif key == 'RefSeriesTitle':
series = value
elif key == 'RefSeriesSeason':
season_number = int_or_none(value)
elif key == 'EpisodeMainTitle':
episode = value
elif key == 'EpisodeNo':
episode_number = int_or_none(value)
return {
'extractor_key': 'Kaltura',
'id': entry_id,
'title': title,
'description': description,
'series': series,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'timestamp': unified_timestamp(media.get('CreationDate')),
'duration': int_or_none(media.get('Duration')),
'view_count': int_or_none(media.get('ViewCounter')),
'like_count': int_or_none(media.get('like_counter')),
'formats': formats,
'id': video_id,
# '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
'formats': traverse_obj(formats, (
lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
'http_headers': self._API_HEADERS,
**traverse_obj(media_info, ('result', 0, {
'title': ('fullTitle', {str}),
'description': ('fullSynopsis', {str}),
'series': ('showName', {str}),
'season_number': ('season', {int_or_none}),
'episode': ('fullTitle', {str}),
'episode_number': ('episode', {int_or_none}),
'timestamp': ('uploadTime', {int_or_none}),
'release_date': ('telecastDate', {unified_strdate}),
'age_limit': ('ageNemonic', {parse_age_limit}),
'duration': ('duration', {float_or_none}),
})),
}


class VootSeriesIE(InfoExtractor):
class VootSeriesIE(VootBaseIE):
_VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})'
_TESTS = [{
'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002',

@@ -41,7 +41,7 @@ class WrestleUniverseBaseIE(InfoExtractor):
token = try_call(lambda: self._get_cookies('https://www.wrestle-universe.com/')['token'].value)
if not token and not self._REFRESH_TOKEN:
self.raise_login_required()
self._REAL_TOKEN = token
self._TOKEN = token

if not self._REAL_TOKEN or self._TOKEN_EXPIRY <= int(time.time()):
if not self._REFRESH_TOKEN:

@@ -158,7 +158,7 @@ class XimalayaAlbumIE(XimalayaBaseIE):
return self._download_json(
'https://www.ximalaya.com/revision/album/v1/getTracksList',
playlist_id, note=f'Downloading tracks list page {page_idx}',
query={'albumId': playlist_id, 'pageNum': page_idx, 'sort': 1})['data']
query={'albumId': playlist_id, 'pageNum': page_idx})['data']

def _get_entries(self, page_data):
for e in page_data['tracks']:

@@ -1,9 +1,10 @@
from .common import InfoExtractor
from ..utils import (
OnDemandPagedList,
int_or_none,
traverse_obj,
unified_timestamp,
url_or_none
url_or_none,
)


@@ -97,3 +98,30 @@ class YappyIE(InfoExtractor):
'categories': traverse_obj(media_data, ('categories', ..., 'name')) or None,
'repost_count': int_or_none(media_data.get('sharingCount'))
}


class YappyProfileIE(InfoExtractor):
_VALID_URL = r'https?://yappy\.media/profile/(?P<id>\w+)'
_TESTS = [{
'url': 'https://yappy.media/profile/59a0c8c485e5410b9c43474bf4c6a373',
'info_dict': {
'id': '59a0c8c485e5410b9c43474bf4c6a373',
},
'playlist_mincount': 527,
}]

def _real_extract(self, url):
profile_id = self._match_id(url)

def fetch_page(page_num):
page_num += 1
videos = self._download_json(
f'https://yappy.media/api/video/list/{profile_id}?page={page_num}',
profile_id, f'Downloading profile page {page_num} JSON')

for video in traverse_obj(videos, ('results', lambda _, v: v['uuid'])):
yield self.url_result(
f'https://yappy.media/video/{video["uuid"]}', YappyIE,
video['uuid'], video.get('description'))

return self.playlist_result(OnDemandPagedList(fetch_page, 15), profile_id)
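The OnDemandPagedList used above fetches pages lazily: fetch_page is only invoked for the pages that a given playlist selection actually needs (note the `page_num += 1`, since the Yappy API is 1-indexed while the wrapper passes 0-indexed page numbers). A minimal sketch of the same pattern against an in-memory stand-in for the API (the page size and data here are illustrative, not Yappy's):

    from yt_dlp.utils import OnDemandPagedList

    PAGE_SIZE = 3  # illustrative; the extractor above uses 15

    def fetch_page(page_num):  # 0-indexed, as OnDemandPagedList calls it
        data = list(range(10))  # stand-in for one profile's full video list
        yield from data[page_num * PAGE_SIZE:(page_num + 1) * PAGE_SIZE]

    pages = OnDemandPagedList(fetch_page, PAGE_SIZE)
    print(pages.getslice(0, 4))  # [0, 1, 2, 3] -- only pages 0 and 1 are fetched
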
@@ -258,7 +258,7 @@ def build_innertube_clients():
THIRD_PARTY = {
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
}
BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
priority = qualities(BASE_CLIENTS[::-1])

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
@@ -292,6 +292,7 @@ class BadgeType(enum.Enum):
AVAILABILITY_PREMIUM = enum.auto()
AVAILABILITY_SUBSCRIPTION = enum.auto()
LIVE_NOW = enum.auto()
VERIFIED = enum.auto()


class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -791,17 +792,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_and_report_alerts(self, data, *args, **kwargs):
return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

def _extract_badges(self, renderer: dict):
privacy_icon_map = {
def _extract_badges(self, badge_list: list):
"""
Extract known BadgeType's from a list of badge renderers.
@returns [{'type': BadgeType}]
"""
icon_type_map = {
'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
'CHECK': BadgeType.VERIFIED,
}

badge_style_map = {
'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
}

label_map = {
@@ -809,13 +819,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'private': BadgeType.AVAILABILITY_PRIVATE,
'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
'live': BadgeType.LIVE_NOW,
'premium': BadgeType.AVAILABILITY_PREMIUM
'premium': BadgeType.AVAILABILITY_PREMIUM,
'verified': BadgeType.VERIFIED,
'official artist channel': BadgeType.VERIFIED,
}

badges = []
for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
badge_type = (
privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
or badge_style_map.get(traverse_obj(badge, 'style'))
)
if badge_type:
@@ -823,11 +835,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
continue

# fallback, won't work in some languages
label = traverse_obj(badge, 'label', expected_type=str, default='')
label = traverse_obj(
badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
for match, label_badge_type in label_map.items():
if match in label.lower():
badges.append({'type': badge_type})
continue
badges.append({'type': label_badge_type})
break

return badges
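The rewritten _extract_badges resolves each badge renderer in three stages: icon type first, then badge style, then a locale-dependent label fallback. A condensed, standalone sketch of that fallback chain (the renderer dicts and string badge types here are made up for illustration):

    # Sketch of the three-stage badge classification used above.
    ICONS = {'CHECK_CIRCLE_THICK': 'VERIFIED'}
    STYLES = {'BADGE_STYLE_TYPE_LIVE_NOW': 'LIVE_NOW'}
    LABELS = {'members only': 'AVAILABILITY_SUBSCRIPTION'}

    def classify(badge):
        badge_type = (ICONS.get((badge.get('icon') or {}).get('iconType'))
                      or STYLES.get(badge.get('style')))
        if badge_type:
            return badge_type
        # label matching is a last resort: it breaks in non-English UIs
        label = (badge.get('label') or '').lower()
        return next((v for k, v in LABELS.items() if k in label), None)

    assert classify({'icon': {'iconType': 'CHECK_CIRCLE_THICK'}}) == 'VERIFIED'
    assert classify({'style': 'BADGE_STYLE_TYPE_LIVE_NOW'}) == 'LIVE_NOW'
    assert classify({'label': 'Members only'}) == 'AVAILABILITY_SUBSCRIPTION'
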
@@ -1020,8 +1033,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
overlay_style = traverse_obj(
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
get_all=False, expected_type=str)
badges = self._extract_badges(renderer)

badges = self._extract_badges(traverse_obj(renderer, 'badges'))
owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
expected_type=str)) or ''
@@ -1079,7 +1092,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
view_count_field: view_count,
'live_status': live_status
'live_status': live_status,
'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None
}


@@ -1332,6 +1346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Philipp Hagemeister',
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@@ -1415,6 +1430,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'The Witcher',
'uploader_url': 'https://www.youtube.com/@thewitcher',
'uploader_id': '@thewitcher',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
},
{
@@ -1444,6 +1462,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
'uploader_id': '@FlyingKitty900',
'comment_count': int,
'channel_is_verified': True,
},
},
{
@@ -1577,6 +1596,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Olympics',
'uploader_url': 'https://www.youtube.com/@Olympics',
'uploader_id': '@Olympics',
'channel_is_verified': True,
},
'params': {
'skip_download': 'requires avconv',
@@ -1894,6 +1914,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Bernie Sanders',
'uploader_url': 'https://www.youtube.com/@BernieSanders',
'uploader_id': '@BernieSanders',
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@@ -1955,6 +1977,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Vsauce',
'uploader_url': 'https://www.youtube.com/@Vsauce',
'uploader_id': '@Vsauce',
'comment_count': int,
'channel_is_verified': True,
},
'params': {
'skip_download': True,
@@ -2147,6 +2171,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'kudvenkat',
'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
'uploader_id': '@Csharp-video-tutorialsBlogspot',
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@@ -2227,6 +2253,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'CBS Mornings',
'uploader_url': 'https://www.youtube.com/@CBSMornings',
'uploader_id': '@CBSMornings',
'comment_count': int,
'channel_is_verified': True,
}
},
{
@@ -2297,6 +2325,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'colinfurze',
'uploader_url': 'https://www.youtube.com/@colinfurze',
'uploader_id': '@colinfurze',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'format': '17', # 3gp format available on android
@@ -2342,6 +2373,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'SciShow',
'uploader_url': 'https://www.youtube.com/@SciShow',
'uploader_id': '@SciShow',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
}, 'params': {'format': 'mhtml', 'skip_download': True}
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@@ -2370,6 +2404,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Leon Nguyen',
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
}
}, {
# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
@@ -2398,6 +2433,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Leon Nguyen',
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
},
'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
}, {
@@ -2428,6 +2464,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Quackity',
'uploader_id': '@Quackity',
'uploader_url': 'https://www.youtube.com/@Quackity',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
}
},
{ # continuous livestream. Microformat upload date should be preferred.
@@ -2594,6 +2633,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'MrBeast',
'uploader_url': 'https://www.youtube.com/@MrBeast',
'uploader_id': '@MrBeast',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}, {
@@ -2655,6 +2697,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'さなちゃんねる',
'uploader_url': 'https://www.youtube.com/@sana_natori',
'uploader_id': '@sana_natori',
'channel_is_verified': True,
'heatmap': 'count:100',
},
},
{
@@ -2684,6 +2728,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'thumbnail': r're:^https?://.*\.webp',
'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
'playable_in_embed': True,
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
@@ -2720,6 +2767,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Christopher Sykes',
'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
'uploader_id': '@ChristopherSykesDocumentaries',
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@@ -3121,7 +3169,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return funcname

return json.loads(js_to_json(self._search_regex(
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])[,;]', jscode,
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

def _extract_n_function_code(self, video_id, player_url):
@@ -3337,14 +3385,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['author_is_uploader'] = author_is_uploader

comment_abr = traverse_obj(
comment_renderer, ('actionsButtons', 'commentActionButtonsRenderer'), expected_type=dict)
comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
if comment_abr is not None:
info['is_favorited'] = 'creatorHeart' in comment_abr

comment_ab_icontype = traverse_obj(
comment_renderer, ('authorCommentBadge', 'authorCommentBadgeRenderer', 'icon', 'iconType'))
if comment_ab_icontype is not None:
info['author_is_verified'] = comment_ab_icontype in ('CHECK_CIRCLE_THICK', 'OFFICIAL_ARTIST_BADGE')
badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
if self._has_badge(badges, BadgeType.VERIFIED):
info['author_is_verified'] = True

is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
if is_pinned:
@@ -3581,7 +3628,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'

_STORY_PLAYER_PARAMS = '8AEB'
_PLAYER_PARAMS = 'CgIQBg=='

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):

@@ -3595,7 +3642,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'videoId': video_id,
}
if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
yt_query['params'] = self._STORY_PLAYER_PARAMS
yt_query['params'] = self._PLAYER_PARAMS

yt_query.update(self._generate_player_context(sts))
return self._extract_response(
@@ -3607,7 +3654,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
default = ['android', 'web']
default = ['ios', 'android', 'web']
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
@@ -3830,6 +3877,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
fps = int_or_none(fmt.get('fps')) or 0
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
@@ -3837,16 +3886,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format_note': join_nonempty(
join_nonempty(audio_track.get('displayName'),
language_preference > 0 and ' (default)', delim=''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
fmt.get('isDrc') and 'DRC',
name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
throttled and 'THROTTLED', is_damaged and 'DAMAGED',
(self.get_param('verbose') or all_formats) and client_name,
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': -10 if throttled else -5 if itag == '22' else -1,
'fps': int_or_none(fmt.get('fps')) or None,
'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
+ (100 if 'Premium' in name else 0)),
'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
'audio_channels': fmt.get('audioChannels'),
'height': height,
'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
@@ -3915,11 +3964,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif itag:
f['format_id'] = itag

if itag in ('616', '235'):
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
f['source_preference'] = (f.get('source_preference') or -1) + 100

f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
if f['quality'] == -1 and f.get('height'):
f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
if self.get_param('verbose'):
if self.get_param('verbose') or all_formats:
f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
if f.get('fps') and f['fps'] <= 1:
del f['fps']
return True

subtitles = {}
@@ -3992,8 +4047,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
query = {'bpctr': '9999999999', 'has_verified': '1'}
if smuggled_data.get('is_story'):
query['pp'] = self._STORY_PLAYER_PARAMS
if smuggled_data.get('is_story'): # XXX: Deprecated
query['pp'] = self._PLAYER_PARAMS
webpage = self._download_webpage(
webpage_url, video_id, fatal=False, query=query)

@@ -4297,9 +4352,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
trans_code += f'-{lang_code}'
trans_name += format_field(lang_name, None, ' from %s')
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
if lang_code == f'a-{orig_trans_code}':
# Set audio language based on original subtitles
for f in formats:
if f.get('acodec') != 'none' and not f.get('language'):
f['language'] = orig_trans_code
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
process_language(
automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
# Setting tlang=lang returns damaged subtitles.
@@ -4319,15 +4378,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info[d_k] = parse_duration(query[k][0])

# Youtube Music Auto-generated description
if video_description:
if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
# XXX: Causes catastrophic backtracking if description has "·"
# E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
# Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
# reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
mobj = re.search(
r'''(?xs)
(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
(?P<album>[^\n]+)
(?=(?P<track>[^\n·]+))(?P=track)·
(?=(?P<artist>[^\n]+))(?P=artist)\n+
(?=(?P<album>[^\n]+))(?P=album)\n
(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
.+\nAuto-generated\ by\ YouTube\.\s*$
(.+?\nArtist\s*:\s*
(?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
)?.+\nAuto-generated\ by\ YouTube\.\s*$
''', video_description)
if mobj:
release_year = mobj.group('release_year')
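The lookahead trick named in the comment above is worth spelling out: Python's re has no atomic groups, but `(?=(?P<a>...))(?P=a)` emulates one, because the lookahead's capture is fixed once taken and the backreference cannot be re-tried at other lengths. A standalone demonstration (this is not the extractor's actual pattern, just the technique):

    import re

    # Naive repetition can backtrack through every split of the run on failure;
    # the lookahead+backreference form commits to the maximal run, so each
    # start position is tried exactly once.
    naive = re.compile(r'(?P<track>[^·\n]+)·')
    atomic = re.compile(r'(?=(?P<track>[^·\n]+))(?P=track)·')

    s = 'no separator anywhere in this line'
    assert naive.search(s) is None and atomic.search(s) is None
    # Both fail, but the atomic form does so without re-splitting the
    # repetition -- which is what tames the catastrophic case described above.
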
@@ -4488,6 +4553,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['artist'] = mrr_contents_text
elif mrr_title == 'Song':
info['track'] = mrr_contents_text
owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
if self._has_badge(owner_badges, BadgeType.VERIFIED):
info['channel_is_verified'] = True

info.update({
'uploader': info.get('channel'),
@@ -4505,7 +4573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
):
upload_date = strftime_or_none(
self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
info['upload_date'] = upload_date

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
@@ -4513,7 +4581,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if v:
info[d_k] = v

badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
badges = self._extract_badges(traverse_obj(vpir, 'badges'))

is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
or get_first(video_details, 'isPrivate', expected_type=bool))
@@ -4586,13 +4654,14 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
channel_id = self.ucid_or_none(renderer['channelId'])
title = self._get_text(renderer, 'title')
channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
# As of 2023-03-01 YouTube doesn't use the channel handles on these renderers yet.
# However we can expect them to change that in the future.
channel_handle = self.handle_from_url(
traverse_obj(renderer, (
'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
('browseEndpoint', 'canonicalBaseUrl')),
{str}), get_all=False))
if not channel_handle:
# As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))
return {
'_type': 'url',
'url': channel_url,
@@ -4605,10 +4674,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
'title': title,
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
# See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
# However, in feed/channels this is set correctly to the subscriber count
'channel_follower_count': traverse_obj(
renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
'playlist_count': self._get_count(renderer, 'videoCountText'),
'playlist_count': (
# videoCountText may be the subscriber count
self._get_count(renderer, 'videoCountText')
if self._get_count(renderer, 'subscriberCountText') is not None else None),
'description': self._get_text(renderer, 'descriptionSnippet'),
'channel_is_verified': True if self._has_badge(
self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED) else None,
}

def _grid_entries(self, grid_renderer):
@@ -5024,6 +5101,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
})

channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
if self._has_badge(channel_badges, BadgeType.VERIFIED):
info['channel_is_verified'] = True
# Playlist stats is a text runs array containing [video count, view count, last updated].
# last updated or (view count and last updated) may be missing.
playlist_stats = get_first(
@@ -5032,7 +5113,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
last_updated_unix = self._parse_time_text(
self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')
info['modified_date'] = strftime_or_none(last_updated_unix)

info['view_count'] = self._get_count(playlist_stats, 1)
if info['view_count'] is None: # 0 is allowed
@@ -5132,7 +5213,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
player_header_privacy = playlist_header_renderer.get('privacy')

badges = self._extract_badges(sidebar_renderer)
badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
privacy_setting_icon = get_first(
@@ -5382,7 +5463,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader': '3Blue1Brown',
'tags': ['Mathematics'],
'channel_follower_count': int
'channel_follower_count': int,
'channel_is_verified': True,
},
}, {
'note': 'playlists, singlepage',
@@ -5559,6 +5641,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
'channel_is_verified': True,
},
}, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
@@ -5722,7 +5805,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
'id': 'AlTsmyW4auo', # This will keep changing
'id': 'hGkQjiJLjWQ', # This will keep changing
'ext': 'mp4',
'title': str,
'upload_date': r're:\d{8}',
@@ -5746,6 +5829,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@SkyNews',
'uploader_id': '@SkyNews',
'uploader': 'Sky News',
'channel_is_verified': True,
},
'params': {
'skip_download': True,
@@ -6234,7 +6318,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': str,
'uploader': str,
'uploader_url': str,
'uploader_id': str
'uploader_id': str,
'channel_is_verified': bool, # this will keep changing
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
@@ -6270,6 +6355,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'PewDiePie',
'uploader_url': 'https://www.youtube.com/@PewDiePie',
'uploader_id': '@PewDiePie',
'channel_is_verified': True,
}
}],
'params': {'extract_flat': True},
@@ -6288,6 +6374,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
'channel_is_verified': True,
},
'playlist_count': 0,
}, {
@@ -6322,6 +6409,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'description': 'I make music',
'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
'channel_follower_count': int,
'channel_is_verified': True,
},
'playlist_mincount': 10,
}]
@@ -6897,12 +6985,15 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
'title': 'Kurzgesagt – In a Nutshell',
'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
'playlist_count': int, # XXX: should have a way of saying > 1
# No longer available for search as it is set to the handle.
# 'playlist_count': int,
'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'thumbnails': list,
'uploader_id': '@kurzgesagt',
'uploader_url': 'https://www.youtube.com/@kurzgesagt',
'uploader': 'Kurzgesagt – In a Nutshell',
'channel_is_verified': True,
'channel_follower_count': int,
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
@@ -7166,6 +7257,8 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
'live_status': 'not_live',
'channel_follower_count': int,
'chapters': 'count:20',
'comment_count': int,
'heatmap': 'count:100',
}
}]

@@ -7226,6 +7319,8 @@ class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
'channel': 'さなちゃんねる',
'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
'uploader': 'さなちゃんねる',
'channel_is_verified': True,
'heatmap': 'count:100',
},
'add_ie': ['Youtube'],
'params': {'skip_download': 'Youtube'},

130 yt_dlp/extractor/zaiko.py Normal file
@@ -0,0 +1,130 @@
import base64

from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
str_or_none,
traverse_obj,
try_call,
unescapeHTML,
url_or_none,
)


class ZaikoBaseIE(InfoExtractor):
def _download_real_webpage(self, url, video_id):
webpage, urlh = self._download_webpage_handle(url, video_id)
final_url = urlh.geturl()
if 'zaiko.io/login' in final_url:
self.raise_login_required()
elif '/_buy/' in final_url:
raise ExtractorError('Your account does not have tickets to this event', expected=True)
return webpage

def _parse_vue_element_attr(self, name, string, video_id):
page_elem = self._search_regex(rf'(<{name}[^>]+>)', string, name)
attrs = {}
for key, value in extract_attributes(page_elem).items():
if key.startswith(':'):
attrs[key[1:]] = self._parse_json(
value, video_id, transform_source=unescapeHTML, fatal=False)
return attrs


class ZaikoIE(ZaikoBaseIE):
_VALID_URL = r'https?://(?:[\w-]+\.)?zaiko\.io/event/(?P<id>\d+)/stream(?:/\d+)+'
_TESTS = [{
'url': 'https://zaiko.io/event/324868/stream/20571/20571',
'info_dict': {
'id': '324868',
'ext': 'mp4',
'title': 'ZAIKO STREAMING TEST',
'alt_title': '[VOD] ZAIKO STREAMING TEST_20210603(Do Not Delete)',
'uploader_id': '454',
'uploader': 'ZAIKO ZERO',
'release_timestamp': 1583809200,
'thumbnail': r're:https://[a-z0-9]+.cloudfront.net/[a-z0-9_]+/[a-z0-9_]+',
'release_date': '20200310',
'categories': ['Tech House'],
'live_status': 'was_live',
},
'params': {'skip_download': 'm3u8'},
}]

def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_real_webpage(url, video_id)
stream_meta = self._parse_vue_element_attr('stream-page', webpage, video_id)

player_page = self._download_webpage(
stream_meta['stream-access']['video_source'], video_id,
'Downloading player page', headers={'referer': 'https://zaiko.io/'})
player_meta = self._parse_vue_element_attr('player', player_page, video_id)
status = traverse_obj(player_meta, ('initial_event_info', 'status', {str}))
live_status, msg, expected = {
'vod': ('was_live', 'No VOD stream URL was found', False),
'archiving': ('post_live', 'Event VOD is still being processed', True),
'deleting': ('post_live', 'This event has ended', True),
'deleted': ('post_live', 'This event has ended', True),
'error': ('post_live', 'This event has ended', True),
'disconnected': ('post_live', 'Stream has been disconnected', True),
'live_to_disconnected': ('post_live', 'Stream has been disconnected', True),
'live': ('is_live', 'No livestream URL was found', False),
'waiting': ('is_upcoming', 'Live event has not yet started', True),
'cancelled': ('not_live', 'Event has been cancelled', True),
}.get(status) or ('not_live', f'Unknown event status "{status}"', False)

stream_url = traverse_obj(player_meta, ('initial_event_info', 'endpoint', {url_or_none}))
formats = self._extract_m3u8_formats(
stream_url, video_id, live=True, fatal=False) if stream_url else []
if not formats:
self.raise_no_formats(msg, expected=expected)

return {
'id': video_id,
'formats': formats,
'live_status': live_status,
**traverse_obj(stream_meta, {
'title': ('event', 'name', {str}),
'uploader': ('profile', 'name', {str}),
'uploader_id': ('profile', 'id', {str_or_none}),
'release_timestamp': ('stream', 'start', 'timestamp', {int_or_none}),
'categories': ('event', 'genres', ..., {lambda x: x or None}),
}),
**traverse_obj(player_meta, ('initial_event_info', {
'alt_title': ('title', {str}),
'thumbnail': ('poster_url', {url_or_none}),
})),
}


class ZaikoETicketIE(ZaikoBaseIE):
_VALID_URL = r'https?://(?:www.)?zaiko\.io/account/eticket/(?P<id>[\w=-]{49})'
_TESTS = [{
'url': 'https://zaiko.io/account/eticket/TZjMwMzQ2Y2EzMXwyMDIzMDYwNzEyMTMyNXw1MDViOWU2Mw==',
'playlist_count': 1,
'info_dict': {
'id': 'f30346ca31-20230607121325-505b9e63',
'title': 'ZAIKO STREAMING TEST',
'thumbnail': 'https://media.zkocdn.net/pf_1/1_3wdyjcjyupseatkwid34u',
},
'skip': 'Only available with the ticketholding account',
}]

def _real_extract(self, url):
ticket_id = self._match_id(url)
ticket_id = try_call(
lambda: base64.urlsafe_b64decode(ticket_id[1:]).decode().replace('|', '-')) or ticket_id

webpage = self._download_real_webpage(url, ticket_id)
eticket = self._parse_vue_element_attr('eticket', webpage, ticket_id)

return self.playlist_result(
[self.url_result(stream, ZaikoIE) for stream in traverse_obj(eticket, ('streams', ..., 'url'))],
ticket_id, **traverse_obj(eticket, ('ticket-details', {
'title': 'event_name',
'thumbnail': 'event_img_url',
})))
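The _parse_vue_element_attr helper above exploits how Vue binds JSON props: attributes prefixed with ":" carry HTML-escaped JSON, so each one is unescaped and JSON-decoded. A stdlib-only sketch of that idea against hypothetical markup (the tag, attribute, and URL below are made up):

    import html
    import json
    import re

    page = '<stream-page :stream-access=\'{"video_source": "https://example.com/player"}\' lang="en">'

    attrs = {}
    # crude attribute scan for the sketch; the extractor uses extract_attributes()
    for key, _, value in re.findall(r'([\w:-]+)=([\'"])(.*?)\2', page):
        if key.startswith(':'):
            attrs[key[1:]] = json.loads(html.unescape(value))

    assert attrs['stream-access']['video_source'] == 'https://example.com/player'
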
@@ -24,7 +24,7 @@ from ..utils import (

class ZDFBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['DE']
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'uhd')
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')

def _call_api(self, url, video_id, item, api_token=None, referrer=None):
headers = {}
@@ -61,6 +61,9 @@ class ZDFBaseIE(InfoExtractor):
elif mime_type == 'application/f4m+xml' or ext == 'f4m':
new_formats = self._extract_f4m_formats(
update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
elif ext == 'mpd':
new_formats = self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False)
else:
f = parse_codecs(meta.get('mimeCodec'))
if not f and meta.get('type'):

@@ -1,14 +1,16 @@
import json
import random
import string
import time
import uuid

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
jwt_decode_hs256,
parse_age_limit,
str_or_none,
try_call,
try_get,
unified_strdate,
unified_timestamp,
@@ -94,12 +96,12 @@ class Zee5IE(InfoExtractor):
'url': 'https://www.zee5.com/music-videos/details/adhento-gaani-vunnapaatuga-jersey-nani-shraddha-srinath/0-0-56973',
'only_matching': True
}]
_DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails/secure?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
_DEVICE_ID = ''.join(random.choices(string.ascii_letters + string.digits, k=20)).ljust(32, '0')
_DEVICE_ID = str(uuid.uuid4())
_USER_TOKEN = None
_LOGIN_HINT = 'Use "--username <mobile_number>" to login using otp or "--username token" and "--password <user_token>" to login using user token.'
_NETRC_MACHINE = 'zee5'
_GEO_COUNTRIES = ['IN']
_USER_COUNTRY = None

def _perform_login(self, username, password):
if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
@@ -118,11 +120,16 @@ class Zee5IE(InfoExtractor):
self._USER_TOKEN = otp_verify_json.get('token')
if not self._USER_TOKEN:
raise ExtractorError(otp_request_json['message'], expected=True)
elif username.lower() == 'token' and len(password) > 1198:
elif username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
self._USER_TOKEN = password
else:
raise ExtractorError(self._LOGIN_HINT, expected=True)

token = jwt_decode_hs256(self._USER_TOKEN)
if token.get('exp', 0) <= int(time.time()):
raise ExtractorError('User token has expired', expected=True)
self._USER_COUNTRY = token.get('current_country')

def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
access_token_request = self._download_json(
@@ -137,8 +144,13 @@ class Zee5IE(InfoExtractor):
data['X-Z5-Guest-Token'] = self._DEVICE_ID

json_data = self._download_json(
self._DETAIL_API_URL.format(video_id, self._DEVICE_ID),
video_id, headers={'content-type': 'application/json'}, data=json.dumps(data).encode('utf-8'))
'https://spapi.zee5.com/singlePlayback/getDetails/secure', video_id, query={
'content_id': video_id,
'device_id': self._DEVICE_ID,
'platform_name': 'desktop_web',
'country': self._USER_COUNTRY or self.get_param('geo_bypass_country') or 'IN',
'check_parental_control': False,
}, headers={'content-type': 'application/json'}, data=json.dumps(data).encode('utf-8'))
asset_data = json_data['assetDetails']
show_data = json_data.get('showDetails', {})
if 'premium' in asset_data['business_type']:

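The new Zee5 login path validates the supplied token structurally (via jwt_decode_hs256) and checks its exp claim, instead of sniffing the password length. A sketch of the same check with a hand-rolled payload decoder, matching what jwt_decode_hs256 does (decode the payload segment without verifying the signature):

    import base64
    import json
    import time

    def decode_jwt_payload(token):
        payload = token.split('.')[1]
        payload += '=' * (-len(payload) % 4)  # restore base64 padding
        return json.loads(base64.urlsafe_b64decode(payload))

    def is_usable(token):
        try:
            return decode_jwt_payload(token).get('exp', 0) > int(time.time())
        except Exception:
            return False  # not a JWT at all -> fall through to the login hint
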
@@ -44,7 +44,7 @@ def _js_arith_op(op):


def _js_div(a, b):
if JS_Undefined in (a, b) or not (a and b):
if JS_Undefined in (a, b) or not (a or b):
return float('nan')
return (a or 0) / b if b else float('inf')

@@ -779,7 +779,7 @@ class JSInterpreter:
obj = {}
obj_m = re.search(
r'''(?x)
(?<!this\.)%s\s*=\s*{\s*
(?<!\.)%s\s*=\s*{\s*
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
}\s*;
''' % (re.escape(objname), _FUNC_NAME_RE),
@@ -812,9 +812,9 @@ class JSInterpreter:
\((?P<args>[^)]*)\)\s*
(?P<code>{.+})''' % {'name': re.escape(funcname)},
self.code)
code, _ = self._separate_at_paren(func_m.group('code'))
if func_m is None:
raise self.Exception(f'Could not find JS function "{funcname}"')
code, _ = self._separate_at_paren(func_m.group('code'))
return [x.strip() for x in func_m.group('args').split(',')], code

def extract_function(self, funcname):

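The _js_div fix matters because the old `not (a and b)` guard returned NaN whenever either operand was falsy, while JavaScript only yields NaN when the division is genuinely indeterminate. A standalone check of the corrected semantics (JS_Undefined here is a stand-in for jsinterp's sentinel):

    import math

    JS_Undefined = object()  # stand-in for jsinterp's undefined sentinel

    def js_div(a, b):
        # NaN only for undefined operands or 0/0; a zero divisor alone -> Infinity
        if JS_Undefined in (a, b) or not (a or b):
            return float('nan')
        return (a or 0) / b if b else float('inf')

    assert js_div(1, 0) == float('inf')  # old `a and b` test wrongly gave NaN
    assert js_div(0, 5) == 0             # old test wrongly gave NaN here too
    assert math.isnan(js_div(0, 0))      # the genuinely indeterminate case
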
@@ -474,15 +474,15 @@ def create_parser():
callback_kwargs={
'allowed_values': {
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter',
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
}, 'aliases': {
'youtube-dl': ['all', '-multistreams'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'],
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2022': ['no-external-downloader-progress'],
'2022': ['no-external-downloader-progress', 'playlist-match-filter'],
}
}, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
@@ -727,6 +727,10 @@ def create_parser():
'--netrc-location',
dest='netrc_location', metavar='PATH',
help='Location of .netrc authentication data; either the path or its containing directory. Defaults to ~/.netrc')
authentication.add_option(
'--netrc-cmd',
dest='netrc_cmd', metavar='NETRC_CMD',
help='Command to execute to get the credentials for an extractor.')
authentication.add_option(
'--video-password',
dest='videopassword', metavar='PASSWORD',
@@ -1015,8 +1019,9 @@ def create_parser():
'--download-sections',
metavar='REGEX', dest='download_ranges', action='append',
help=(
'Download only chapters whose title matches the given regular expression. '
'Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. '
'Download only chapters that match the regular expression. '
'A "*" prefix denotes time-range instead of chapter. Negative timestamps are calculated from the end. '
'"*from-url" can be used to download between the "start_time" and "end_time" extracted from the URL. '
'Needs ffmpeg. This option can be used multiple times to download multiple sections, '
'e.g. --download-sections "*10:15-inf" --download-sections "intro"'))
downloader.add_option(
@@ -1417,8 +1422,7 @@ def create_parser():
'--clean-info-json', '--clean-infojson',
action='store_true', dest='clean_infojson', default=None,
help=(
'Remove some private fields such as filenames from the infojson. '
'Note that it could still contain some personal information (default)'))
'Remove some internal metadata such as filenames from the infojson (default)'))
filesystem.add_option(
'--no-clean-info-json', '--no-clean-infojson',
action='store_false', dest='clean_infojson',
@@ -1681,8 +1685,7 @@ def create_parser():
'Execute a command, optionally prefixed with when to execute it, separated by a ":". '
'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). '
'Same syntax as the output template can be used to pass any field as arguments to the command. '
'After download, an additional field "filepath" that contains the final path of the downloaded file '
'is also available, and if no fields are passed, %(filepath,_filename|)q is appended to the end of the command. '
'If no fields are passed, %(filepath,_filename|)q is appended to the end of the command. '
'This option can be used multiple times'))
postproc.add_option(
'--no-exec',

@@ -187,7 +187,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
tmpl = progress_template.get('postprocess')
if tmpl:
self._downloader.to_screen(
self._downloader.evaluate_outtmpl(tmpl, progress_dict), skip_eol=True, quiet=False)
self._downloader.evaluate_outtmpl(tmpl, progress_dict), quiet=False)

self._downloader.to_console_title(self._downloader.evaluate_outtmpl(
progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s',

@@ -149,7 +149,7 @@ class Updater:
f'You are switching to an {self.ydl._format_err("unofficial", "red")} executable '
f'from {self.ydl._format_err(self._target_repo, self.ydl.Styles.EMPHASIS)}. '
f'Run {self.ydl._format_err("at your own risk", "light red")}')
self.restart = self._blocked_restart
self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
else:
self._target_repo = UPDATE_SOURCES.get(self.target_channel)
if not self._target_repo:
@@ -294,6 +294,7 @@ class Updater:
if (_VERSION_RE.fullmatch(self.target_tag[5:])
and version_tuple(self.target_tag[5:]) < (2023, 3, 2)):
self.ydl.report_warning('You are downgrading to a version without --update-to')
self._block_restart('Cannot automatically restart to a version without --update-to')

directory = os.path.dirname(self.filename)
if not os.access(self.filename, os.W_OK):
@@ -381,11 +382,11 @@ class Updater:
_, _, returncode = Popen.run(self.cmd)
return returncode

def _blocked_restart(self):
self._report_error(
'Automatically restarting into custom builds is disabled for security reasons. '
'Restart yt-dlp to use the updated version', expected=True)
return self.ydl._download_retcode
def _block_restart(self, msg):
def wrapper():
self._report_error(f'{msg}. Restart yt-dlp to use the updated version', expected=True)
return self.ydl._download_retcode
self.restart = wrapper


def run_update(ydl):

@@ -6,7 +6,7 @@ import sys
import urllib.parse
import zlib

from ._utils import decode_base_n, preferredencoding
from ._utils import Popen, decode_base_n, preferredencoding
from .traversal import traverse_obj
from ..dependencies import certifi, websockets

@@ -174,3 +174,7 @@ def handle_youtubedl_headers(headers):
del filtered_headers['Youtubedl-no-compression']

return filtered_headers


def process_communicate_or_kill(p, *args, **kwargs):
return Popen.communicate_or_kill(p, *args, **kwargs)

@@ -25,6 +25,7 @@ import json
import locale
import math
import mimetypes
import netrc
import operator
import os
import platform
@@ -864,10 +865,11 @@ def escapeHTML(text):
)


def process_communicate_or_kill(p, *args, **kwargs):
deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
return Popen.communicate_or_kill(p, *args, **kwargs)
class netrc_from_content(netrc.netrc):
def __init__(self, content):
self.hosts, self.macros = {}, {}
with io.StringIO(content) as stream:
self._parse('-', stream, False)


class Popen(subprocess.Popen):
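netrc_from_content parses netrc data from an in-memory string rather than a file, which is what lets the new --netrc-cmd option feed credentials from a command's stdout. A self-contained usage sketch (the machine name and credentials below are invented):

    import io
    import netrc

    class netrc_from_content(netrc.netrc):
        def __init__(self, content):
            self.hosts, self.macros = {}, {}
            with io.StringIO(content) as stream:
                # stdlib parser, pointed at the string instead of ~/.netrc
                self._parse('-', stream, False)

    content = 'machine zee5 login 9876543210 password s3cret'
    login, _, password = netrc_from_content(content).authenticators('zee5')
    assert (login, password) == ('9876543210', 's3cret')
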
@@ -1654,7 +1656,7 @@ def unified_strdate(date_str, day_first=True):


def unified_timestamp(date_str, day_first=True, with_milliseconds=False):
if date_str is None:
if not isinstance(date_str, str):
return None

date_str = re.sub(r'\s+', ' ', re.sub(
@@ -2446,13 +2448,16 @@ def request_to_url(req):
return req


def strftime_or_none(timestamp, date_format, default=None):
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
datetime_object = None
try:
if isinstance(timestamp, (int, float)): # unix timestamp
# Using naive datetime here can break timestamp() in Windows
# Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
datetime_object = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
# Also, datetime.datetime.fromtimestamp breaks for negative timestamps
# Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
datetime_object = (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
+ datetime.timedelta(seconds=timestamp))
elif isinstance(timestamp, str): # assume YYYYMMDD
datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
date_format = re.sub( # Support %s on windows
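The epoch-plus-timedelta form in the change above exists because datetime.fromtimestamp() can raise for negative (pre-1970) timestamps on some platforms, while adding a timedelta to the epoch is well-defined either way. A quick check of the technique in isolation:

    import datetime

    def from_ts(seconds):
        # avoids fromtimestamp()'s platform-dependent failure on negative input
        return (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
                + datetime.timedelta(seconds=seconds))

    assert from_ts(0).year == 1970
    assert from_ts(-86400).strftime('%Y%m%d') == '19691231'  # one day before the epoch
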
@@ -3304,7 +3309,7 @@ STR_FORMAT_RE_TMPL = r'''(?x)
'''


STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'


def limit_length(s, length):
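The "a" added to STR_FORMAT_TYPES pairs with the fmt[-1] == 'a' branch in YoutubeDL's template evaluation: the field is passed through Python's ascii() before being formatted as a string, so non-ASCII characters come out escaped. The substitution step in isolation (the channel name is just sample data):

    channel = 'さなちゃんねる'
    # ascii() repr-escapes every non-ASCII character before %s-formatting
    assert ('%s' % ascii(channel)) == r"'\u3055\u306a\u3061\u3083\u3093\u306d\u308b'"
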
@@ -3507,7 +3512,8 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
},
}

sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
sanitize_codec = functools.partial(
try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)

for ext in preferences or COMPATIBLE_CODECS.keys():
@@ -3753,12 +3759,10 @@ def match_filter_func(filters, breaking_filters=None):


class download_range_func:
def __init__(self, chapters, ranges):
self.chapters, self.ranges = chapters, ranges
def __init__(self, chapters, ranges, from_info=False):
self.chapters, self.ranges, self.from_info = chapters, ranges, from_info

def __call__(self, info_dict, ydl):
if not self.ranges and not self.chapters:
yield {}

warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
else 'Cannot match chapters since chapter information is unavailable')
@@ -3770,7 +3774,23 @@ class download_range_func:
if self.chapters and warning:
ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')

yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
for start, end in self.ranges or []:
yield {
'start_time': self._handle_negative_timestamp(start, info_dict),
'end_time': self._handle_negative_timestamp(end, info_dict),
}

if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
yield {
'start_time': info_dict.get('start_time') or 0,
'end_time': info_dict.get('end_time') or float('inf'),
}
elif not self.ranges and not self.chapters:
yield {}

@staticmethod
def _handle_negative_timestamp(time, info):
return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time

def __eq__(self, other):
return (isinstance(other, download_range_func)
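This is what makes negative --download-sections timestamps work: offsets are taken from the end of the video whenever a duration is known, and clamped at zero. A sketch of _handle_negative_timestamp's arithmetic:

    def resolve(time, duration):
        # negative offsets count back from the end; unknown duration passes through
        return max(duration + time, 0) if duration and time < 0 else time

    assert resolve(-30, 600) == 570   # "*-30-inf" on a 10-minute video
    assert resolve(10, 600) == 10     # positive offsets are unchanged
    assert resolve(-900, 600) == 0    # over-long offsets clamp to the start
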
@@ -4152,6 +4172,7 @@ class ISO639Utils:
'or': 'ori',
'os': 'oss',
'pa': 'pan',
'pe': 'per',
'pi': 'pli',
'pl': 'pol',
'ps': 'pus',
@@ -5673,6 +5694,7 @@ def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None)
return orderedSet(requested)


# TODO: Rewrite
class FormatSorter:
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'

@@ -5721,8 +5743,10 @@ class FormatSorter:
'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},

'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
'function': lambda it: next(filter(None, it), None)},
'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
'function': lambda it: next(filter(None, it), None)},
'ext': {'type': 'combined', 'field': ('vext', 'aext')},
'res': {'type': 'multiple', 'field': ('height', 'width'),
'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
@@ -5953,13 +5977,15 @@ class FormatSorter:
format['preference'] = -100

# Determine missing bitrates
if format.get('tbr') is None:
if format.get('vbr') is not None and format.get('abr') is not None:
format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
else:
if format.get('vcodec') != 'none' and format.get('vbr') is None:
format['vbr'] = format.get('tbr') - format.get('abr', 0)
if format.get('acodec') != 'none' and format.get('abr') is None:
format['abr'] = format.get('tbr') - format.get('vbr', 0)
if format.get('vcodec') == 'none':
format['vbr'] = 0
if format.get('acodec') == 'none':
format['abr'] = 0
if not format.get('vbr') and format.get('vcodec') != 'none':
format['vbr'] = try_call(lambda: format['tbr'] - format['abr']) or None
if not format.get('abr') and format.get('acodec') != 'none':
format['abr'] = try_call(lambda: format['tbr'] - format['vbr']) or None
if not format.get('tbr'):
format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None

return tuple(self._calculate_field_preference(format, field) for field in self._order)

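The rewritten bitrate derivation zeroes the bitrate of whichever side has no codec and then fills any one of tbr/vbr/abr from the other two, instead of only deriving when tbr happens to be present. A simplified standalone sketch of the same rules (the real code uses try_call and leaves values as floats):

    def fill_bitrates(fmt):
        def try_diff(x, y):
            try:
                return (x - y) or None
            except TypeError:  # one of the operands is missing
                return None

        if fmt.get('vcodec') == 'none':
            fmt['vbr'] = 0
        if fmt.get('acodec') == 'none':
            fmt['abr'] = 0
        if not fmt.get('vbr') and fmt.get('vcodec') != 'none':
            fmt['vbr'] = try_diff(fmt.get('tbr'), fmt.get('abr'))
        if not fmt.get('abr') and fmt.get('acodec') != 'none':
            fmt['abr'] = try_diff(fmt.get('tbr'), fmt.get('vbr'))
        if not fmt.get('tbr'):
            try:
                fmt['tbr'] = fmt['vbr'] + fmt['abr']
            except TypeError:
                pass
        return fmt

    # video-only format: the whole tbr is video bitrate
    assert fill_bitrates({'vcodec': 'avc1', 'acodec': 'none', 'tbr': 1000})['vbr'] == 1000
    # muxed format with both stream bitrates known: tbr is their sum
    assert fill_bitrates({'vcodec': 'avc1', 'acodec': 'mp4a', 'vbr': 800, 'abr': 128})['tbr'] == 928
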
@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py

__version__ = '2023.03.04'
__version__ = '2023.06.22'

RELEASE_GIT_HEAD = '392389b7df7b818f794b231f14dc396d4875fbad'
RELEASE_GIT_HEAD = '812cdfa06c33a40e73a8e04b3e6f42c084666a43'

VARIANT = None