mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-01-06 06:51:30 +00:00
Merge branch 'master' into yt-live-from-start-range
This commit is contained in:
@@ -65,7 +65,13 @@ from .postprocessor import (
|
||||
get_postprocessor,
|
||||
)
|
||||
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
|
||||
from .update import REPOSITORY, _get_system_deprecation, current_git_head, detect_variant
|
||||
from .update import (
|
||||
REPOSITORY,
|
||||
_get_system_deprecation,
|
||||
_make_label,
|
||||
current_git_head,
|
||||
detect_variant,
|
||||
)
|
||||
from .utils import (
|
||||
DEFAULT_OUTTMPL,
|
||||
IDENTITY,
|
||||
@@ -163,7 +169,7 @@ from .utils.networking import (
|
||||
clean_proxies,
|
||||
std_headers,
|
||||
)
|
||||
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
|
||||
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
import ctypes
|
||||
@@ -630,13 +636,16 @@ class YoutubeDL:
|
||||
'Overwriting params from "color" with "no_color"')
|
||||
self.params['color'] = 'no_color'
|
||||
|
||||
term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'
|
||||
term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
|
||||
no_color = bool(os.getenv('NO_COLOR'))
|
||||
|
||||
def process_color_policy(stream):
|
||||
stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
|
||||
policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
|
||||
if policy in ('auto', None):
|
||||
return term_allow_color and supports_terminal_sequences(stream)
|
||||
if term_allow_color and supports_terminal_sequences(stream):
|
||||
return 'no_color' if no_color else True
|
||||
return False
|
||||
assert policy in ('always', 'never', 'no_color'), policy
|
||||
return {'always': True, 'never': False}.get(policy, policy)
|
||||
|
||||
@@ -1181,6 +1190,7 @@ class YoutubeDL:
|
||||
MATH_FUNCTIONS = {
|
||||
'+': float.__add__,
|
||||
'-': float.__sub__,
|
||||
'*': float.__mul__,
|
||||
}
|
||||
# Field is of the form key1.key2...
|
||||
# where keys (except first) can be string, int, slice or "{field, ...}"
|
||||
@@ -1202,6 +1212,15 @@ class YoutubeDL:
|
||||
(?:\|(?P<default>.*?))?
|
||||
)$''')
|
||||
|
||||
def _from_user_input(field):
|
||||
if field == ':':
|
||||
return ...
|
||||
elif ':' in field:
|
||||
return slice(*map(int_or_none, field.split(':')))
|
||||
elif int_or_none(field) is not None:
|
||||
return int(field)
|
||||
return field
|
||||
|
||||
def _traverse_infodict(fields):
|
||||
fields = [f for x in re.split(r'\.({.+?})\.?', fields)
|
||||
for f in ([x] if x.startswith('{') else x.split('.'))]
|
||||
@@ -1211,11 +1230,12 @@ class YoutubeDL:
|
||||
|
||||
for i, f in enumerate(fields):
|
||||
if not f.startswith('{'):
|
||||
fields[i] = _from_user_input(f)
|
||||
continue
|
||||
assert f.endswith('}'), f'No closing brace for {f} in {fields}'
|
||||
fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
|
||||
fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}
|
||||
|
||||
return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
|
||||
return traverse_obj(info_dict, fields, traverse_string=True)
|
||||
|
||||
def get_value(mdict):
|
||||
# Object traversal
|
||||
@@ -2343,7 +2363,7 @@ class YoutubeDL:
|
||||
return
|
||||
|
||||
for f in formats:
|
||||
if f.get('has_drm'):
|
||||
if f.get('has_drm') or f.get('__needs_testing'):
|
||||
yield from self._check_formats([f])
|
||||
else:
|
||||
yield f
|
||||
@@ -2456,9 +2476,16 @@ class YoutubeDL:
|
||||
return selector_function(ctx_copy)
|
||||
return final_selector
|
||||
|
||||
stream = io.BytesIO(format_spec.encode())
|
||||
# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
|
||||
# Prefix numbers with random letters to avoid it being classified as a number
|
||||
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
|
||||
# TODO: Implement parser not reliant on tokenize.tokenize
|
||||
prefix = ''.join(random.choices(string.ascii_letters, k=32))
|
||||
stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
|
||||
try:
|
||||
tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
|
||||
tokens = list(_remove_unused_ops(
|
||||
token._replace(string=token.string.replace(prefix, ''))
|
||||
for token in tokenize.tokenize(stream.readline)))
|
||||
except tokenize.TokenError:
|
||||
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
|
||||
|
||||
@@ -2591,6 +2618,9 @@ class YoutubeDL:
|
||||
upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
|
||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||
|
||||
if not info_dict.get('release_year'):
|
||||
info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
|
||||
|
||||
live_keys = ('is_live', 'was_live')
|
||||
live_status = info_dict.get('live_status')
|
||||
if live_status is None:
|
||||
@@ -2769,7 +2799,8 @@ class YoutubeDL:
|
||||
format['dynamic_range'] = 'SDR'
|
||||
if format.get('aspect_ratio') is None:
|
||||
format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
|
||||
if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average
|
||||
# For fragmented formats, "tbr" is often max bitrate and not average
|
||||
if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
|
||||
and info_dict.get('duration') and format.get('tbr')
|
||||
and not format.get('filesize') and not format.get('filesize_approx')):
|
||||
format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
|
||||
@@ -3548,14 +3579,14 @@ class YoutubeDL:
|
||||
'version': __version__,
|
||||
'current_git_head': current_git_head(),
|
||||
'release_git_head': RELEASE_GIT_HEAD,
|
||||
'repository': REPOSITORY,
|
||||
'repository': ORIGIN,
|
||||
})
|
||||
|
||||
if remove_private_keys:
|
||||
reject = lambda k, v: v is None or k.startswith('__') or k in {
|
||||
'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
|
||||
'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
|
||||
'playlist_autonumber', '_format_sort_fields',
|
||||
'playlist_autonumber',
|
||||
}
|
||||
else:
|
||||
reject = lambda k, v: False
|
||||
@@ -3931,8 +3962,8 @@ class YoutubeDL:
|
||||
source += '*'
|
||||
klass = type(self)
|
||||
write_debug(join_nonempty(
|
||||
f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
|
||||
f'{CHANNEL}@{__version__}',
|
||||
f'{REPOSITORY.rpartition("/")[2]} version',
|
||||
_make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
|
||||
f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
|
||||
'' if source == 'unknown' else f'({source})',
|
||||
'' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
|
||||
@@ -3973,7 +4004,7 @@ class YoutubeDL:
|
||||
})) or 'none'))
|
||||
|
||||
write_debug(f'Proxy map: {self.proxies}')
|
||||
# write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||||
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||||
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
||||
display_list = ['%s%s' % (
|
||||
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
||||
@@ -4056,12 +4087,25 @@ class YoutubeDL:
|
||||
return self._request_director.send(req)
|
||||
except NoSupportingHandlers as e:
|
||||
for ue in e.unsupported_errors:
|
||||
# FIXME: This depends on the order of errors.
|
||||
if not (ue.handler and ue.msg):
|
||||
continue
|
||||
if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
|
||||
raise RequestError(
|
||||
'file:// URLs are disabled by default in yt-dlp for security reasons. '
|
||||
'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
|
||||
if 'unsupported proxy type: "https"' in ue.msg.lower():
|
||||
raise RequestError(
|
||||
'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
|
||||
|
||||
elif (
|
||||
re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
|
||||
and 'websockets' not in self._request_director.handlers
|
||||
):
|
||||
raise RequestError(
|
||||
'This request requires WebSocket support. '
|
||||
'Ensure one of the following dependencies are installed: websockets',
|
||||
cause=ue) from ue
|
||||
raise
|
||||
except SSLError as e:
|
||||
if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
|
||||
@@ -4104,6 +4148,8 @@ class YoutubeDL:
|
||||
}),
|
||||
))
|
||||
director.preferences.update(preferences or [])
|
||||
if 'prefer-legacy-http-handler' in self.params['compat_opts']:
|
||||
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
|
||||
return director
|
||||
|
||||
def encode(self, s):
|
||||
@@ -4226,7 +4272,7 @@ class YoutubeDL:
|
||||
return ret
|
||||
|
||||
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
|
||||
''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
|
||||
''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
|
||||
write_all = self.params.get('write_all_thumbnails', False)
|
||||
thumbnails, ret = [], []
|
||||
if write_all or self.params.get('writethumbnail', False):
|
||||
@@ -4242,6 +4288,9 @@ class YoutubeDL:
|
||||
self.write_debug(f'Skipping writing {label} thumbnail')
|
||||
return ret
|
||||
|
||||
if thumbnails and not self._ensure_dir_exists(filename):
|
||||
return None
|
||||
|
||||
for idx, t in list(enumerate(thumbnails))[::-1]:
|
||||
thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
|
||||
thumb_display_id = f'{label} thumbnail {t["id"]}'
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
try:
|
||||
import contextvars # noqa: F401
|
||||
except Exception:
|
||||
raise Exception(
|
||||
f'You are using an unsupported version of Python. Only Python versions 3.7 and above are supported by yt-dlp') # noqa: F541
|
||||
import sys
|
||||
|
||||
if sys.version_info < (3, 8):
|
||||
raise ImportError(
|
||||
f'You are using an unsupported version of Python. Only Python versions 3.8 and above are supported by yt-dlp') # noqa: F541
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
||||
@@ -12,7 +12,6 @@ import itertools
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
@@ -75,14 +74,16 @@ def _exit(status=0, *args):
|
||||
|
||||
|
||||
def get_urls(urls, batchfile, verbose):
|
||||
# Batch file verification
|
||||
"""
|
||||
@param verbose -1: quiet, 0: normal, 1: verbose
|
||||
"""
|
||||
batch_urls = []
|
||||
if batchfile is not None:
|
||||
try:
|
||||
batch_urls = read_batch_urls(
|
||||
read_stdin('URLs') if batchfile == '-'
|
||||
read_stdin(None if verbose == -1 else 'URLs') if batchfile == '-'
|
||||
else open(expand_path(batchfile), encoding='utf-8', errors='ignore'))
|
||||
if verbose:
|
||||
if verbose == 1:
|
||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||
except OSError:
|
||||
_exit(f'ERROR: batch file {batchfile} could not be read')
|
||||
@@ -729,7 +730,7 @@ ParsedOptions = collections.namedtuple('ParsedOptions', ('parser', 'options', 'u
|
||||
def parse_options(argv=None):
|
||||
"""@returns ParsedOptions(parser, opts, urls, ydl_opts)"""
|
||||
parser, opts, urls = parseOpts(argv)
|
||||
urls = get_urls(urls, opts.batchfile, opts.verbose)
|
||||
urls = get_urls(urls, opts.batchfile, -1 if opts.quiet and not opts.verbose else opts.verbose)
|
||||
|
||||
set_compat_opts(opts)
|
||||
try:
|
||||
|
||||
@@ -21,9 +21,11 @@ def get_hidden_imports():
|
||||
yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
|
||||
yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
|
||||
yield pycryptodome_module()
|
||||
yield from collect_submodules('websockets')
|
||||
# Only `websockets` is required, others are collected just in case
|
||||
for module in ('websockets', 'requests', 'urllib3'):
|
||||
yield from collect_submodules(module)
|
||||
# These are auto-detected, but explicitly add them just in case
|
||||
yield from ('mutagen', 'brotli', 'certifi')
|
||||
yield from ('mutagen', 'brotli', 'certifi', 'secretstorage')
|
||||
|
||||
|
||||
hiddenimports = list(get_hidden_imports())
|
||||
|
||||
@@ -10,17 +10,3 @@ try:
|
||||
cache # >= 3.9
|
||||
except NameError:
|
||||
cache = lru_cache(maxsize=None)
|
||||
|
||||
try:
|
||||
cached_property # >= 3.8
|
||||
except NameError:
|
||||
class cached_property:
|
||||
def __init__(self, func):
|
||||
update_wrapper(self, func)
|
||||
self.func = func
|
||||
|
||||
def __get__(self, instance, _):
|
||||
if instance is None:
|
||||
return self
|
||||
setattr(instance, self.func.__name__, self.func(instance))
|
||||
return getattr(instance, self.func.__name__)
|
||||
|
||||
@@ -186,7 +186,7 @@ def _firefox_browser_dir():
|
||||
if sys.platform in ('cygwin', 'win32'):
|
||||
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
elif sys.platform == 'darwin':
|
||||
return os.path.expanduser('~/Library/Application Support/Firefox')
|
||||
return os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||
return os.path.expanduser('~/.mozilla/firefox')
|
||||
|
||||
|
||||
|
||||
@@ -58,6 +58,15 @@ except (ImportError, SyntaxError):
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/2633
|
||||
websockets = None
|
||||
|
||||
try:
|
||||
import urllib3
|
||||
except ImportError:
|
||||
urllib3 = None
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
requests = None
|
||||
|
||||
try:
|
||||
import xattr # xattr or pyxattr
|
||||
|
||||
@@ -15,12 +15,15 @@ class DashSegmentsFD(FragmentFD):
|
||||
FD_NAME = 'dashsegments'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
if info_dict.get('is_live') and set(info_dict['protocol'].split('+')) != {'http_dash_segments_generator'}:
|
||||
self.report_error('Live DASH videos are not supported')
|
||||
if 'http_dash_segments_generator' in info_dict['protocol'].split('+'):
|
||||
real_downloader = None # No external FD can support --live-from-start
|
||||
else:
|
||||
if info_dict.get('is_live'):
|
||||
self.report_error('Live DASH videos are not supported')
|
||||
real_downloader = get_suitable_downloader(
|
||||
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
|
||||
|
||||
real_start = time.time()
|
||||
real_downloader = get_suitable_downloader(
|
||||
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
|
||||
|
||||
requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])]
|
||||
args = []
|
||||
|
||||
@@ -335,7 +335,7 @@ class Aria2cFD(ExternalFD):
|
||||
cmd += ['--auto-file-renaming=false']
|
||||
|
||||
if 'fragments' in info_dict:
|
||||
cmd += ['--file-allocation=none', '--uri-selector=inorder']
|
||||
cmd += ['--uri-selector=inorder']
|
||||
url_list_file = '%s.frag.urls' % tmpfilename
|
||||
url_list = []
|
||||
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||
|
||||
@@ -6,7 +6,7 @@ from . import get_suitable_downloader
|
||||
from .common import FileDownloader
|
||||
from .external import FFmpegFD
|
||||
from ..networking import Request
|
||||
from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get
|
||||
from ..utils import DownloadError, str_or_none, try_get
|
||||
|
||||
|
||||
class NiconicoDmcFD(FileDownloader):
|
||||
@@ -64,7 +64,6 @@ class NiconicoLiveFD(FileDownloader):
|
||||
ws_url = info_dict['url']
|
||||
ws_extractor = info_dict['ws']
|
||||
ws_origin_host = info_dict['origin']
|
||||
cookies = info_dict.get('cookies')
|
||||
live_quality = info_dict.get('live_quality', 'high')
|
||||
live_latency = info_dict.get('live_latency', 'high')
|
||||
dl = FFmpegFD(self.ydl, self.params or {})
|
||||
@@ -76,12 +75,7 @@ class NiconicoLiveFD(FileDownloader):
|
||||
|
||||
def communicate_ws(reconnect):
|
||||
if reconnect:
|
||||
ws = WebSocketsWrapper(ws_url, {
|
||||
'Cookies': str_or_none(cookies) or '',
|
||||
'Origin': f'https://{ws_origin_host}',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': self.params['http_headers']['User-Agent'],
|
||||
})
|
||||
ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
|
||||
if self.ydl.params.get('verbose', False):
|
||||
self.to_screen('[debug] Sending startWatching request')
|
||||
ws.send(json.dumps({
|
||||
|
||||
@@ -47,7 +47,7 @@ from .acast import (
|
||||
ACastChannelIE,
|
||||
)
|
||||
from .acfun import AcFunVideoIE, AcFunBangumiIE
|
||||
from .adn import ADNIE
|
||||
from .adn import ADNIE, ADNSeasonIE
|
||||
from .adobeconnect import AdobeConnectIE
|
||||
from .adobetv import (
|
||||
AdobeTVEmbedIE,
|
||||
@@ -77,16 +77,24 @@ from .agora import (
|
||||
WyborczaPodcastIE,
|
||||
WyborczaVideoIE,
|
||||
)
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .airtv import AirTVIE
|
||||
from .aitube import AitubeKZVideoIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .allstar import (
|
||||
AllstarIE,
|
||||
AllstarProfileIE,
|
||||
)
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amara import AmaraIE
|
||||
from .altcensored import (
|
||||
AltCensoredIE,
|
||||
AltCensoredChannelIE,
|
||||
)
|
||||
from .alura import (
|
||||
AluraIE,
|
||||
AluraCourseIE
|
||||
)
|
||||
from .amadeustv import AmadeusTVIE
|
||||
from .amara import AmaraIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .amazon import (
|
||||
AmazonStoreIE,
|
||||
@@ -127,8 +135,8 @@ from .arcpublishing import ArcPublishingIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDBetaMediathekIE,
|
||||
ARDMediathekCollectionIE,
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTVIE,
|
||||
@@ -137,9 +145,9 @@ from .arte import (
|
||||
ArteTVCategoryIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atscaleconf import AtScaleConfEventIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .atvat import ATVAtIE
|
||||
from .audimedia import AudiMediaIE
|
||||
from .audioboom import AudioBoomIE
|
||||
@@ -212,6 +220,8 @@ from .bilibili import (
|
||||
BiliBiliBangumiIE,
|
||||
BiliBiliBangumiSeasonIE,
|
||||
BiliBiliBangumiMediaIE,
|
||||
BilibiliCheeseIE,
|
||||
BilibiliCheeseSeasonIE,
|
||||
BiliBiliSearchIE,
|
||||
BilibiliCategoryIE,
|
||||
BilibiliAudioIE,
|
||||
@@ -233,11 +243,6 @@ from .bitchute import (
|
||||
BitChuteIE,
|
||||
BitChuteChannelIE,
|
||||
)
|
||||
from .bitwave import (
|
||||
BitwaveReplayIE,
|
||||
BitwaveStreamIE,
|
||||
)
|
||||
from .biqle import BIQLEIE
|
||||
from .blackboardcollaborate import BlackboardCollaborateIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
@@ -252,10 +257,7 @@ from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .boxcast import BoxCastVideoIE
|
||||
from .bpb import BpbIE
|
||||
from .br import (
|
||||
BRIE,
|
||||
BRMediathekIE,
|
||||
)
|
||||
from .br import BRIE
|
||||
from .bravotv import BravoTVIE
|
||||
from .brainpop import (
|
||||
BrainPOPIE,
|
||||
@@ -265,7 +267,6 @@ from .brainpop import (
|
||||
BrainPOPFrIE,
|
||||
BrainPOPIlIE,
|
||||
)
|
||||
from .breakcom import BreakIE
|
||||
from .breitbart import BreitBartIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
@@ -277,6 +278,7 @@ from .brilliantpala import (
|
||||
)
|
||||
from .businessinsider import BusinessInsiderIE
|
||||
from .bundesliga import BundesligaIE
|
||||
from .bundestag import BundestagIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
@@ -295,16 +297,11 @@ from .camfm import (
|
||||
from .cammodels import CamModelsIE
|
||||
from .camsoda import CamsodaIE
|
||||
from .camtasia import CamtasiaEmbedIE
|
||||
from .camwithher import CamWithHerIE
|
||||
from .canal1 import Canal1IE
|
||||
from .canalalpha import CanalAlphaIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .caracoltv import CaracolTvPlayIE
|
||||
from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
)
|
||||
from .cartoonnetwork import CartoonNetworkIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
@@ -343,7 +340,6 @@ from .cda import CDAIE
|
||||
from .cellebrite import CellebriteIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .cgtn import CGTNIE
|
||||
from .channel9 import Channel9IE
|
||||
from .charlierose import CharlieRoseIE
|
||||
from .chaturbate import ChaturbateIE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
@@ -351,11 +347,10 @@ from .chingari import (
|
||||
ChingariIE,
|
||||
ChingariUserIE,
|
||||
)
|
||||
from .chirbit import (
|
||||
ChirbitIE,
|
||||
ChirbitProfileIE,
|
||||
from .chzzk import (
|
||||
CHZZKLiveIE,
|
||||
CHZZKVideoIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .cinemax import CinemaxIE
|
||||
from .cinetecamilano import CinetecaMilanoIE
|
||||
from .cineverse import (
|
||||
@@ -372,10 +367,8 @@ from .clipchamp import ClipchampIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clippit import ClippitIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .cloudy import CloudyIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
@@ -442,7 +435,6 @@ from .dacast import (
|
||||
DacastVODIE,
|
||||
DacastPlaylistIE,
|
||||
)
|
||||
from .daftsex import DaftsexIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
@@ -479,7 +471,6 @@ from .dlf import (
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .digg import DiggIE
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import (
|
||||
DouyuShowIE,
|
||||
DouyuTVIE,
|
||||
@@ -526,7 +517,6 @@ from .duboku import (
|
||||
DubokuPlaylistIE
|
||||
)
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .deuxm import (
|
||||
DeuxMIE,
|
||||
DeuxMNewsIE
|
||||
@@ -541,6 +531,7 @@ from .dropout import (
|
||||
DropoutSeasonIE,
|
||||
DropoutIE
|
||||
)
|
||||
from .duoplay import DuoplayIE
|
||||
from .dw import (
|
||||
DWIE,
|
||||
DWArticleIE,
|
||||
@@ -548,29 +539,23 @@ from .dw import (
|
||||
from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .ebay import EbayIE
|
||||
from .echomsk import EchoMskIE
|
||||
from .egghead import (
|
||||
EggheadCourseIE,
|
||||
EggheadLessonIE,
|
||||
)
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .elevensports import ElevenSportsIE
|
||||
from .ellentube import (
|
||||
EllenTubeIE,
|
||||
EllenTubeVideoIE,
|
||||
EllenTubePlaylistIE,
|
||||
)
|
||||
from .elementorembed import ElementorEmbedIE
|
||||
from .elonet import ElonetIE
|
||||
from .elpais import ElPaisIE
|
||||
from .eltrecetv import ElTreceTVIE
|
||||
from .embedly import EmbedlyIE
|
||||
from .engadget import EngadgetIE
|
||||
from .epicon import (
|
||||
EpiconIE,
|
||||
EpiconSeriesIE,
|
||||
)
|
||||
from .epidemicsound import EpidemicSoundIE
|
||||
from .eplus import EplusIbIE
|
||||
from .epoch import EpochIE
|
||||
from .eporner import EpornerIE
|
||||
@@ -584,7 +569,6 @@ from .ertgr import (
|
||||
ERTFlixIE,
|
||||
ERTWebtvEmbedIE,
|
||||
)
|
||||
from .escapist import EscapistIE
|
||||
from .espn import (
|
||||
ESPNIE,
|
||||
WatchESPNIE,
|
||||
@@ -592,15 +576,12 @@ from .espn import (
|
||||
FiveThirtyEightIE,
|
||||
ESPNCricInfoIE,
|
||||
)
|
||||
from .esri import EsriVideoIE
|
||||
from .ettutv import EttuTvIE
|
||||
from .europa import EuropaIE, EuroParlWebstreamIE
|
||||
from .europeantour import EuropeanTourIE
|
||||
from .eurosport import EurosportIE
|
||||
from .euscreen import EUScreenIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .expressen import ExpressenIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import (
|
||||
FacebookIE,
|
||||
@@ -630,6 +611,10 @@ from .filmweb import FilmwebIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .floatplane import (
|
||||
FloatplaneIE,
|
||||
FloatplaneChannelIE,
|
||||
)
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
@@ -639,16 +624,11 @@ from .fourtube import (
|
||||
PornerBrosIE,
|
||||
FuxIE,
|
||||
)
|
||||
from .fourzerostudio import (
|
||||
FourZeroStudioArchiveIE,
|
||||
FourZeroStudioClipIE,
|
||||
)
|
||||
from .fox import FOXIE
|
||||
from .fox9 import (
|
||||
FOX9IE,
|
||||
FOX9NewsIE,
|
||||
)
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
FoxNewsArticleIE,
|
||||
@@ -681,7 +661,6 @@ from .funimation import (
|
||||
)
|
||||
from .funk import FunkIE
|
||||
from .funker530 import Funker530IE
|
||||
from .fusion import FusionIE
|
||||
from .fuyintv import FuyinTVIE
|
||||
from .gab import (
|
||||
GabTVIE,
|
||||
@@ -708,11 +687,14 @@ from .genius import (
|
||||
GeniusIE,
|
||||
GeniusLyricsIE,
|
||||
)
|
||||
from .getcourseru import (
|
||||
GetCourseRuPlayerIE,
|
||||
GetCourseRuIE
|
||||
)
|
||||
from .gettr import (
|
||||
GettrIE,
|
||||
GettrStreamingIE,
|
||||
)
|
||||
from .gfycat import GfycatIE
|
||||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
from .glide import GlideIE
|
||||
@@ -758,12 +740,10 @@ from .hbo import HBOIE
|
||||
from .hearthisat import HearThisAtIE
|
||||
from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .hketv import HKETVIE
|
||||
from .hidive import HiDiveIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hitrecord import HitRecordIE
|
||||
from .hollywoodreporter import (
|
||||
HollywoodReporterIE,
|
||||
@@ -778,8 +758,6 @@ from .hotstar import (
|
||||
HotStarSeasonIE,
|
||||
HotStarSeriesIE,
|
||||
)
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
from .hrefli import HrefLiRedirectIE
|
||||
from .hrfensehen import HRFernsehenIE
|
||||
from .hrti import (
|
||||
@@ -820,6 +798,7 @@ from .iheart import (
|
||||
IHeartRadioIE,
|
||||
IHeartRadioPodcastIE,
|
||||
)
|
||||
from .ilpost import IlPostIE
|
||||
from .iltalehti import IltalehtiIE
|
||||
from .imdb import (
|
||||
ImdbIE,
|
||||
@@ -893,9 +872,18 @@ from .japandiet import (
|
||||
SangiinIE,
|
||||
)
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jiosaavn import (
|
||||
JioSaavnSongIE,
|
||||
JioSaavnAlbumIE,
|
||||
)
|
||||
from .jove import JoveIE
|
||||
from .joj import JojIE
|
||||
from .joqrag import JoqrAgIE
|
||||
from .jstream import JStreamIE
|
||||
from .jtbc import (
|
||||
JTBCIE,
|
||||
JTBCProgramIE,
|
||||
)
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
@@ -903,7 +891,6 @@ from .kanal2 import Kanal2IE
|
||||
from .kankanews import KankaNewsIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .kelbyone import KelbyOneIE
|
||||
from .khanacademy import (
|
||||
KhanAcademyIE,
|
||||
@@ -924,6 +911,7 @@ from .koo import KooIE
|
||||
from .kth import KTHIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kukululive import KukuluLiveIE
|
||||
from .kusi import KUSIIE
|
||||
from .kuwo import (
|
||||
KuwoIE,
|
||||
@@ -938,17 +926,12 @@ from .la7 import (
|
||||
LA7PodcastEpisodeIE,
|
||||
LA7PodcastIE,
|
||||
)
|
||||
from .laola1tv import (
|
||||
Laola1TvEmbedIE,
|
||||
Laola1TvIE,
|
||||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lastfm import (
|
||||
LastFMIE,
|
||||
LastFMPlaylistIE,
|
||||
LastFMUserIE,
|
||||
)
|
||||
from .laxarxames import LaXarxaMesIE
|
||||
from .lbry import (
|
||||
LBRYIE,
|
||||
LBRYChannelIE,
|
||||
@@ -997,7 +980,6 @@ from .linkedin import (
|
||||
LinkedInLearningIE,
|
||||
LinkedInLearningCourseIE,
|
||||
)
|
||||
from .linuxacademy import LinuxAcademyIE
|
||||
from .liputan6 import Liputan6IE
|
||||
from .listennotes import ListenNotesIE
|
||||
from .litv import LiTVIE
|
||||
@@ -1025,9 +1007,9 @@ from .lynda import (
|
||||
LyndaIE,
|
||||
LyndaCourseIE
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .maariv import MaarivIE
|
||||
from .magellantv import MagellanTVIE
|
||||
from .magentamusik360 import MagentaMusik360IE
|
||||
from .magentamusik import MagentaMusikIE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
MailRuMusicIE,
|
||||
@@ -1053,6 +1035,7 @@ from .markiza import (
|
||||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .masters import MastersIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mbn import MBNIE
|
||||
from .mdr import MDRIE
|
||||
from .medaltv import MedalTVIE
|
||||
from .mediaite import MediaiteIE
|
||||
@@ -1075,10 +1058,7 @@ from .medici import MediciIE
|
||||
from .megaphone import MegaphoneIE
|
||||
from .meipai import MeipaiIE
|
||||
from .melonvod import MelonVODIE
|
||||
from .meta import METAIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .mgtv import MGTVIE
|
||||
from .miaopai import MiaoPaiIE
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
@@ -1100,7 +1080,6 @@ from .minds import (
|
||||
)
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
from .mirrativ import (
|
||||
MirrativIE,
|
||||
MirrativUserIE,
|
||||
@@ -1124,13 +1103,7 @@ from .mlb import (
|
||||
MLBArticleIE,
|
||||
)
|
||||
from .mlssoccer import MLSSoccerIE
|
||||
from .mnet import MnetIE
|
||||
from .mocha import MochaVideoIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import (
|
||||
MofosexIE,
|
||||
MofosexEmbedIE,
|
||||
)
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .morningstar import MorningstarIE
|
||||
@@ -1140,7 +1113,6 @@ from .motherless import (
|
||||
MotherlessGalleryIE,
|
||||
)
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviepilot import MoviepilotIE
|
||||
from .moview import MoviewPlayIE
|
||||
from .moviezine import MoviezineIE
|
||||
@@ -1165,18 +1137,12 @@ from .musicdex import (
|
||||
MusicdexArtistIE,
|
||||
MusicdexPlaylistIE,
|
||||
)
|
||||
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||
from .mxplayer import (
|
||||
MxplayerIE,
|
||||
MxplayerShowIE,
|
||||
)
|
||||
from .mychannels import MyChannelsIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvi import (
|
||||
MyviIE,
|
||||
MyviEmbedIE,
|
||||
)
|
||||
from .myvideoge import MyVideoGeIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .mzaalo import MzaaloIE
|
||||
@@ -1225,6 +1191,7 @@ from .ndr import (
|
||||
from .ndtv import NDTVIE
|
||||
from .nebula import (
|
||||
NebulaIE,
|
||||
NebulaClassIE,
|
||||
NebulaSubscriptionsIE,
|
||||
NebulaChannelIE,
|
||||
)
|
||||
@@ -1251,7 +1218,6 @@ from .newgrounds import (
|
||||
NewgroundsUserIE,
|
||||
)
|
||||
from .newspicks import NewsPicksIE
|
||||
from .newstube import NewstubeIE
|
||||
from .newsy import NewsyIE
|
||||
from .nextmedia import (
|
||||
NextMediaIE,
|
||||
@@ -1286,7 +1252,6 @@ from .nick import (
|
||||
NickIE,
|
||||
NickBrIE,
|
||||
NickDeIE,
|
||||
NickNightIE,
|
||||
NickRuIE,
|
||||
)
|
||||
from .niconico import (
|
||||
@@ -1311,17 +1276,15 @@ from .niconicochannelplus import (
|
||||
NiconicoChannelPlusChannelLivesIE,
|
||||
)
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenews import NineNewsIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .nitter import NitterIE
|
||||
from .njpwworld import NJPWWorldIE
|
||||
from .nobelprize import NobelPrizeIE
|
||||
from .noice import NoicePodcastIE
|
||||
from .nonktube import NonkTubeIE
|
||||
from .noodlemagazine import NoodleMagazineIE
|
||||
from .noovo import NoovoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nosnl import NOSNLArticleIE
|
||||
from .nova import (
|
||||
NovaEmbedIE,
|
||||
@@ -1382,7 +1345,10 @@ from .oftv import (
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .olympics import OlympicsReplayIE
|
||||
from .on24 import On24IE
|
||||
from .ondemandkorea import OnDemandKoreaIE
|
||||
from .ondemandkorea import (
|
||||
OnDemandKoreaIE,
|
||||
OnDemandKoreaProgramIE,
|
||||
)
|
||||
from .onefootball import OneFootballIE
|
||||
from .onenewsnz import OneNewsNZIE
|
||||
from .oneplace import OnePlacePodcastIE
|
||||
@@ -1393,10 +1359,6 @@ from .onet import (
|
||||
OnetPlIE,
|
||||
)
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .ooyala import (
|
||||
OoyalaIE,
|
||||
OoyalaExternalIE,
|
||||
)
|
||||
from .opencast import (
|
||||
OpencastIE,
|
||||
OpencastPlaylistIE,
|
||||
@@ -1411,6 +1373,7 @@ from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFFM4StoryIE,
|
||||
ORFRadioIE,
|
||||
ORFPodcastIE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .outsidetv import OutsideTVIE
|
||||
@@ -1424,7 +1387,6 @@ from .palcomp3 import (
|
||||
PalcoMP3ArtistIE,
|
||||
PalcoMP3VideoIE,
|
||||
)
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .panopto import (
|
||||
PanoptoIE,
|
||||
PanoptoListIE,
|
||||
@@ -1452,7 +1414,6 @@ from .peloton import (
|
||||
PelotonIE,
|
||||
PelotonLiveIE
|
||||
)
|
||||
from .people import PeopleIE
|
||||
from .performgroup import PerformGroupIE
|
||||
from .periscope import (
|
||||
PeriscopeIE,
|
||||
@@ -1484,13 +1445,10 @@ from .platzi import (
|
||||
PlatziIE,
|
||||
PlatziCourseIE,
|
||||
)
|
||||
from .playfm import PlayFMIE
|
||||
from .playplustv import PlayPlusTVIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playstuff import PlayStuffIE
|
||||
from .playsuisse import PlaySuisseIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
from .plutotv import PlutoTVIE
|
||||
from .pluralsight import (
|
||||
@@ -1522,9 +1480,7 @@ from .popcorntimes import PopcorntimesIE
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornbox import PornboxIE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
PornHubUserIE,
|
||||
@@ -1535,7 +1491,6 @@ from .pornhub import (
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .pornez import PornezIE
|
||||
from .puhutv import (
|
||||
PuhuTVIE,
|
||||
PuhuTVSerieIE,
|
||||
@@ -1573,9 +1528,12 @@ from .radiocanada import (
|
||||
RadioCanadaIE,
|
||||
RadioCanadaAudioVideoIE,
|
||||
)
|
||||
from .radiocomercial import (
|
||||
RadioComercialIE,
|
||||
RadioComercialPlaylistIE,
|
||||
)
|
||||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import (
|
||||
FranceCultureIE,
|
||||
RadioFranceIE,
|
||||
@@ -1627,7 +1585,6 @@ from .rcti import (
|
||||
RCTIPlusTVIE,
|
||||
)
|
||||
from .rds import RDSIE
|
||||
from .recurbate import RecurbateIE
|
||||
from .redbee import ParliamentLiveUKIE, RTBFIE
|
||||
from .redbulltv import (
|
||||
RedBullTVIE,
|
||||
@@ -1651,7 +1608,10 @@ from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .rheinmaintv import RheinMainTVIE
|
||||
from .rice import RICEIE
|
||||
from .rinsefm import (
|
||||
RinseFMIE,
|
||||
RinseFMArtistPlaylistIE,
|
||||
)
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .rokfin import (
|
||||
@@ -1675,11 +1635,7 @@ from .rtlnl import (
|
||||
RTLLuLiveIE,
|
||||
RTLLuRadioIE,
|
||||
)
|
||||
from .rtl2 import (
|
||||
RTL2IE,
|
||||
RTL2YouIE,
|
||||
RTL2YouSeriesIE,
|
||||
)
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtnews import (
|
||||
RTNewsIE,
|
||||
RTDocumentryIE,
|
||||
@@ -1701,16 +1657,15 @@ from .rtve import (
|
||||
RTVEInfantilIE,
|
||||
RTVETelevisionIE,
|
||||
)
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rtvs import RTVSIE
|
||||
from .rtvslo import RTVSLOIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rule34video import Rule34VideoIE
|
||||
from .rumble import (
|
||||
RumbleEmbedIE,
|
||||
RumbleIE,
|
||||
RumbleChannelIE,
|
||||
)
|
||||
from .rudovideo import RudoVideoIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
RutubeChannelIE,
|
||||
@@ -1753,6 +1708,11 @@ from .samplefocus import SampleFocusIE
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
from .sbscokr import (
|
||||
SBSCoKrIE,
|
||||
SBSCoKrAllvodProgramIE,
|
||||
SBSCoKrProgramsVodIE,
|
||||
)
|
||||
from .screen9 import Screen9IE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastify import ScreencastifyIE
|
||||
@@ -1781,10 +1741,6 @@ from .shahid import (
|
||||
ShahidIE,
|
||||
ShahidShowIE,
|
||||
)
|
||||
from .shared import (
|
||||
SharedIE,
|
||||
VivoIE,
|
||||
)
|
||||
from .sharevideos import ShareVideosEmbedIE
|
||||
from .sibnet import SibnetEmbedIE
|
||||
from .shemaroome import ShemarooMeIE
|
||||
@@ -1862,7 +1818,6 @@ from .spankbang import (
|
||||
SpankBangIE,
|
||||
SpankBangPlaylistIE,
|
||||
)
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .spike import (
|
||||
BellatorIE,
|
||||
@@ -1897,6 +1852,8 @@ from .srmediathek import SRMediathekIE
|
||||
from .stacommu import (
|
||||
StacommuLiveIE,
|
||||
StacommuVODIE,
|
||||
TheaterComplexTownVODIE,
|
||||
TheaterComplexTownPPVIE,
|
||||
)
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .startv import StarTVIE
|
||||
@@ -1910,7 +1867,6 @@ from .storyfire import (
|
||||
StoryFireSeriesIE,
|
||||
)
|
||||
from .streamable import StreamableIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streamff import StreamFFIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
@@ -1930,7 +1886,6 @@ from .svt import (
|
||||
SVTSeriesIE,
|
||||
)
|
||||
from .swearnet import SwearnetEpisodeIE
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syvdk import SYVDKIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
@@ -1957,7 +1912,6 @@ from .teamcoco import (
|
||||
ConanClassicIE,
|
||||
)
|
||||
from .teamtreehouse import TeamTreeHouseIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import (
|
||||
TedEmbedIE,
|
||||
TedPlaylistIE,
|
||||
@@ -1992,10 +1946,17 @@ from .tencent import (
|
||||
WeTvSeriesIE,
|
||||
)
|
||||
from .tennistv import TennisTVIE
|
||||
from .tenplay import TenPlayIE
|
||||
from .tenplay import (
|
||||
TenPlayIE,
|
||||
TenPlaySeasonIE,
|
||||
)
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .tfo import TFOIE
|
||||
from .theguardian import (
|
||||
TheGuardianPodcastIE,
|
||||
TheGuardianPodcastPlaylistIE,
|
||||
)
|
||||
from .theholetv import TheHoleTvIE
|
||||
from .theintercept import TheInterceptIE
|
||||
from .theplatform import (
|
||||
@@ -2006,7 +1967,6 @@ from .thestar import TheStarIE
|
||||
from .thesun import TheSunIE
|
||||
from .theweatherchannel import TheWeatherChannelIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .thisvid import (
|
||||
ThisVidIE,
|
||||
@@ -2028,7 +1988,6 @@ from .tiktok import (
|
||||
TikTokLiveIE,
|
||||
DouyinIE,
|
||||
)
|
||||
from .tinypic import TinyPicIE
|
||||
from .tmz import TMZIE
|
||||
from .tnaflix import (
|
||||
TNAFlixNetworkEmbedIE,
|
||||
@@ -2043,10 +2002,6 @@ from .toggle import (
|
||||
from .toggo import (
|
||||
ToggoIE,
|
||||
)
|
||||
from .tokentube import (
|
||||
TokentubeIE,
|
||||
TokentubeChannelIE
|
||||
)
|
||||
from .tonline import TOnlineIE
|
||||
from .toongoggles import ToonGogglesIE
|
||||
from .toutv import TouTvIE
|
||||
@@ -2057,7 +2012,6 @@ from .triller import (
|
||||
TrillerUserIE,
|
||||
TrillerShortIE,
|
||||
)
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trovo import (
|
||||
TrovoIE,
|
||||
TrovoVodIE,
|
||||
@@ -2065,6 +2019,7 @@ from .trovo import (
|
||||
TrovoChannelClipIE,
|
||||
)
|
||||
from .trtcocuk import TrtCocukVideoIE
|
||||
from .trtworld import TrtWorldIE
|
||||
from .trueid import TrueIDIE
|
||||
from .trunews import TruNewsIE
|
||||
from .truth import TruthIE
|
||||
@@ -2082,8 +2037,6 @@ from .tunein import (
|
||||
TuneInPodcastEpisodeIE,
|
||||
TuneInShortenerIE,
|
||||
)
|
||||
from .tunepk import TunePkIE
|
||||
from .turbo import TurboIE
|
||||
from .tv2 import (
|
||||
TV2IE,
|
||||
TV2ArticleIE,
|
||||
@@ -2124,16 +2077,7 @@ from .tvigle import TvigleIE
|
||||
from .tviplayer import TVIPlayerIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvn24 import TVN24IE
|
||||
from .tvnet import TVNetIE
|
||||
from .tvnoe import TVNoeIE
|
||||
from .tvnow import (
|
||||
TVNowIE,
|
||||
TVNowFilmIE,
|
||||
TVNowNewIE,
|
||||
TVNowSeasonIE,
|
||||
TVNowAnnualIE,
|
||||
TVNowShowIE,
|
||||
)
|
||||
from .tvopengr import (
|
||||
TVOpenGrWatchIE,
|
||||
TVOpenGrEmbedIE,
|
||||
@@ -2151,7 +2095,6 @@ from .tvplay import (
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
from .twitcasting import (
|
||||
@@ -2200,7 +2143,6 @@ from .drooble import DroobleIE
|
||||
from .umg import UMGDeIE
|
||||
from .unistra import UnistraIE
|
||||
from .unity import UnityIE
|
||||
from .unscripted import UnscriptedNewsVideoIE
|
||||
from .unsupported import KnownDRMIE, KnownPiracyIE
|
||||
from .uol import UOLIE
|
||||
from .uplynk import (
|
||||
@@ -2219,7 +2161,6 @@ from .ustudio import (
|
||||
from .utreon import UtreonIE
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
from .veo import VeoIE
|
||||
from .veoh import (
|
||||
VeohIE,
|
||||
@@ -2241,7 +2182,6 @@ from .vice import (
|
||||
ViceArticleIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videa import VideaIE
|
||||
from .videocampus_sachsen import (
|
||||
@@ -2269,6 +2209,7 @@ from .vidio import (
|
||||
VidioLiveIE
|
||||
)
|
||||
from .vidlii import VidLiiIE
|
||||
from .vidly import VidlyIE
|
||||
from .viewlift import (
|
||||
ViewLiftIE,
|
||||
ViewLiftEmbedIE,
|
||||
@@ -2291,7 +2232,6 @@ from .vimm import (
|
||||
VimmIE,
|
||||
VimmRecordingIE,
|
||||
)
|
||||
from .vimple import VimpleIE
|
||||
from .vine import (
|
||||
VineIE,
|
||||
VineUserIE,
|
||||
@@ -2300,6 +2240,7 @@ from .viki import (
|
||||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .viously import ViouslyIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
ViuIE,
|
||||
@@ -2315,10 +2256,8 @@ from .vk import (
|
||||
VKPlayLiveIE,
|
||||
)
|
||||
from .vocaroo import VocarooIE
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vodpl import VODPlIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .voicy import (
|
||||
VoicyIE,
|
||||
VoicyChannelIE,
|
||||
@@ -2338,23 +2277,13 @@ from .vrt import (
|
||||
KetnetIE,
|
||||
DagelijkseKostIE,
|
||||
)
|
||||
from .vrak import VrakIE
|
||||
from .vrv import (
|
||||
VRVIE,
|
||||
VRVSeriesIE,
|
||||
)
|
||||
from .vshare import VShareIE
|
||||
from .vtm import VTMIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vupload import VuploadIE
|
||||
from .vvvvid import (
|
||||
VVVVIDIE,
|
||||
VVVVIDShowIE,
|
||||
)
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .vzaar import VzaarIE
|
||||
from .wakanim import WakanimIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
@@ -2366,8 +2295,6 @@ from .wasdtv import (
|
||||
WASDTVClipIE,
|
||||
)
|
||||
from .wat import WatIE
|
||||
from .watchbox import WatchBoxIE
|
||||
from .watchindianporn import WatchIndianPornIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRPageIE,
|
||||
@@ -2401,7 +2328,6 @@ from .wevidi import WeVidiIE
|
||||
from .weyyak import WeyyakIE
|
||||
from .whyp import WhypIE
|
||||
from .wikimedia import WikimediaIE
|
||||
from .willow import WillowIE
|
||||
from .wimbledon import WimbledonIE
|
||||
from .wimtv import WimTVIE
|
||||
from .whowatch import WhoWatchIE
|
||||
@@ -2435,7 +2361,6 @@ from .wykop import (
|
||||
WykopPostCommentIE,
|
||||
)
|
||||
from .xanimu import XanimuIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
from .xhamster import (
|
||||
@@ -2451,8 +2376,6 @@ from .xinpianchang import XinpianchangIE
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
from .xuite import XuiteIE
|
||||
from .xvideos import (
|
||||
XVideosIE,
|
||||
XVideosQuickiesIE
|
||||
@@ -2482,10 +2405,7 @@ from .yappy import (
|
||||
YappyIE,
|
||||
YappyProfileIE,
|
||||
)
|
||||
from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .yle_areena import YleAreenaIE
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import (
|
||||
YoukuIE,
|
||||
@@ -2561,6 +2481,9 @@ from .zingmp3 import (
|
||||
ZingMp3ChartMusicVideoIE,
|
||||
ZingMp3UserIE,
|
||||
ZingMp3HubIE,
|
||||
ZingMp3LiveRadioIE,
|
||||
ZingMp3PodcastEpisodeIE,
|
||||
ZingMp3PodcastIE,
|
||||
)
|
||||
from .zoom import ZoomIE
|
||||
from .zype import ZypeIE
|
||||
|
||||
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -379,6 +380,18 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'noplaylist': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
# 'videoEpisodes' is a dict with `items` key
|
||||
'url': 'https://iview.abc.net.au/show/7-30-mark-humphries-satire',
|
||||
'info_dict': {
|
||||
'id': '178458-0',
|
||||
'title': 'Episodes',
|
||||
'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
|
||||
'series': '7.30 Mark Humphries Satire',
|
||||
'season': 'Episodes',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
|
||||
},
|
||||
'playlist_count': 15,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -398,12 +411,14 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
series = video_data['selectedSeries']
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [self.url_result(episode['shareUrl'])
|
||||
for episode in series['_embedded']['videoEpisodes']],
|
||||
'entries': [self.url_result(episode_url, ABCIViewIE)
|
||||
for episode_url in traverse_obj(series, (
|
||||
'_embedded', 'videoEpisodes', (None, 'items'), ..., 'shareUrl', {url_or_none}))],
|
||||
'id': series.get('id'),
|
||||
'title': dict_get(series, ('title', 'displaySubtitle')),
|
||||
'description': series.get('description'),
|
||||
'series': dict_get(series, ('showTitle', 'displayTitle')),
|
||||
'season': dict_get(series, ('title', 'displaySubtitle')),
|
||||
'thumbnail': series.get('thumbnail'),
|
||||
'thumbnail': traverse_obj(
|
||||
series, 'thumbnail', ('images', lambda _, v: v['name'] == 'seriesThumbnail', 'url'), get_all=False),
|
||||
}
|
||||
|
||||
@@ -92,6 +92,8 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
|
||||
_USERTOKEN = None
|
||||
_DEVICE_ID = None
|
||||
_MEDIATOKEN = None
|
||||
@@ -136,11 +138,15 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
|
||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||
AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
|
||||
if AbemaTVBaseIE._USERTOKEN:
|
||||
# try authentication with locally stored token
|
||||
try:
|
||||
AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
|
||||
self._get_media_token(True)
|
||||
return
|
||||
except ExtractorError as e:
|
||||
@@ -159,7 +165,6 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
})
|
||||
AbemaTVBaseIE._USERTOKEN = user_data['token']
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
return self._USERTOKEN
|
||||
|
||||
def _get_media_token(self, invalidate=False, to_show=True):
|
||||
@@ -181,6 +186,37 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
return self._MEDIATOKEN
|
||||
|
||||
def _perform_login(self, username, password):
    """Log in with the given credentials and cache the resulting tokens.

    A device token is obtained first. If a cache entry for *username*
    (written by version 2024.01.19 or newer) exists and a media token can
    be fetched, the network login is skipped entirely. Otherwise the
    credentials are posted to the auth API, the returned user token is
    stored on ``AbemaTVBaseIE``, the media token is re-requested with
    ``invalidate=True``, and the device id / user token pair is written
    to the cache under *username*.
    """
    self._get_device_token()

    cached_auth = self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
    if cached_auth and self._get_media_token():
        self.write_debug('Skipping logging in')
        return

    # Anything containing "@" is treated as an email address; no stricter
    # validation is attempted. Everything else is sent as a user id.
    endpoint, id_field = (
        ('user/email', 'email') if '@' in username
        else ('oneTimePassword', 'userId'))

    payload = json.dumps({
        id_field: username,
        'password': password
    }).encode('utf-8')
    request_headers = {
        'Authorization': f'bearer {self._get_device_token()}',
        'Origin': 'https://abema.tv',
        'Referer': 'https://abema.tv/',
        'Content-Type': 'application/json',
    }
    response = self._download_json(
        f'https://api.abema.io/v1/auth/{endpoint}', None, note='Logging in',
        data=payload, headers=request_headers)

    # The token lives on the base class so every Abema extractor shares it.
    AbemaTVBaseIE._USERTOKEN = response['token']
    self._get_media_token(True)
    self.cache.store(self._NETRC_MACHINE, username, {
        'device_id': AbemaTVBaseIE._DEVICE_ID,
        'usertoken': AbemaTVBaseIE._USERTOKEN,
    })
|
||||
|
||||
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
|
||||
return self._download_json(
|
||||
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
|
||||
@@ -204,14 +240,14 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
class AbemaTVIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
|
||||
'info_dict': {
|
||||
'id': '194-25_s2_p1',
|
||||
'title': '第1話 「チーズケーキ」 「モーニング再び」',
|
||||
'series': '異世界食堂2',
|
||||
'series_number': 2,
|
||||
'season': 'シーズン2',
|
||||
'season_number': 2,
|
||||
'episode': '第1話 「チーズケーキ」 「モーニング再び」',
|
||||
'episode_number': 1,
|
||||
},
|
||||
@@ -252,33 +288,6 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
}]
|
||||
_TIMETABLE = None
|
||||
|
||||
def _perform_login(self, username, password):
    """Log in to ABEMA via the shared ``AbemaTVBaseIE`` implementation.

    This override used to cache the bare user token string and to load the
    cache without a minimum version. ``_get_device_token`` reads the same
    cache entry as a mapping (``auth_cache.get('usertoken')`` and
    ``auth_cache.get('device_id')``), so a bare string stored here would
    not be usable there. Delegating keeps a single cache format (a dict
    holding ``device_id`` and ``usertoken``, gated by
    ``min_ver='2024.01.19'``) for every Abema extractor.

    :param username: account email address or user id
    :param password: account password or one-time password
    :return: ``None``
    """
    # No subclass-specific behaviour is needed: the base class performs the
    # cache check, the login request and the cache write.
    return super()._perform_login(username, password)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# starting download using infojson from this extractor is undefined behavior,
|
||||
# and will never be fixed in the future; you must trigger downloads by directly specifying the URL.
|
||||
@@ -347,12 +356,12 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
)?
|
||||
''', r'\1', og_desc)
|
||||
|
||||
# canonical URL may contain series and episode number
|
||||
# canonical URL may contain season and episode number
|
||||
mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
|
||||
if mobj:
|
||||
seri = int_or_none(mobj.group(1), default=float('inf'))
|
||||
epis = int_or_none(mobj.group(2), default=float('inf'))
|
||||
info['series_number'] = seri if seri < 100 else None
|
||||
info['season_number'] = seri if seri < 100 else None
|
||||
# some anime like Detective Conan (though not available in AbemaTV)
|
||||
# has more than 1000 episodes (1026 as of 2021/11/15)
|
||||
info['episode_number'] = epis if epis < 2000 else None
|
||||
@@ -381,7 +390,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
self.report_warning('This is a premium-only stream')
|
||||
info.update(traverse_obj(api_response, {
|
||||
'series': ('series', 'title'),
|
||||
'season': ('season', 'title'),
|
||||
'season': ('season', 'name'),
|
||||
'season_number': ('season', 'sequence'),
|
||||
'episode_number': ('episode', 'number'),
|
||||
}))
|
||||
|
||||
@@ -19,15 +19,35 @@ from ..utils import (
|
||||
long_to_bytes,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ADNIE(InfoExtractor):
|
||||
class ADNBaseIE(InfoExtractor):
|
||||
IE_DESC = 'Animation Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = f'https://gw.api.{_BASE}/'
|
||||
_PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
@@ -44,29 +64,35 @@ class ADNIE(InfoExtractor):
|
||||
'season_number': 1,
|
||||
'episode': 'À ce soir !',
|
||||
'episode_number': 1,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 1',
|
||||
},
|
||||
'skip': 'Only available in region (FR, ...)',
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 1,
|
||||
'duration': 1417,
|
||||
'release_date': '20231004',
|
||||
'series': 'The Eminence in Shadow',
|
||||
'season_number': 2,
|
||||
'episode': str,
|
||||
'title': str,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 2',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'description': str,
|
||||
},
|
||||
# 'skip': 'Only available in French and German speaking Europe',
|
||||
}]
|
||||
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, sub_url, video_id):
|
||||
if not sub_url:
|
||||
return None
|
||||
@@ -116,6 +142,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
if sub_lang == 'vostf':
|
||||
sub_lang = 'fr'
|
||||
elif sub_lang == 'vostde':
|
||||
sub_lang = 'de'
|
||||
subtitles.setdefault(sub_lang, []).extend([{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(sub),
|
||||
@@ -147,7 +175,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
@@ -162,7 +190,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
video_id, 'Downloading access token', headers={
|
||||
'x-player-refresh-token': user['refreshToken']
|
||||
'X-Player-Refresh-Token': user['refreshToken'],
|
||||
}, data=b'')['token']
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
@@ -184,7 +212,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
try:
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization
|
||||
'X-Player-Token': authorization,
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
@@ -232,6 +262,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
if format_id == 'vf':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
elif format_id == 'vde':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'de'
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
video = (self._download_json(
|
||||
@@ -255,3 +288,40 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
}
|
||||
|
||||
|
||||
class ADNSeasonIE(ADNBaseIE):
    """Playlist extractor for a show page (URL without an episode id).

    Resolves the show slug to its numeric id, fetches the episode list for
    the site language taken from the URL, and yields one ``ADNIE`` URL
    result per episode.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
        'playlist_count': 12,
        'info_dict': {
            'id': '911',
            'title': 'Tokyo Mew Mew New',
        },
        # 'skip': 'Only available in French and German speaking Europe',
    }]

    def _real_extract(self, url):
        lang, slug = self._match_valid_url(url).group('lang', 'id')

        # The show endpoint is keyed by slug; the episode endpoint needs the id.
        show_response = self._download_json(
            f'{self._API_BASE_URL}show/{slug}/', slug,
            'Downloading show JSON metadata', headers=self._HEADERS)
        show = show_response['show']
        show_id = str(show['id'])

        episode_list = self._download_json(
            f'{self._API_BASE_URL}video/show/{show_id}', slug,
            'Downloading episode list',
            headers={
                # The language from the URL is sent as the target distribution.
                'X-Target-Distribution': lang,
                **self._HEADERS
            },
            query={
                'order': 'asc',
                'limit': '-1',
            })

        def iter_episode_results():
            # Evaluated lazily: the ids are only traversed once the playlist
            # entries are actually consumed.
            episode_ids = traverse_obj(episode_list, ('videos', ..., 'id', {str_or_none}))
            for episode_id in episode_ids:
                episode_url = f'https://animationdigitalnetwork.{lang}/video/{slug}/{episode_id}'
                yield self.url_result(episode_url, ADNIE, episode_id)

        return self.playlist_result(iter_episode_results(), show_id, show.get('title'))
|
||||
|
||||
@@ -93,7 +93,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
@@ -121,11 +121,21 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'info_dict': {
|
||||
'id': '22253814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:a40e370925074260b1c8a633c632c63a',
|
||||
'timestamp': 1338306241,
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
'duration': 2592.0,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:5',
|
||||
'tags': 'count:14',
|
||||
'categories': ['Mountain Men'],
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Mountain Men',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -143,6 +153,15 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'timestamp': 1452634428,
|
||||
'upload_date': '20160112',
|
||||
'uploader': 'AENE-NEW',
|
||||
'duration': 1277.695,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:4',
|
||||
'tags': 'count:23',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 9',
|
||||
'season_number': 9,
|
||||
'series': 'Duck Dynasty',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class AirMozillaIE(InfoExtractor):
    """Extractor for air.mozilla.org pages whose player is a vid.ly embed."""

    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
    _TEST = {
        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
        'md5': '8d02f53ee39cf006009180e21df1f3ba',
        'info_dict': {
            'id': '6x4q2w',
            'ext': 'mp4',
            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
            'thumbnail': r're:https?://.*/poster\.jpg',
            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
            'timestamp': 1422487800,
            'upload_date': '20150128',
            'location': 'SFO Commons',
            'duration': 3780,
            'view_count': int,
            'categories': ['Main', 'Privacy'],
        },
    }

    def _real_extract(self, url):
        """Build the info dict from the vid.ly jwplayer config plus page metadata.

        The vid.ly id found in the page becomes the video id; the slug from
        the URL is kept as ``display_id``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')

        # The embed script passes the player setup to initCallback(...) as JSON
        embed_script = self._download_webpage(f'https://vid.ly/{video_id}/embed', video_id)
        raw_metadata = self._search_regex(
            r'initCallback\((.*)\);', embed_script, 'metadata')
        jwconfig = self._parse_json(raw_metadata, video_id)['config']
        info_dict = self._parse_jwplayer_data(jwconfig, video_id)

        # Optional page fields: a failed search is non-fatal here
        view_count = int_or_none(self._html_search_regex(
            r'Views since archived: ([0-9]+)', webpage, 'view count', fatal=False))
        timestamp = parse_iso8601(self._html_search_regex(
            r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
        duration = parse_duration(self._search_regex(
            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
            webpage, 'duration', fatal=False))

        # Page-level values take precedence over whatever the jwplayer data provided
        page_fields = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': self._og_search_url(webpage),
            'display_id': display_id,
            'description': self._og_search_description(webpage),
            'timestamp': timestamp,
            'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
            'duration': duration,
            'view_count': view_count,
            'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
        }
        info_dict.update(page_fields)
        return info_dict
|
||||
253
yt_dlp/extractor/allstar.py
Normal file
253
yt_dlp/extractor/allstar.py
Normal file
@@ -0,0 +1,253 @@
|
||||
import functools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
# GraphQL field selection requested by every query below.
_FIELDS = '''
    _id
    clipImageSource
    clipImageThumb
    clipLink
    clipTitle
    createdDate
    shareId
    user { _id }
    username
    views'''

# Additional fields requested by the clip queries only; the montage queries
# below do not include them.
_EXTRA_FIELDS = '''
    clipLength
    clipSizeBytes'''

# Query documents keyed by the identifier the extractors look them up with:
# 'clip' / 'montage' are the URL "type" of a single video, while
# 'Clips' / 'Montages' / 'Mobile Clips' are the values of a profile URL's
# ``view`` parameter. Single-video queries alias their result to ``video``,
# listing queries to ``videos``.
# Plain %-formatting is used because the GraphQL text itself is full of braces.
_QUERIES = {
    'clip': '''query ($id: String!) {
        video: getClip(clipIdentifier: $id) {
            %s %s
        }
    }''' % (_FIELDS, _EXTRA_FIELDS),
    'montage': '''query ($id: String!) {
        video: getMontage(clipIdentifier: $id) {
            %s
        }
    }''' % _FIELDS,
    'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
        videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
            data { %s %s }
        }
    }''' % (_FIELDS, _EXTRA_FIELDS),
    'Montages': '''query ($page: Int!, $user: String!) {
        videos: montages(search: createdDate, page: $page, user: $user) {
            data { %s }
        }
    }''' % _FIELDS,
    'Mobile Clips': '''query ($page: Int!, $user: String!) {
        videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
            data { %s %s }
        }
    }''' % (_FIELDS, _EXTRA_FIELDS),
}
|
||||
|
||||
|
||||
class AllstarBaseIE(InfoExtractor):
    """Helpers shared by the allstar.gg extractors: GraphQL access and metadata mapping."""

    @staticmethod
    def _parse_video_data(video_data):
        """Map one clip/montage object from the API onto an info dict.

        Media and thumbnail paths are joined onto https://media.allstar.gg/.
        ``webpage_url`` is only added when both an id and a media URL were
        extracted. The result is always tagged with AllstarIE's extractor
        key and name, and ``uploader_url`` is derived from the uploader id.
        """
        to_media_url = functools.partial(urljoin, 'https://media.allstar.gg/')

        info = traverse_obj(video_data, {
            'id': ('_id', {str}),
            'display_id': ('shareId', {str}),
            'title': ('clipTitle', {str}),
            'url': ('clipLink', {to_media_url}),
            'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {to_media_url}}),
            'duration': ('clipLength', {int_or_none}),
            'filesize': ('clipSizeBytes', {int_or_none}),
            # createdDate is divided by 1000 before being used as the timestamp
            'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
            'uploader': ('username', {str}),
            'uploader_id': ('user', '_id', {str}),
            'view_count': ('views', {int_or_none}),
        })

        video_id, media_url = info.get('id'), info.get('url')
        if video_id and media_url:
            # The media path decides whether this is a clip or a montage page
            kind = 'clip' if '/clips/' in media_url else 'montage'
            info['webpage_url'] = f'https://allstar.gg/{kind}?{kind}={video_id}'

        info['extractor_key'] = AllstarIE.ie_key()
        info['extractor'] = AllstarIE.IE_NAME
        info['uploader_url'] = urljoin('https://allstar.gg/u/', info.get('uploader_id'))
        return info

    def _call_api(self, query, variables, path, video_id=None, note=None):
        """POST a GraphQL query and return the part of the response at *path*.

        Raises ExtractorError with all reported messages joined by '; ' when
        the response carries an ``errors`` list with string messages.
        """
        payload = json.dumps({'variables': variables, 'query': query}).encode()
        response = self._download_json(
            'https://a1.allstar.gg/graphql', video_id, note=note,
            headers={'content-type': 'application/json'}, data=payload)

        error_messages = traverse_obj(response, ('errors', ..., 'message', {str}))
        if not error_messages:
            return traverse_obj(response, path)
        raise ExtractorError('; '.join(error_messages))
|
||||
|
||||
|
||||
class AllstarIE(AllstarBaseIE):
    """Extractor for a single allstar.gg clip or montage page."""

    # The query parameter name must repeat the path segment (clip?clip=… / montage?montage=…)
    _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?P<type>(?:clip|montage))\?(?P=type)=(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://allstar.gg/clip?clip=64482c2da9eec30008a67d1b',
        'info_dict': {
            'id': '64482c2da9eec30008a67d1b',
            'title': '4K on Inferno',
            'url': 'md5:66befb5381eef0c9456026386c25fa55',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'uploader': 'chrk.',
            'ext': 'mp4',
            'duration': 20,
            'filesize': 21199257,
            'timestamp': 1682451501,
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230425',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
        'info_dict': {
            'id': '64a1ec6b887f4c0008dc50b8',
            'display_id': '8LJLY4JKB',
            'title': 'AK-47 3K on Mirage',
            'url': 'md5:dde224fd12f035c0e2529a4ae34c4283',
            'ext': 'mp4',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'duration': 16,
            'filesize': 30175859,
            'timestamp': 1688333419,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230702',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
        'info_dict': {
            'id': '643e64089da7e9363e1fa66c',
            'display_id': 'APQLGM2IMXW',
            'title': 'cherokee Rapid Fire Snipers Montage',
            'url': 'md5:a3ee356022115db2b27c81321d195945',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'ext': 'mp4',
            'timestamp': 1681810448,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230418',
            'view_count': int,
        },
    }, {
        'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
        'info_dict': {
            'id': '64a2697372ce3703de29e868',
            'display_id': 'RILJMH6QOS',
            'title': 'cherokee Rapid Fire Snipers Montage',
            'url': 'md5:d5672e6f88579730c2310a80fdbc4030',
            'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
            'ext': 'mp4',
            'timestamp': 1688365434,
            'uploader': 'cherokee',
            'uploader_id': '62b8bdfc9021052f7905882d',
            'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
            'upload_date': '20230703',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Fetch the clip/montage named in the URL and convert it to an info dict."""
        mobj = self._match_valid_url(url)
        # The URL "type" ('clip' or 'montage') doubles as the key into _QUERIES
        query_id, video_id = mobj.group('type'), mobj.group('id')

        video_data = self._call_api(
            _QUERIES.get(query_id), {'id': video_id}, ('data', 'video'), video_id)
        return self._parse_video_data(video_data)
|
||||
|
||||
|
||||
class AllstarProfileIE(AllstarBaseIE):
    """Playlist extractor for an allstar.gg user profile (clips, montages or mobile clips)."""

    _VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?:profile\?user=|u/)(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://allstar.gg/profile?user=62b8bdfc9021052f7905882d',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-clips',
            'title': 'cherokee - Clips',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-clips-730',
            'title': 'cherokee - Clips - 730',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-montages',
            'title': 'cherokee - Montages',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
        'info_dict': {
            'id': '62b8bdfc9021052f7905882d-mobile',
            'title': 'cherokee - Mobile Clips',
        },
        'playlist_mincount': 1,
    }]

    _PAGE_SIZE = 10

    def _get_page(self, user_id, display_id, game, query, page_num):
        """Yield the info dicts of one result page.

        *page_num* is incremented by one before it is sent to the API.
        """
        page_num += 1

        videos = self._call_api(
            query, {
                'user': user_id,
                'page': page_num,
                'game': game,
            }, ('data', 'videos', 'data'), display_id, f'Downloading page {page_num}')

        # _call_api returns None when the response has nothing at that path;
        # treat that as an empty page instead of failing on iteration
        for video_data in videos or []:
            yield self._parse_video_data(video_data)

    def _real_extract(self, url):
        """Resolve the profile to a user id and return a lazily paged playlist.

        The ``view`` URL parameter selects the listing (default 'Clips') and
        ``game`` optionally narrows it.

        Raises ExtractorError if no user id can be extracted or if ``view``
        is not one of the supported listings.
        """
        display_id = self._match_id(url)
        profile_data = self._download_json(
            urljoin('https://api.allstar.gg/v1/users/profile/', display_id), display_id)
        user_id = traverse_obj(profile_data, ('data', '_id', {str}))
        if not user_id:
            raise ExtractorError('Unable to extract the user id')

        username = traverse_obj(profile_data, ('data', 'profile', 'username', {str}))
        url_query = parse_qs(url)
        game = traverse_obj(url_query, ('game', 0, {int_or_none}))
        query_id = traverse_obj(url_query, ('view', 0), default='Clips')

        if query_id not in ('Clips', 'Montages', 'Mobile Clips'):
            raise ExtractorError(f'Unsupported playlist URL type {query_id!r}')

        return self.playlist_result(
            OnDemandPagedList(
                functools.partial(
                    self._get_page, user_id, display_id, game, _QUERIES.get(query_id)), self._PAGE_SIZE),
            # Only the first word of the view name goes into the id ('Mobile Clips' -> 'mobile')
            playlist_id=join_nonempty(user_id, query_id.lower().split()[0], game),
            playlist_title=join_nonempty((username or display_id), query_id, game, delim=' - '))
|
||||
96
yt_dlp/extractor/altcensored.py
Normal file
96
yt_dlp/extractor/altcensored.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import re
|
||||
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AltCensoredIE(InfoExtractor):
    """Extractor for altcensored.com watch/embed pages, delegating to the archive.org item."""

    IE_NAME = 'altcensored'
    _VALID_URL = r'https?://(?:www\.)?altcensored\.com/(?:watch\?v=|embed/)(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8',
        'info_dict': {
            'id': 'youtube-k0srjLSkga8',
            'ext': 'webm',
            'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
            'display_id': 'k0srjLSkga8.webm',
            'release_date': '20180403',
            'creator': 'Virginie Vota',
            'release_year': 2018,
            'upload_date': '20230318',
            'uploader': 'admin@altcensored.com',
            'description': 'md5:0b38a8fc04103579d5c1db10a247dc30',
            'timestamp': 1679161343,
            'track': 'k0srjLSkga8',
            'duration': 926.09,
            'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
            'view_count': int,
            'categories': ['News & Politics'],
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at archive.org.

        Only the view count and the category are taken from the altcensored
        page itself; both are optional.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # A category name may contain spaces ("News & Politics"), so it is
        # kept as one entry rather than being split on whitespace
        category = self._html_search_regex(
            r'<a href="/category/\d+">\s*\n?\s*([^<]+)</a>',
            webpage, 'category', default=None)

        return {
            '_type': 'url_transparent',
            'url': f'https://archive.org/details/youtube-{video_id}',
            'ie_key': ArchiveOrgIE.ie_key(),
            # NOTE(review): the separator after the label is presumably a literal
            # "&nbsp;" entity in the page source - confirm against a live page
            'view_count': str_to_int(self._html_search_regex(
                r'YouTube Views:(?:\s|&nbsp;)*([\d,]+)', webpage, 'view count', default=None)),
            'categories': [category] if category else None,
        }
|
||||
|
||||
|
||||
class AltCensoredChannelIE(InfoExtractor):
    """Playlist extractor for altcensored.com channel pages."""

    IE_NAME = 'altcensored:channel'
    _VALID_URL = r'https?://(?:www\.)?altcensored\.com/channel/(?!page|table)(?P<id>[^/?#]+)'
    _PAGE_SIZE = 24
    _TESTS = [{
        'url': 'https://www.altcensored.com/channel/UCFPTO55xxHqFqkzRZHu4kcw',
        'info_dict': {
            'title': 'Virginie Vota',
            'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
        },
        'playlist_count': 91,
    }, {
        'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
        'info_dict': {
            'title': 'yukikaze775',
            'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
        },
        'playlist_count': 4,
    }]

    def _real_extract(self, url):
        """Return the channel as a paged playlist of AltCensoredIE URL results."""
        channel_id = self._match_id(url)
        webpage = self._download_webpage(
            url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
        title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
        # Matches a pagination link whose text equals its own page number.
        # The channel id part must allow '-' as well as word characters,
        # otherwise ids containing a hyphen never match and the channel
        # silently falls back to the single-page default.
        page_count = int_or_none(self._html_search_regex(
            r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
            webpage, 'page count', default='1'))

        def page_func(page_num):
            # page_num is incremented by one to form the page number used in the URL
            page_num += 1
            page_html = self._download_webpage(
                f'https://altcensored.com/channel/{channel_id}/page/{page_num}',
                channel_id, note=f'Downloading page {page_num}')

            # orderedSet removes repeated watch links
            watch_paths = re.findall(r'<a[^>]+href="(/watch\?v=[^"]+)', page_html)
            return [self.url_result(urljoin('https://www.altcensored.com', path), AltCensoredIE)
                    for path in orderedSet(watch_paths)]

        return self.playlist_result(
            InAdvancePagedList(page_func, page_count, self._PAGE_SIZE),
            playlist_id=channel_id, playlist_title=title)
|
||||
77
yt_dlp/extractor/amadeustv.py
Normal file
77
yt_dlp/extractor/amadeustv.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AmadeusTVIE(InfoExtractor):
    """Extractor for amadeus.tv library pages."""

    _VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3',
        'info_dict': {
            'id': '5576678021301411311',
            'ext': 'mp4',
            'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮',
            'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg',
            'duration': 1264.8,
            'upload_date': '20230918',
            'timestamp': 1695034800,
            'display_id': '65091a87ff85af59d9fc54c3',
            'view_count': int,
            'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
        },
    }]

    def _real_extract(self, url):
        """Resolve the page's video id via its Nuxt data and collect formats from the play-info API.

        Raises ExtractorError when the Nuxt data carries no video id.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0'))
        video_id = traverse_obj(nuxt_data, ('item', 'video', {str}))
        if not video_id:
            raise ExtractorError('Unable to extract actual video ID')

        video_data = self._download_json(
            f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}',
            video_id, headers={'Referer': 'http://www.amadeus.tv/'})

        # Candidates are the source video followed by every transcoded variant
        candidates = traverse_obj(
            video_data, ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict}))
        formats = [{
            **traverse_obj(candidate, {
                'url': 'url',
                'format_id': ('definition', {lambda x: f'http-{x or "0"}'}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': (('totalSize', 'size'), {int_or_none}),
                'vcodec': ('videoStreamList', 0, 'codec'),
                'acodec': ('audioStreamList', 0, 'codec'),
                'fps': ('videoStreamList', 0, 'fps', {float_or_none}),
            }, get_all=False),
            # Each format carries the same Referer that the API request used
            'http_headers': {'Referer': 'http://www.amadeus.tv/'},
        } for candidate in candidates if url_or_none(candidate.get('url'))]

        api_metadata = traverse_obj(video_data, {
            'title': ('videoInfo', 'basicInfo', 'name', {str}),
            'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}),
            'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
        }, get_all=False)
        page_metadata = traverse_obj(nuxt_data, ('item', {
            'title': (('title', 'title_en', 'title_cn'), {str}),
            'description': (('description', 'description_en', 'description_cn'), {str}),
            'timestamp': ('date', {parse_iso8601}),
            'view_count': ('view', {int_or_none}),
        }), get_all=False)

        # Page metadata is unpacked last, so its title overrides the API one when present
        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            **api_metadata,
            **page_metadata,
        }
|
||||
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
_WORKING = False
|
||||
IE_NAME = 'aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
|
||||
|
||||
|
||||
@@ -52,7 +52,6 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'creator': 'SRI International',
|
||||
'uploader': 'laura@archive.org',
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
'release_year': 1968,
|
||||
'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
|
||||
'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect',
|
||||
|
||||
@@ -134,7 +133,6 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'album': '1977-05-08 - Barton Hall - Cornell University',
|
||||
'release_date': '19770508',
|
||||
'display_id': 'gd1977-05-08d01t07.flac',
|
||||
'release_year': 1977,
|
||||
'track_number': 7,
|
||||
},
|
||||
}, {
|
||||
|
||||
@@ -1,24 +1,24 @@
|
||||
import json
|
||||
import re
|
||||
from functools import partial
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .generic import GenericIE
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
bug_reports_message,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
make_archive_id,
|
||||
parse_duration,
|
||||
qualities,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ARDMediathekBaseIE(InfoExtractor):
|
||||
@@ -61,45 +61,6 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _ARD_extract_episode_info(self, title):
|
||||
"""Try to extract season/episode data from the title."""
|
||||
res = {}
|
||||
if not title:
|
||||
return res
|
||||
|
||||
for pattern in [
|
||||
# Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
|
||||
# from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
|
||||
r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
|
||||
# E.g.: title="Fritjof aus Norwegen (2) (AD)"
|
||||
# from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
|
||||
r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
|
||||
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
|
||||
# E.g.: title="Folge 25/42: Symmetrie"
|
||||
# from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
|
||||
# E.g.: title="Folge 1063 - Vertrauen"
|
||||
# from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
|
||||
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
|
||||
]:
|
||||
m = re.match(pattern, title)
|
||||
if m:
|
||||
groupdict = m.groupdict()
|
||||
res['season_number'] = int_or_none(groupdict.get('season_number'))
|
||||
res['episode_number'] = int_or_none(groupdict.get('episode_number'))
|
||||
res['episode'] = str_or_none(groupdict.get('episode'))
|
||||
# Build the episode title by removing numeric episode information:
|
||||
if groupdict.get('ep_info') and not res['episode']:
|
||||
res['episode'] = str_or_none(
|
||||
title.replace(groupdict.get('ep_info'), ''))
|
||||
if res['episode']:
|
||||
res['episode'] = res['episode'].strip()
|
||||
break
|
||||
|
||||
# As a fallback use the whole title as the episode name:
|
||||
if not res.get('episode'):
|
||||
res['episode'] = title.strip()
|
||||
return res
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
|
||||
type_ = media_info.get('_type')
|
||||
media_array = media_info.get('_mediaArray', [])
|
||||
@@ -155,144 +116,12 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
return formats
|
||||
|
||||
|
||||
class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
# available till 26.07.2022
|
||||
'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
|
||||
'info_dict': {
|
||||
'id': '44726822',
|
||||
'ext': 'mp4',
|
||||
'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
|
||||
'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# determine video id from url
|
||||
m = self._match_valid_url(url)
|
||||
|
||||
document_id = None
|
||||
|
||||
numid = re.search(r'documentId=([0-9]+)', url)
|
||||
if numid:
|
||||
document_id = video_id = numid.group(1)
|
||||
else:
|
||||
video_id = m.group('video_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
ERRORS = (
|
||||
('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
|
||||
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
|
||||
'Video %s is no longer available'),
|
||||
)
|
||||
|
||||
for pattern, message in ERRORS:
|
||||
if pattern in webpage:
|
||||
raise ExtractorError(message % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
return GenericIE()._extract_rss(url, video_id, doc)
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'meta description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_regex(
|
||||
r'<p\s+class="teasertext">(.+?)</p>',
|
||||
webpage, 'teaser text', default=None)
|
||||
|
||||
# Thumbnail is sometimes not present.
|
||||
# It is in the mobile version, but that seems to use a different URL
|
||||
# structure altogether.
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
|
||||
media_streams = re.findall(r'''(?x)
|
||||
mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
|
||||
"([^"]+)"''', webpage)
|
||||
|
||||
if media_streams:
|
||||
QUALITIES = qualities(['lo', 'hi', 'hq'])
|
||||
formats = []
|
||||
for furl in set(media_streams):
|
||||
if furl.endswith('.f4m'):
|
||||
fid = 'f4m'
|
||||
else:
|
||||
fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
|
||||
fid = fid_m.group(1) if fid_m else None
|
||||
formats.append({
|
||||
'quality': QUALITIES(fid),
|
||||
'format_id': fid,
|
||||
'url': furl,
|
||||
})
|
||||
info = {
|
||||
'formats': formats,
|
||||
}
|
||||
else: # request JSON file
|
||||
if not document_id:
|
||||
video_id = self._search_regex(
|
||||
(r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'),
|
||||
webpage, 'media id', default=None)
|
||||
info = self._extract_media_info(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id,
|
||||
webpage, video_id)
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
info.update(self._ARD_extract_episode_info(info['title']))
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 7.12.2023
|
||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
|
||||
'md5': 'a438f671e87a7eba04000336a119ccc4',
|
||||
'md5': '94812e6438488fb923c361a44469614b',
|
||||
'info_dict': {
|
||||
'id': 'maischberger-video-424',
|
||||
'display_id': 'maischberger-video-424',
|
||||
@@ -399,31 +228,35 @@ class ARDIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARDMediathek'
|
||||
_VALID_URL = r'''(?x)https://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:(?P<client>[^/]+)/)?
|
||||
(?:player|live|video|(?P<playlist>sendung|sammlung))/
|
||||
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
|
||||
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
|
||||
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
(?:[^?#]+/)?
|
||||
(?P<id>[a-zA-Z0-9]+)
|
||||
/?(?:[?#]|$)'''
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
|
||||
'md5': '3fd5fead7a370a819341129c8d713136',
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
|
||||
'info_dict': {
|
||||
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
|
||||
'id': '12172961',
|
||||
'title': 'Wolfsland - Die traurigen Schwestern',
|
||||
'description': r're:^Als der Polizeiobermeister Raaben',
|
||||
'duration': 5241,
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
|
||||
'timestamp': 1670710500,
|
||||
'upload_date': '20221210',
|
||||
'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'id': '12939099',
|
||||
'title': 'Liebe auf vier Pfoten',
|
||||
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
|
||||
'duration': 5222,
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
|
||||
'timestamp': 1701343800,
|
||||
'upload_date': '20231130',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 12,
|
||||
'episode': 'Wolfsland - Die traurigen Schwestern',
|
||||
'series': 'Filme im MDR'
|
||||
'episode': 'Liebe auf vier Pfoten',
|
||||
'series': 'Filme im MDR',
|
||||
'age_limit': 0,
|
||||
'channel': 'MDR',
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
@@ -450,11 +283,31 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
'timestamp': 1636398000,
|
||||
'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
|
||||
'upload_date': '20211108',
|
||||
'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste',
|
||||
'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
|
||||
'duration': 915,
|
||||
'episode': 'tagesschau, 20:00 Uhr',
|
||||
'series': 'tagesschau',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
|
||||
'channel': 'ARD-Aktuell',
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'md5': 'c428b9effff18ff624d4f903bda26315',
|
||||
'info_dict': {
|
||||
'id': '94834686',
|
||||
'ext': 'mp4',
|
||||
'duration': 2700,
|
||||
'episode': '7 Tage ... unter harten Jungs',
|
||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||
'upload_date': '20231005',
|
||||
'timestamp': 1696491171,
|
||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'series': '7 Tage ...',
|
||||
'channel': 'HR',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
|
||||
'title': '7 Tage ... unter harten Jungs',
|
||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
@@ -471,203 +324,239 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_episode_info(self, title):
    """Derive season/episode metadata from an ARD title string.

    The regexes below are tried against ``title`` in list order and the
    fields of the first one producing a result are returned
    (``get_all=False``).  The result may contain ``season_number``,
    ``episode_number`` and ``episode``.
    """
    title_regexes = [
        # Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
        # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
        r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
        # E.g.: title="Fritjof aus Norwegen (2) (AD)"
        # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
        r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
        # E.g.: title="Folge 25/42: Symmetrie"
        # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
        # E.g.: title="Folge 1063 - Vertrauen"
        # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
        # As a fallback use the full title
        r'(?P<title>.*)',
    ]

    match_title = partial(re.match, string=title)

    def without_marker(ep_info):
        # The episode name is the title with the matched marker text removed
        return title.replace(ep_info, '')

    # Candidate sources for the episode name; each candidate is stripped
    episode_sources = (
        ('episode', {str_or_none}),
        ('ep_info', {without_marker}),
        ('title', {str}),
    )
    match_fields = {
        'season_number': ('season_number', {int_or_none}),
        'episode_number': ('episode_number', {int_or_none}),
        'episode': (episode_sources, {str.strip}),
    }

    return traverse_obj(title_regexes, (..., {match_title}, match_fields), get_all=False)
|
||||
|
||||
def _real_extract(self, url):
    """Extract a single ARD Mediathek item.

    Downloads the item page JSON from the page-gateway API, collects HLS and
    progressive formats plus subtitles from the player widget's embedded
    media collection, and returns the info dict for the video.
    """
    display_id = self._match_id(url)

    page_data = self._download_json(
        f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}', display_id, query={
            'embedded': 'false',
            'mcV6': 'true',
        })

    # For user convenience we use the old contentId instead of the longer crid
    # Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
    old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int}))
    if old_id is not None:
        video_id = str(old_id)
        archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)]
    else:
        self.report_warning(f'Could not extract contentId{bug_reports_message()}')
        video_id = display_id
        archive_ids = None

    # With get_all=False the lookup yields None when no player widget matches;
    # fall back to an empty dict so the `.get()` calls below cannot raise
    # AttributeError and the item simply ends up without formats
    player_data = traverse_obj(
        page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}),
        get_all=False) or {}
    is_live = player_data.get('type') == 'player_live'
    media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))

    if player_data.get('blockedByFsk'):
        self.raise_no_formats('This video is only available after 22:00', expected=True)

    formats = []
    subtitles = {}
    for stream in traverse_obj(media_data, ('streams', ..., {dict})):
        kind = stream.get('kind')
        # Prioritize main stream over sign language and others
        preference = 1 if kind == 'main' else None
        for media in traverse_obj(stream, ('media', lambda _, v: url_or_none(v['url']))):
            media_url = media['url']

            # The language tag is the language code plus any non-"standard" audio kind
            audio_kind = traverse_obj(media, (
                'audios', 0, 'kind', {str}), default='').replace('standard', '')
            lang_code = traverse_obj(media, ('audios', 0, 'languageCode', {str})) or 'deu'
            lang = join_nonempty(lang_code, audio_kind)
            language_preference = 10 if lang == 'deu' else -10

            if determine_ext(media_url) == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, video_id, m3u8_id=f'hls-{kind}', preference=preference, fatal=False, live=is_live)
                for f in fmts:
                    f['language'] = lang
                    f['language_preference'] = language_preference
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({
                    'url': media_url,
                    'format_id': f'http-{kind}',
                    'preference': preference,
                    'language': lang,
                    'language_preference': language_preference,
                    **traverse_obj(media, {
                        'format_note': ('forcedLabel', {str}),
                        'width': ('maxHResolutionPx', {int_or_none}),
                        'height': ('maxVResolutionPx', {int_or_none}),
                        'vcodec': ('videoCodec', {str}),
                    }),
                })

    for sub in traverse_obj(media_data, ('subtitles', ..., {dict})):
        for sources in traverse_obj(sub, ('sources', lambda _, v: url_or_none(v['url']))):
            subtitles.setdefault(sub.get('languageCode') or 'deu', []).append({
                'url': sources['url'],
                'ext': {'webvtt': 'vtt', 'ebutt': 'ttml'}.get(sources.get('kind')),
            })

    age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))
    return {
        'id': video_id,
        'display_id': display_id,
        'formats': formats,
        'subtitles': subtitles,
        'is_live': is_live,
        'age_limit': age_limit,
        **traverse_obj(media_data, ('meta', {
            'title': 'title',
            'description': 'synopsis',
            'timestamp': ('broadcastedOnDateTime', {parse_iso8601}),
            'series': 'seriesTitle',
            'thumbnail': ('images', 0, 'url', {url_or_none}),
            'duration': ('durationSeconds', {int_or_none}),
            'channel': 'clipSourceName',
        })),
        **self._extract_episode_info(page_data.get('title')),
        '_old_archive_ids': archive_ids,
    }
|
||||
|
||||
|
||||
class ARDMediathekCollectionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/?#]+/)?
|
||||
(?P<playlist>sendung|serie|sammlung)/
|
||||
(?:(?P<display_id>[^?#]+?)/)?
|
||||
(?P<id>[a-zA-Z0-9]+)
|
||||
(?:/(?P<season>\d+)(?:/(?P<version>OV|AD))?)?/?(?:[?#]|$)'''
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/serie/quiz/staffel-1-originalversion/Y3JpZDovL3dkci5kZS9vbmUvcXVpeg/1/OV',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL3dkci5kZS9vbmUvcXVpeg_1_OV',
|
||||
'display_id': 'quiz/staffel-1-originalversion',
|
||||
'title': 'Staffel 1 Originalversion',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-4-mit-audiodeskription/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/4/AD',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_4_AD',
|
||||
'display_id': 'babylon-berlin/staffel-4-mit-audiodeskription',
|
||||
'title': 'Staffel 4 mit Audiodeskription',
|
||||
},
|
||||
'playlist_count': 12,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/1/',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_1',
|
||||
'display_id': 'babylon-berlin/staffel-1',
|
||||
'title': 'Staffel 1',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/sendung/tatort/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
|
||||
'display_id': 'tatort',
|
||||
'title': 'Tatort',
|
||||
},
|
||||
'playlist_mincount': 500,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2',
|
||||
'info_dict': {
|
||||
'id': '5eOHzt8XB2sqeFXbIoJlg2',
|
||||
'display_id': 'die-kirche-bleibt-im-dorf',
|
||||
'title': 'Die Kirche bleibt im Dorf',
|
||||
'description': 'Die Kirche bleibt im Dorf',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
# playlist of type 'sendung'
|
||||
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# playlist of type 'serie'
|
||||
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# playlist of type 'sammlung'
|
||||
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
|
||||
""" Query the ARD server for playlist information
|
||||
and returns the data in "raw" format """
|
||||
if mode == 'sendung':
|
||||
graphQL = json.dumps({
|
||||
'query': '''{
|
||||
showPage(
|
||||
client: "%s"
|
||||
showId: "%s"
|
||||
pageNumber: %d
|
||||
) {
|
||||
pagination {
|
||||
pageSize
|
||||
totalElements
|
||||
}
|
||||
teasers { # Array
|
||||
mediumTitle
|
||||
links { target { id href title } }
|
||||
type
|
||||
}
|
||||
}}''' % (client, playlist_id, pageNumber),
|
||||
}).encode()
|
||||
else: # mode == 'sammlung'
|
||||
graphQL = json.dumps({
|
||||
'query': '''{
|
||||
morePage(
|
||||
client: "%s"
|
||||
compilationId: "%s"
|
||||
pageNumber: %d
|
||||
) {
|
||||
widget {
|
||||
pagination {
|
||||
pageSize
|
||||
totalElements
|
||||
}
|
||||
teasers { # Array
|
||||
mediumTitle
|
||||
links { target { id href title } }
|
||||
type
|
||||
}
|
||||
}
|
||||
}}''' % (client, playlist_id, pageNumber),
|
||||
}).encode()
|
||||
# Ressources for ARD graphQL debugging:
|
||||
# https://api-test.ardmediathek.de/public-gateway
|
||||
show_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
'[Playlist] %s' % display_id,
|
||||
data=graphQL,
|
||||
headers={'Content-Type': 'application/json'})['data']
|
||||
# align the structure of the returned data:
|
||||
if mode == 'sendung':
|
||||
show_page = show_page['showPage']
|
||||
else: # mode == 'sammlung'
|
||||
show_page = show_page['morePage']['widget']
|
||||
return show_page
|
||||
|
||||
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
|
||||
""" Collects all playlist entries and returns them as info dict.
|
||||
Supports playlists of mode 'sendung' and 'sammlung', and also nested
|
||||
playlists. """
|
||||
entries = []
|
||||
pageNumber = 0
|
||||
while True: # iterate by pageNumber
|
||||
show_page = self._ARD_load_playlist_snipped(
|
||||
playlist_id, display_id, client, mode, pageNumber)
|
||||
for teaser in show_page['teasers']: # process playlist items
|
||||
if '/compilation/' in teaser['links']['target']['href']:
|
||||
# alternativ cond.: teaser['type'] == "compilation"
|
||||
# => This is an nested compilation, e.g. like:
|
||||
# https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/
|
||||
link_mode = 'sammlung'
|
||||
else:
|
||||
link_mode = 'video'
|
||||
|
||||
item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % (
|
||||
client, link_mode, display_id,
|
||||
# perform HTLM quoting of episode title similar to ARD:
|
||||
re.sub('^-|-$', '', # remove '-' from begin/end
|
||||
re.sub('[^a-zA-Z0-9]+', '-', # replace special chars by -
|
||||
teaser['links']['target']['title'].lower()
|
||||
.replace('ä', 'ae').replace('ö', 'oe')
|
||||
.replace('ü', 'ue').replace('ß', 'ss'))),
|
||||
teaser['links']['target']['id'])
|
||||
entries.append(self.url_result(
|
||||
item_url,
|
||||
ie=ARDBetaMediathekIE.ie_key()))
|
||||
|
||||
if (show_page['pagination']['pageSize'] * (pageNumber + 1)
|
||||
>= show_page['pagination']['totalElements']):
|
||||
# we've processed enough pages to get all playlist entries
|
||||
break
|
||||
pageNumber = pageNumber + 1
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title=display_id)
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
|
||||
'id', 'display_id', 'playlist', 'client', 'season')
|
||||
display_id, client = display_id or video_id, client or 'ard'
|
||||
playlist_id, display_id, playlist_type, season_number, version = self._match_valid_url(url).group(
|
||||
'id', 'display_id', 'playlist', 'season', 'version')
|
||||
|
||||
if playlist_type:
|
||||
# TODO: Extract only specified season
|
||||
return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)
|
||||
def call_api(page_num):
|
||||
api_path = 'compilations/ard' if playlist_type == 'sammlung' else 'widgets/ard/asset'
|
||||
return self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/{api_path}/{playlist_id}', playlist_id,
|
||||
f'Downloading playlist page {page_num}', query={
|
||||
'pageNumber': page_num,
|
||||
'pageSize': self._PAGE_SIZE,
|
||||
**({
|
||||
'seasoned': 'true',
|
||||
'seasonNumber': season_number,
|
||||
'withOriginalversion': 'true' if version == 'OV' else 'false',
|
||||
'withAudiodescription': 'true' if version == 'AD' else 'false',
|
||||
} if season_number else {}),
|
||||
})
|
||||
|
||||
player_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
display_id, data=json.dumps({
|
||||
'query': '''{
|
||||
playerPage(client:"%s", clipId: "%s") {
|
||||
blockedByFsk
|
||||
broadcastedOn
|
||||
maturityContentRating
|
||||
mediaCollection {
|
||||
_duration
|
||||
_geoblocked
|
||||
_isLive
|
||||
_mediaArray {
|
||||
_mediaStreamArray {
|
||||
_quality
|
||||
_server
|
||||
_stream
|
||||
}
|
||||
}
|
||||
_previewImage
|
||||
_subtitleUrl
|
||||
_type
|
||||
}
|
||||
show {
|
||||
title
|
||||
}
|
||||
image {
|
||||
src
|
||||
}
|
||||
synopsis
|
||||
title
|
||||
tracking {
|
||||
atiCustomVars {
|
||||
contentId
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (client, video_id),
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json'
|
||||
})['data']['playerPage']
|
||||
title = player_page['title']
|
||||
content_id = str_or_none(try_get(
|
||||
player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
|
||||
media_collection = player_page.get('mediaCollection') or {}
|
||||
if not media_collection and content_id:
|
||||
media_collection = self._download_json(
|
||||
'https://www.ardmediathek.de/play/media/' + content_id,
|
||||
content_id, fatal=False) or {}
|
||||
info = self._parse_media_info(
|
||||
media_collection, content_id or video_id,
|
||||
player_page.get('blockedByFsk'))
|
||||
age_limit = None
|
||||
description = player_page.get('synopsis')
|
||||
maturity_content_rating = player_page.get('maturityContentRating')
|
||||
if maturity_content_rating:
|
||||
age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
|
||||
if not age_limit and description:
|
||||
age_limit = int_or_none(self._search_regex(
|
||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||
info.update({
|
||||
'age_limit': age_limit,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||
'series': try_get(player_page, lambda x: x['show']['title']),
|
||||
'thumbnail': (media_collection.get('_previewImage')
|
||||
or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None))
|
||||
or self.get_thumbnail_from_html(display_id, url)),
|
||||
})
|
||||
info.update(self._ARD_extract_episode_info(info['title']))
|
||||
return info
|
||||
def fetch_page(page_num):
|
||||
for item in traverse_obj(call_api(page_num), ('teasers', ..., {dict})):
|
||||
item_id = traverse_obj(item, ('links', 'target', ('urlId', 'id')), 'id', get_all=False)
|
||||
if not item_id or item_id == playlist_id:
|
||||
continue
|
||||
item_mode = 'sammlung' if item.get('type') == 'compilation' else 'video'
|
||||
yield self.url_result(
|
||||
f'https://www.ardmediathek.de/{item_mode}/{item_id}',
|
||||
ie=(ARDMediathekCollectionIE if item_mode == 'sammlung' else ARDBetaMediathekIE),
|
||||
**traverse_obj(item, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('longTitle', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('broadcastedOn', {parse_iso8601}),
|
||||
}))
|
||||
|
||||
def get_thumbnail_from_html(self, display_id, url):
|
||||
webpage = self._download_webpage(url, display_id, fatal=False) or ''
|
||||
return (
|
||||
self._og_search_thumbnail(webpage, default=None)
|
||||
or self._html_search_meta('thumbnailUrl', webpage, default=None))
|
||||
page_data = call_api(0)
|
||||
full_id = join_nonempty(playlist_id, season_number, version, delim='_')
|
||||
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id,
|
||||
title=page_data.get('title'), description=page_data.get('synopsis'))
|
||||
|
||||
@@ -48,17 +48,7 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
}, {
|
||||
'note': 'No alt_title',
|
||||
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
|
||||
'info_dict': {
|
||||
'id': '110371-000-A',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20220718',
|
||||
'duration': 154,
|
||||
'timestamp': 1658162460,
|
||||
'description': 'md5:5890f36fe7dccfadb8b7c0891de54786',
|
||||
'title': 'La chaleur, supplice des arbres de rue',
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/CPE2sQDtD8GLQgt8DuYHLf/940x530',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
@@ -67,19 +57,37 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'note': 'age-restricted',
|
||||
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
|
||||
'info_dict': {
|
||||
'id': '110203-006-A',
|
||||
'chapters': 'count:16',
|
||||
'description': 'md5:cf592f1df52fe52007e3f8eac813c084',
|
||||
'alt_title': 'Zaz',
|
||||
'title': 'Baloise Session 2022',
|
||||
'timestamp': 1668445200,
|
||||
'duration': 4054,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530',
|
||||
'upload_date': '20221114',
|
||||
'id': '006785-000-A',
|
||||
'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba',
|
||||
'title': 'The Element of Crime',
|
||||
'timestamp': 1696111200,
|
||||
'duration': 5849,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
|
||||
'upload_date': '20230930',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'expected_warnings': ['geo restricted']
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
|
||||
'info_dict': {
|
||||
'id': '085374-003-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
|
||||
'timestamp': 1702872000,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
|
||||
'duration': 2594,
|
||||
'title': 'Die kurze Zeit der Jugend',
|
||||
'alt_title': 'Im hohen Norden geboren',
|
||||
'upload_date': '20231218',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}]
|
||||
|
||||
_GEO_BYPASS = True
|
||||
@@ -130,13 +138,25 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _fix_accessible_subs_locale(subs):
|
||||
updated_subs = {}
|
||||
for lang, sub_formats in subs.items():
|
||||
for format in sub_formats:
|
||||
if format.get('url', '').endswith('-MAL.m3u8'):
|
||||
lang += '-acc'
|
||||
updated_subs.setdefault(lang, []).append(format)
|
||||
return updated_subs
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||
langauge_code = self._LANG_MAP.get(lang)
|
||||
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id)
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
|
||||
'x-validated-age': '18'
|
||||
})
|
||||
|
||||
geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
|
||||
if geoblocking.get('restrictedArea'):
|
||||
@@ -181,6 +201,7 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
secondary_formats.extend(fmts)
|
||||
else:
|
||||
formats.extend(fmts)
|
||||
subs = self._fix_accessible_subs_locale(subs)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
elif stream['protocol'] in ('HTTPS', 'RTMP'):
|
||||
|
||||
168
yt_dlp/extractor/asobichannel.py
Normal file
168
yt_dlp/extractor/asobichannel.py
Normal file
@@ -0,0 +1,168 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AsobiChannelBaseIE(InfoExtractor):
    # Header sent with requests to the microCMS media API
    _MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'}

    def _extract_info(self, metadata):
        """Map a microCMS media record onto the common info-dict fields."""
        field_paths = {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('body', {clean_html}),
            'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}),
            'timestamp': ('publishedAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
        }
        return traverse_obj(metadata, field_paths)
|
||||
|
||||
|
||||
class AsobiChannelIE(AsobiChannelBaseIE):
    IE_NAME = 'asobichannel'
    IE_DESC = 'ASOBI CHANNEL'

    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p',
        'md5': '39df74e872afe032c4eb27b89144fc92',
        'info_dict': {
            'id': '1ypp48qd32p',
            'ext': 'mp4',
            'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
            'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2',
            'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png',
            'timestamp': 1697098247,
            'upload_date': '20231012',
            'modified_timestamp': 1698381162,
            'modified_date': '20231027',
            'channel': 'アイドルマスター',
            'channel_id': 'idolmaster',
        },
    }, {
        'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj',
        'md5': '229fa8fb5c591c75ce8c37a497f113f6',
        'info_dict': {
            'id': 'redigiwnjzqj',
            'ext': 'mp4',
            'title': '【おまけ放送】アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
            'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9',
            'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png',
            'modified_timestamp': 1697797125,
            'modified_date': '20231020',
            'timestamp': 1697261769,
            'upload_date': '20231014',
            'channel': 'アイドルマスター',
            'channel_id': 'idolmaster',
        },
    }]

    # Authorization header for the survapi endpoints; set in _real_initialize
    _survapi_header = None

    def _real_initialize(self):
        """Fetch an API token and build the bearer header used by survapi calls."""
        token = self._download_json(
            'https://asobichannel-api.asobistore.jp/api/v1/vspf/token', None,
            note='Retrieving API token')
        self._survapi_header = {'Authorization': f'Bearer {token}'}

    def _process_vod(self, video_id, metadata):
        """Return the formats of an on-demand video described by ``metadata``."""
        content_id = metadata['contents']['video_id']

        vod_data = self._download_json(
            f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid', video_id,
            headers=self._survapi_header, note='Downloading vod data')

        return {
            'formats': self._extract_m3u8_formats(vod_data['ex_content']['streaming_url'], video_id),
        }

    def _process_live(self, video_id, metadata):
        """Return live status, release timestamp and formats of a live event.

        Raises ExtractorError when the event reports a player type other than
        'poster' (not started yet) or 'player' (currently live).
        """
        content_id = metadata['contents']['video_id']
        event_data = self._download_json(
            f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id,
            headers=self._survapi_header, note='Downloading event data')

        player_type = traverse_obj(event_data, ('data', 'Player_type', {str}))
        if player_type == 'poster':
            self.raise_no_formats('Live event has not yet started', expected=True)
            live_status = 'is_upcoming'
            formats = []
        elif player_type == 'player':
            live_status = 'is_live'
            formats = self._extract_m3u8_formats(
                event_data['data']['Channel']['Custom_live_url'], video_id, live=True)
        else:
            # f-string so the offending value actually appears in the message
            raise ExtractorError(f'Unsupported player type {player_type!r}')

        return {
            'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})),
            'live_status': live_status,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Download the media record and dispatch on its video type (VOD or LIVE)."""
        video_id = self._match_id(url)

        metadata = self._download_json(
            f'https://channel.microcms.io/api/v1/media/{video_id}', video_id,
            headers=self._MICROCMS_HEADER)

        info = self._extract_info(metadata)

        video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str}))
        if video_type == 'VOD':
            return merge_dicts(info, self._process_vod(video_id, metadata))
        if video_type == 'LIVE':
            return merge_dicts(info, self._process_live(video_id, metadata))

        raise ExtractorError(f'Unexpected video type {video_type!r}')
|
||||
|
||||
|
||||
class AsobiChannelTagURLIE(AsobiChannelBaseIE):
    IE_NAME = 'asobichannel:tag'
    IE_DESC = 'ASOBI CHANNEL'

    _VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[a-z0-9-_]+)'
    _TESTS = [{
        'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja',
        'info_dict': {
            'id': 'bjhh-nbcja',
            'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od',
        'info_dict': {
            'id': 'hvm5qw3c6od',
            'title': 'アイマスMOIW2023ラジオ',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        """Build a playlist of every media record carrying the tag in ``url``."""
        tag_id = self._match_id(url)
        webpage = self._download_webpage(url, tag_id)

        # The playlist title comes from the page's Next.js data (non-fatal)
        nextjs_data = self._search_nextjs_data(webpage, tag_id, fatal=False)
        title = traverse_obj(nextjs_data, ('props', 'pageProps', 'data', 'name', {str}))

        media = self._download_json(
            f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})',
            tag_id, headers=self._MICROCMS_HEADER)

        def iter_entries():
            # Only records that carry an id can be turned into watch URLs
            for item in traverse_obj(media, ('contents', lambda _, v: v['id'])):
                entry = {
                    '_type': 'url',
                    'url': f'https://asobichannel.asobistore.jp/watch/{item["id"]}',
                    'ie_key': AsobiChannelIE.ie_key(),
                }
                entry.update(self._extract_info(item))
                yield entry

        return self.playlist_result(iter_entries(), tag_id, title)
|
||||
@@ -1,53 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class ATTTechChannelIE(InfoExtractor):
    _VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'
    _TEST = {
        'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
        'info_dict': {
            'id': '11316',
            'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
            'ext': 'flv',
            'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
            'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140127',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Scrape the RTMP URL and page metadata of a play-video page."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Lookups stay in this order: the RTMP URL search is the fatal one
        # and runs before the optional (fatal=False) searches
        rtmp_url = self._search_regex(
            r"url\s*:\s*'(rtmp://[^']+)'", webpage, 'video URL')
        media_id = self._search_regex(
            r'mediaid\s*=\s*(\d+)', webpage, 'video id', fatal=False)

        og_title = self._og_search_title(webpage)
        og_description = self._og_search_description(webpage)
        og_thumbnail = self._og_search_thumbnail(webpage)

        release_date = self._search_regex(
            r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
            webpage, 'upload date', fatal=False)
        upload_date = unified_strdate(release_date, False)

        info = {
            'id': media_id,
            'display_id': display_id,
            'url': rtmp_url,
            'ext': 'flv',
        }
        info['title'] = og_title
        info['description'] = og_description
        info['thumbnail'] = og_thumbnail
        info['upload_date'] = upload_date
        return info
|
||||
@@ -31,7 +31,7 @@ class BanByeBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class BanByeIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||
@@ -120,7 +120,7 @@ class BanByeIE(BanByeBaseIE):
|
||||
|
||||
|
||||
class BanByeChannelIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?channel/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?channel/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://banbye.com/channel/ch_wrealu24',
|
||||
'info_dict': {
|
||||
@@ -152,7 +152,7 @@ class BanByeChannelIE(BanByeBaseIE):
|
||||
'sort': 'new',
|
||||
'limit': self._PAGE_SIZE,
|
||||
'offset': page_num * self._PAGE_SIZE,
|
||||
}, note=f'Downloading page {page_num+1}')
|
||||
}, note=f'Downloading page {page_num + 1}')
|
||||
return [
|
||||
self.url_result(f"{self._VIDEO_BASE}/{video['_id']}", BanByeIE)
|
||||
for video in data['items']
|
||||
|
||||
@@ -317,16 +317,25 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
last_exception = None
|
||||
formats, subtitles = [], {}
|
||||
for media_set in self._MEDIA_SETS:
|
||||
try:
|
||||
return self._download_media_selector_url(
|
||||
fmts, subs = self._download_media_selector_url(
|
||||
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
|
||||
formats.extend(fmts)
|
||||
if subs:
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
except BBCCoUkIE.MediaSelectionError as e:
|
||||
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
|
||||
last_exception = e
|
||||
continue
|
||||
self._raise_extractor_error(e)
|
||||
self._raise_extractor_error(last_exception)
|
||||
if last_exception:
|
||||
if formats or subtitles:
|
||||
self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
|
||||
else:
|
||||
self._raise_extractor_error(last_exception)
|
||||
return formats, subtitles
|
||||
|
||||
def _download_media_selector_url(self, url, programme_id=None):
|
||||
media_selection = self._download_json(
|
||||
@@ -1188,7 +1197,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
if initial_data is None:
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
|
||||
'preload state', default={})
|
||||
'preload state', default='{}')
|
||||
else:
|
||||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||
|
||||
@@ -3,14 +3,13 @@ from .youtube import YoutubeIE, YoutubeTabIE
|
||||
|
||||
|
||||
class BeatBumpVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https://beatbump\.ml/listen\?id=(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'https://beatbump\.(?:ml|io)/listen\?id=(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs',
|
||||
'md5': '5ff3fff41d3935b9810a9731e485fe66',
|
||||
'info_dict': {
|
||||
'id': 'MgNrAu2pzNs',
|
||||
'ext': 'mp4',
|
||||
'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
|
||||
'artist': 'Stephen',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
|
||||
@@ -22,10 +21,9 @@ class BeatBumpVideoIE(InfoExtractor):
|
||||
'alt_title': 'Voyeur Girl',
|
||||
'view_count': int,
|
||||
'track': 'Voyeur Girl',
|
||||
'uploader': 'Stephen - Topic',
|
||||
'uploader': 'Stephen',
|
||||
'title': 'Voyeur Girl',
|
||||
'channel_follower_count': int,
|
||||
'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
@@ -36,7 +34,12 @@ class BeatBumpVideoIE(InfoExtractor):
|
||||
'tags': 'count:11',
|
||||
'creator': 'Stephen',
|
||||
'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
|
||||
}
|
||||
'channel_is_verified': True,
|
||||
'heatmap': 'count:100',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beatbump.io/listen?id=LDGZAprNGWo',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -45,7 +48,7 @@ class BeatBumpVideoIE(InfoExtractor):
|
||||
|
||||
|
||||
class BeatBumpPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https://beatbump\.ml/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'https://beatbump\.(?:ml|io)/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE',
|
||||
'playlist_count': 50,
|
||||
@@ -56,25 +59,28 @@ class BeatBumpPlaylistIE(InfoExtractor):
|
||||
'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
|
||||
'description': '',
|
||||
'tags': [],
|
||||
'modified_date': '20221223',
|
||||
}
|
||||
'modified_date': '20231110',
|
||||
},
|
||||
'expected_warnings': ['YouTube Music is not directly supported'],
|
||||
}, {
|
||||
'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'playlist_mincount': 1,
|
||||
'params': {'flatplaylist': True},
|
||||
'info_dict': {
|
||||
'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'uploader_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'uploader_id': '@NoCopyrightSounds',
|
||||
'channel_follower_count': int,
|
||||
'title': 'NoCopyrightSounds - Videos',
|
||||
'title': 'NoCopyrightSounds',
|
||||
'uploader': 'NoCopyrightSounds',
|
||||
'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a',
|
||||
'channel': 'NoCopyrightSounds',
|
||||
'tags': 'count:12',
|
||||
'tags': 'count:65',
|
||||
'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'channel_is_verified': True,
|
||||
},
|
||||
'expected_warnings': ['YouTube Music is not directly supported'],
|
||||
}, {
|
||||
'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
|
||||
'playlist_mincount': 1,
|
||||
@@ -84,16 +90,20 @@ class BeatBumpPlaylistIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
|
||||
'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
|
||||
'view_count': int,
|
||||
'channel_url': 'https://www.youtube.com/@NoCopyrightSounds',
|
||||
'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
'uploader_id': '@NoCopyrightSounds',
|
||||
'title': 'NCS : All Releases 💿',
|
||||
'uploader': 'NoCopyrightSounds',
|
||||
'availability': 'public',
|
||||
'channel': 'NoCopyrightSounds',
|
||||
'tags': [],
|
||||
'modified_date': '20221225',
|
||||
'modified_date': '20231112',
|
||||
'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['YouTube Music is not directly supported'],
|
||||
}, {
|
||||
'url': 'https://beatbump.io/playlist/VLPLFCHGavqRG-q_2ZhmgU2XB2--ZY6irT1c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -3,6 +3,7 @@ from ..utils import url_basename
|
||||
|
||||
|
||||
class BehindKinkIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
# TODO Remove - Reason: Outdated Site
|
||||
|
||||
|
||||
class BetIE(MTVServicesInfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
|
||||
@@ -5,6 +5,7 @@ from ..utils import extract_attributes
|
||||
|
||||
|
||||
class BFIPlayerIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'bfi:player'
|
||||
_VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
|
||||
_TEST = {
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import extract_attributes
|
||||
class BFMTVBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
|
||||
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _brightcove_url_result(self, video_id, video_block):
|
||||
@@ -55,8 +55,11 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
'ext': 'mp4',
|
||||
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'uploader_id': '876450610001',
|
||||
'upload_date': '20171018',
|
||||
'timestamp': 1508329950,
|
||||
'upload_date': '20220926',
|
||||
'timestamp': 1664207191,
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://.+/image\.jpg',
|
||||
'tags': [],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
||||
@@ -29,7 +29,8 @@ class BigoIE(InfoExtractor):
|
||||
|
||||
info_raw = self._download_json(
|
||||
'https://ta.bigo.tv/official_website/studio/getInternalStudioInfo',
|
||||
user_id, data=urlencode_postdata({'siteId': user_id}))
|
||||
user_id, data=urlencode_postdata({'siteId': user_id}),
|
||||
headers={'Accept': 'application/json'})
|
||||
|
||||
if not isinstance(info_raw, dict):
|
||||
raise ExtractorError('Received invalid JSON data')
|
||||
|
||||
@@ -2,6 +2,7 @@ import base64
|
||||
import functools
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
@@ -16,9 +17,12 @@ from ..utils import (
|
||||
InAdvancePagedList,
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
make_archive_id,
|
||||
@@ -88,6 +92,12 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
return formats
|
||||
|
||||
def _download_playinfo(self, video_id, cid):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||
note=f'Downloading video formats for cid {cid}')['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
for idx, line in enumerate(json_data.get('body') or []):
|
||||
@@ -96,7 +106,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
f'{line["content"]}\n\n')
|
||||
return srt_data
|
||||
|
||||
def _get_subtitles(self, video_id, aid, cid):
|
||||
def _get_subtitles(self, video_id, cid, aid=None):
|
||||
subtitles = {
|
||||
'danmaku': [{
|
||||
'ext': 'xml',
|
||||
@@ -104,8 +114,15 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
}]
|
||||
}
|
||||
|
||||
video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id)
|
||||
for s in traverse_obj(video_info_json, ('data', 'subtitle', 'subtitles', ...)):
|
||||
subtitle_info = traverse_obj(self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
|
||||
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
|
||||
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
|
||||
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
|
||||
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
|
||||
for s in subs_list:
|
||||
subtitles.setdefault(s['lan'], []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
|
||||
@@ -155,7 +172,54 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
for entry in traverse_obj(season_info, (
|
||||
'result', 'main_section', 'episodes',
|
||||
lambda _, v: url_or_none(v['share_url']) and v['id'])):
|
||||
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
|
||||
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
|
||||
|
||||
def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
|
||||
cid_edges = cid_edges or {}
|
||||
division_data = self._download_json(
|
||||
'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
|
||||
query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
|
||||
note=f'Extracting divisions from edge {edge_id}')
|
||||
edges.setdefault(edge_id, {}).update(
|
||||
traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
|
||||
'title': ('title', {str}),
|
||||
'cid': ('cid', {int_or_none}),
|
||||
}), get_all=False))
|
||||
|
||||
edges[edge_id].update(traverse_obj(division_data, ('data', {
|
||||
'title': ('title', {str}),
|
||||
'choices': ('edges', 'questions', ..., 'choices', ..., {
|
||||
'edge_id': ('id', {int_or_none}),
|
||||
'cid': ('cid', {int_or_none}),
|
||||
'text': ('option', {str}),
|
||||
}),
|
||||
})))
|
||||
# use dict to combine edges that use the same video section (same cid)
|
||||
cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
|
||||
for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
|
||||
if choice['edge_id'] not in edges:
|
||||
edges[choice['edge_id']] = {'cid': choice['cid']}
|
||||
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
|
||||
return cid_edges
|
||||
|
||||
def _get_interactive_entries(self, video_id, cid, metainfo):
|
||||
graph_version = traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
|
||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||
for cid, edges in cid_edges.items():
|
||||
play_info = self._download_playinfo(video_id, cid)
|
||||
yield {
|
||||
**metainfo,
|
||||
'id': f'{video_id}_{cid}',
|
||||
'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
|
||||
'formats': self.extract_formats(play_info),
|
||||
'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
@@ -180,7 +244,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# old av URL version
|
||||
'note': 'old av URL version',
|
||||
'url': 'http://www.bilibili.com/video/av1074402/',
|
||||
'info_dict': {
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||
@@ -212,7 +276,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'id': 'BV1bK411W797_p1',
|
||||
'ext': 'mp4',
|
||||
'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
|
||||
'tags': 'count:11',
|
||||
'tags': 'count:10',
|
||||
'timestamp': 1589601697,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'uploader': '打牌还是打桩',
|
||||
@@ -232,7 +296,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'id': 'BV1bK411W797_p1',
|
||||
'ext': 'mp4',
|
||||
'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
|
||||
'tags': 'count:11',
|
||||
'tags': 'count:10',
|
||||
'timestamp': 1589601697,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'uploader': '打牌还是打桩',
|
||||
@@ -343,18 +407,120 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'interactive/split-path video',
|
||||
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
|
||||
'info_dict': {
|
||||
'id': 'BV1af4y1H7ga',
|
||||
'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
|
||||
'timestamp': 1630500414,
|
||||
'upload_date': '20210901',
|
||||
'description': 'md5:01113e39ab06e28042d74ac356a08786',
|
||||
'tags': list,
|
||||
'uploader': '钉宫妮妮Ninico',
|
||||
'duration': 1503,
|
||||
'uploader_id': '8881297',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
'playlist_count': 33,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'BV1af4y1H7ga_400950101',
|
||||
'ext': 'mp4',
|
||||
'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
|
||||
'timestamp': 1630500414,
|
||||
'upload_date': '20210901',
|
||||
'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
|
||||
'tags': list,
|
||||
'uploader': '钉宫妮妮Ninico',
|
||||
'duration': 11.605,
|
||||
'uploader_id': '8881297',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'note': '301 redirect to bangumi link',
|
||||
'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
|
||||
'info_dict': {
|
||||
'id': '288525',
|
||||
'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
|
||||
'ext': 'mp4',
|
||||
'series': '我和我的祖国',
|
||||
'series_id': '4780',
|
||||
'season': '幕后纪实',
|
||||
'season_id': '28609',
|
||||
'season_number': 1,
|
||||
'episode': '钱学森弹道和乘波体飞行器是什么?',
|
||||
'episode_id': '288525',
|
||||
'episode_number': 105,
|
||||
'duration': 1183.957,
|
||||
'timestamp': 1571648124,
|
||||
'upload_date': '20191021',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
|
||||
'info_dict': {
|
||||
'id': 'BV1jL41167ZG',
|
||||
'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'skip': 'supporter-only video',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
|
||||
'info_dict': {
|
||||
'id': 'BV1Ks411f7aQ',
|
||||
'title': '【BD1080P】狼与香辛料I【华盟】',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
|
||||
'info_dict': {
|
||||
'id': 'BV1GJ411x7h7',
|
||||
'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'skip': 'geo-restricted',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
if not self._match_valid_url(urlh.url):
|
||||
return self.url_result(urlh.url)
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
else:
|
||||
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
||||
play_info_obj = self._search_json(
|
||||
r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
|
||||
if not play_info_obj:
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
|
||||
self.raise_login_required()
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
|
||||
raise ExtractorError(
|
||||
'This video may be deleted or geo-restricted. '
|
||||
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
|
||||
play_info = traverse_obj(play_info_obj, ('data', {dict}))
|
||||
if not play_info:
|
||||
if traverse_obj(play_info_obj, 'code') == 87007:
|
||||
toast = get_element_by_class('tips-toast', webpage) or ''
|
||||
msg = clean_html(
|
||||
f'{get_element_by_class("belongs-to", toast) or ""},'
|
||||
+ (get_element_by_class('level', toast) or ''))
|
||||
raise ExtractorError(
|
||||
f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
|
||||
raise ExtractorError('Failed to extract play info')
|
||||
video_data = initial_state['videoData']
|
||||
|
||||
video_id, title = video_data['bvid'], video_data.get('title')
|
||||
@@ -385,10 +551,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
festival_info = {}
|
||||
if is_festival:
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||
note='Extracting festival video formats')['data']
|
||||
play_info = self._download_playinfo(video_id, cid)
|
||||
|
||||
festival_info = traverse_obj(initial_state, {
|
||||
'uploader': ('videoInfo', 'upName'),
|
||||
@@ -397,7 +560,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
|
||||
}, get_all=False)
|
||||
|
||||
return {
|
||||
metainfo = {
|
||||
**traverse_obj(initial_state, {
|
||||
'uploader': ('upData', 'name'),
|
||||
'uploader_id': ('upData', 'mid', {str_or_none}),
|
||||
@@ -413,28 +576,59 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'comment_count': ('stat', 'reply', {int_or_none}),
|
||||
}, get_all=False),
|
||||
'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
|
||||
'formats': self.extract_formats(play_info),
|
||||
'_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
|
||||
'title': title,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, aid, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': {'Referer': url},
|
||||
}
|
||||
|
||||
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
|
||||
if is_interactive:
|
||||
return self.playlist_result(
|
||||
self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
|
||||
'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
})
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'formats': self.extract_formats(play_info),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
|
||||
'info_dict': {
|
||||
'id': '21495',
|
||||
'ext': 'mp4',
|
||||
'series': '悠久之翼',
|
||||
'series_id': '774',
|
||||
'season': '第二季',
|
||||
'season_id': '1182',
|
||||
'season_number': 2,
|
||||
'episode': 'forever/ef',
|
||||
'episode_id': '21495',
|
||||
'episode_number': 12,
|
||||
'title': '12 forever/ef',
|
||||
'duration': 1420.791,
|
||||
'timestamp': 1320412200,
|
||||
'upload_date': '20111104',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
|
||||
'info_dict': {
|
||||
'id': '267851',
|
||||
'ext': 'mp4',
|
||||
'series': '鬼灭之刃',
|
||||
'series_id': '4358',
|
||||
'season': '鬼灭之刃',
|
||||
'season': '立志篇',
|
||||
'season_id': '26801',
|
||||
'season_number': 1,
|
||||
'episode': '残酷',
|
||||
@@ -446,13 +640,32 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'upload_date': '20190406',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
},
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
|
||||
'skip': 'Geo-restricted',
|
||||
}, {
|
||||
'note': 'a making-of which falls outside main section',
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep345120',
|
||||
'info_dict': {
|
||||
'id': '345120',
|
||||
'ext': 'mp4',
|
||||
'series': '鬼灭之刃',
|
||||
'series_id': '4358',
|
||||
'season': '立志篇',
|
||||
'season_id': '26801',
|
||||
'season_number': 1,
|
||||
'episode': '炭治郎篇',
|
||||
'episode_id': '345120',
|
||||
'episode_number': 27,
|
||||
'title': '#1 炭治郎篇',
|
||||
'duration': 1922.129,
|
||||
'timestamp': 1602853860,
|
||||
'upload_date': '20201016',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
episode_id = video_id[2:]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
|
||||
if '您所在的地区无法观看本片' in webpage:
|
||||
raise GeoRestrictedError('This video is restricted')
|
||||
@@ -461,7 +674,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
|
||||
headers = {'Referer': url, **self.geo_verification_headers()}
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
headers=headers)
|
||||
premium_only = play_info.get('code') == -10403
|
||||
@@ -472,40 +685,43 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
bangumi_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
|
||||
'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
|
||||
query={'ep_id': episode_id}, headers=headers)['result']
|
||||
|
||||
episode_number, episode_info = next((
|
||||
(idx, ep) for idx, ep in enumerate(traverse_obj(
|
||||
bangumi_info, ('episodes', ..., {dict})), 1)
|
||||
bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
|
||||
if str_or_none(ep.get('id')) == episode_id), (1, {}))
|
||||
|
||||
season_id = bangumi_info.get('season_id')
|
||||
season_number = season_id and next((
|
||||
idx + 1 for idx, e in enumerate(
|
||||
season_number, season_title = season_id and next((
|
||||
(idx + 1, e.get('season_title')) for idx, e in enumerate(
|
||||
traverse_obj(bangumi_info, ('seasons', ...)))
|
||||
if e.get('season_id') == season_id
|
||||
), None)
|
||||
), (None, None))
|
||||
|
||||
aid = episode_info.get('aid')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': episode_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(bangumi_info, {
|
||||
'series': ('series', 'series_title', {str}),
|
||||
'series_id': ('series', 'series_id', {str_or_none}),
|
||||
'thumbnail': ('square_cover', {url_or_none}),
|
||||
}),
|
||||
'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
|
||||
'episode': episode_info.get('long_title'),
|
||||
**traverse_obj(episode_info, {
|
||||
'episode': ('long_title', {str}),
|
||||
'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
|
||||
'timestamp': ('pub_time', {int_or_none}),
|
||||
'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
|
||||
}),
|
||||
'episode_id': episode_id,
|
||||
'episode_number': int_or_none(episode_info.get('title')) or episode_number,
|
||||
'season': str_or_none(season_title),
|
||||
'season_id': str_or_none(season_id),
|
||||
'season_number': season_number,
|
||||
'timestamp': int_or_none(episode_info.get('pub_time')),
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
|
||||
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': headers,
|
||||
}
|
||||
@@ -517,17 +733,53 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
||||
'info_dict': {
|
||||
'id': '24097891',
|
||||
'title': 'CAROLE & TUESDAY',
|
||||
'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
|
||||
},
|
||||
'playlist_mincount': 25,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/bangumi/media/md1565/',
|
||||
'info_dict': {
|
||||
'id': '1565',
|
||||
'title': '攻壳机动队 S.A.C. 2nd GIG',
|
||||
'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
|
||||
},
|
||||
'playlist_count': 26,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '68540',
|
||||
'ext': 'mp4',
|
||||
'series': '攻壳机动队',
|
||||
'series_id': '1077',
|
||||
'season': '第二季',
|
||||
'season_id': '1565',
|
||||
'season_number': 2,
|
||||
'episode': '再启动 REEMBODY',
|
||||
'episode_id': '68540',
|
||||
'episode_number': 1,
|
||||
'title': '1 再启动 REEMBODY',
|
||||
'duration': 1525.777,
|
||||
'timestamp': 1425074413,
|
||||
'upload_date': '20150227',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
},
|
||||
}],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
ss_id = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']
|
||||
|
||||
return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)
|
||||
initial_state = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
|
||||
ss_id = initial_state['mediaInfo']['season_id']
|
||||
|
||||
return self.playlist_result(
|
||||
self._get_episodes_from_season(ss_id, url), media_id,
|
||||
**traverse_obj(initial_state, ('mediaInfo', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('evaluate', {str}),
|
||||
})))
|
||||
|
||||
|
||||
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
@@ -535,15 +787,183 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
|
||||
'info_dict': {
|
||||
'id': '26801'
|
||||
'id': '26801',
|
||||
'title': '鬼灭之刃',
|
||||
'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
|
||||
},
|
||||
'playlist_mincount': 26
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss2251',
|
||||
'info_dict': {
|
||||
'id': '2251',
|
||||
'title': '玲音',
|
||||
'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '50188',
|
||||
'ext': 'mp4',
|
||||
'series': '玲音',
|
||||
'series_id': '1526',
|
||||
'season': 'TV',
|
||||
'season_id': '2251',
|
||||
'season_number': 1,
|
||||
'episode': 'WEIRD',
|
||||
'episode_id': '50188',
|
||||
'episode_number': 1,
|
||||
'title': '1 WEIRD',
|
||||
'duration': 1436.992,
|
||||
'timestamp': 1343185080,
|
||||
'upload_date': '20120725',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
},
|
||||
}],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
ss_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, ss_id)
|
||||
metainfo = traverse_obj(
|
||||
self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
|
||||
('itemListElement', ..., {
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', {str}),
|
||||
}), get_all=False)
|
||||
|
||||
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)
|
||||
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
|
||||
|
||||
|
||||
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Shared helpers for the Bilibili "cheese" (course) extractors."""

    # Sent with every cheese API request and handed to the downloader via 'http_headers'
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for a single course episode.

        @param season_info  The 'data' object returned by _download_season_info()
        @param ep_id        Episode id (int or numeric string)

        Raises ExtractorError when the episode is not listed in season_info or
        its 'ep_status' is -1; calls self.raise_login_required() when the
        episode is not flagged as playable.
        """
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        if not episode_info:
            # Without this guard the subscripting below dies with an opaque TypeError
            raise ExtractorError(f'Unable to find episode {ep_id} in the season info')
        aid, cid = episode_info['aid'], episode_info['cid']

        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Entries produced for a season playlist are attributed to the episode extractor
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                # "<index> - <title>", skipping whichever part is empty
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Fetch the season 'data' object.

        @param query_key  Name of the query parameter used for the lookup
                          (callers in this file pass 'ep_id' or 'season_id')
        @param video_id   Value for that parameter; also used as the display id
        """
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
|
||||
|
||||
|
||||
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    """Extractor for a single Bilibili course episode (cheese/play/ep<id>)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        episode_id = self._match_id(url)
        # The season info is looked up through the episode id ('ep_id' query key)
        season_info = self._download_season_info('ep_id', episode_id)
        return self._extract_episode(season_info, episode_id)
|
||||
|
||||
|
||||
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    """Extractor for a whole Bilibili course season (cheese/play/ss<id>) as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            },
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        """Lazily yield the info dict of every episode whose 'episode_can_view' is truthy."""
        viewable_ids = traverse_obj(
            season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
        yield from (
            self._extract_episode(season_info, episode_id) for episode_id in viewable_ids)

    def _real_extract(self, url):
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)

        # Playlist-level metadata; fields of the wrong type are simply left out
        playlist_meta = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })
        return self.playlist_result(
            self._get_cheese_entries(season_info), season_id, **playlist_meta)
|
||||
|
||||
|
||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||
@@ -1202,6 +1622,7 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _call_api(self, endpoint, *args, **kwargs):
|
||||
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
|
||||
@@ -1239,19 +1660,34 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
'aid': aid,
|
||||
})) or {}
|
||||
subtitles = {}
|
||||
for sub in sub_json.get('subtitles') or []:
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
sub_data = self._download_json(
|
||||
sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
|
||||
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
|
||||
if not sub_data:
|
||||
continue
|
||||
subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(sub_data)
|
||||
})
|
||||
fetched_urls = set()
|
||||
for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
|
||||
for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
|
||||
if url in fetched_urls:
|
||||
continue
|
||||
fetched_urls.add(url)
|
||||
sub_ext = determine_ext(url)
|
||||
sub_lang = sub.get('lang_key') or 'en'
|
||||
|
||||
if sub_ext == 'ass':
|
||||
subtitles.setdefault(sub_lang, []).append({
|
||||
'ext': 'ass',
|
||||
'url': url,
|
||||
})
|
||||
elif sub_ext == 'json':
|
||||
sub_data = self._download_json(
|
||||
url, ep_id or aid, fatal=False,
|
||||
note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
|
||||
errnote='Unable to download subtitles')
|
||||
|
||||
if sub_data:
|
||||
subtitles.setdefault(sub_lang, []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(sub_data),
|
||||
})
|
||||
else:
|
||||
self.report_warning('Unexpected subtitle extension', ep_id or aid)
|
||||
|
||||
return subtitles
|
||||
|
||||
def _get_formats(self, *, ep_id=None, aid=None):
|
||||
@@ -1297,7 +1733,9 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
def _parse_video_metadata(self, video_data):
    """Map the fields of a video_data mapping onto info-dict keys.

    The episode number is taken from a leading "E<digits>" in 'title_display'
    (followed by end of string or " - "); it is None when that pattern is absent.
    """
    display_title = video_data.get('title_display')
    episode_number = self._search_regex(
        r'^E(\d+)(?:$| - )', display_title or '', 'episode number', default=None)
    return {
        'title': display_title or video_data.get('title'),
        'description': video_data.get('desc'),
        'thumbnail': video_data.get('cover'),
        'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
        'episode_number': int_or_none(episode_number),
    }
|
||||
@@ -1394,17 +1832,6 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'episode_number': 140,
|
||||
},
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
|
||||
}, {
|
||||
'url': 'https://www.bilibili.tv/en/video/2041863208',
|
||||
'info_dict': {
|
||||
'id': '2041863208',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1670874843,
|
||||
'description': 'Scheduled for April 2023.\nStudio: ufotable',
|
||||
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
|
||||
'upload_date': '20221212',
|
||||
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
|
||||
},
|
||||
}, {
|
||||
# episode comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/play/34580/340317',
|
||||
@@ -1445,9 +1872,9 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
|
||||
'timestamp': 1667891924,
|
||||
'upload_date': '20221108',
|
||||
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
|
||||
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
|
||||
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
@@ -1510,10 +1937,12 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
|
||||
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
|
||||
return merge_dicts(
|
||||
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
|
||||
'title': self._html_search_meta('og:title', webpage),
|
||||
'description': self._html_search_meta('og:description', webpage)
|
||||
})
|
||||
self._parse_video_metadata(video_data), {
|
||||
'title': get_element_by_class(
|
||||
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
|
||||
'description': get_element_by_class(
|
||||
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
|
||||
}, self._search_json_ld(webpage, video_id, default={}))
|
||||
|
||||
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
|
||||
comment_api_raw_data = self._download_json(
|
||||
@@ -1601,7 +2030,8 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'formats': self._get_formats(ep_id=ep_id, aid=aid),
|
||||
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
|
||||
'chapters': chapters,
|
||||
'__post_extractor': self.extract_comments(video_id, ep_id)
|
||||
'__post_extractor': self.extract_comments(video_id, ep_id),
|
||||
'http_headers': self._HEADERS,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,110 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from .vk import VKIE
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class BIQLEIE(InfoExtractor):
    """Extractor for biqle.com/.org/.ru watch pages."""

    _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
    _TESTS = [{
        'url': 'https://biqle.ru/watch/-2000421746_85421746',
        'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
        'info_dict': {
            'id': '-2000421746_85421746',
            'ext': 'mp4',
            'title': 'Forsaken By Hope Studio Clip',
            'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
            'upload_date': '19700101',
            'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
            'timestamp': 0,
        },
    }, {
        'url': 'http://biqle.org/watch/-44781847_168547604',
        'md5': '7f24e72af1db0edf7c1aaba513174f97',
        'info_dict': {
            'id': '-44781847_168547604',
            'ext': 'mp4',
            'title': 'Ребенок в шоке от автоматической мойки',
            'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
            'timestamp': 1396633454,
            'upload_date': '20140404',
            'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Page-level metadata comes from <meta> tags on the watch page
        title = self._html_search_meta('name', webpage, 'Title', fatal=False)
        upload_date_meta = self._html_search_meta(
            'uploadDate', webpage, 'Upload Date', default=None)
        timestamp = unified_timestamp(upload_date_meta)
        description = self._html_search_meta('description', webpage, 'Description', default=None)

        # The embed URL is the page-global prefix followed by the per-video hash
        embed_prefix = self._search_regex(
            r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
            webpage, 'global Embed url')
        embed_hash = self._search_regex(
            r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')
        embed_url = embed_prefix + embed_hash

        # Hand over to the VK extractor when the embed is one it accepts
        if VKIE.suitable(embed_url):
            return self.url_result(embed_url, VKIE.ie_key(), video_id)

        embed_page = self._download_webpage(
            embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})

        glob_params_js = self._search_regex(
            r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
            embed_page, 'Global Parameters')
        glob_params = self._parse_json(glob_params_js, video_id, transform_source=js_to_json)
        # 'server' holds the host name as reversed base64
        host_name = compat_b64decode(glob_params['server'][::-1]).decode()

        api_response = self._download_json(
            f'https://{host_name}/method/video.get/{video_id}', video_id,
            headers={'Referer': url}, query={
                'token': glob_params['video']['access_token'],
                'videos': video_id,
                'ckey': glob_params['c_key'],
                'credentials': glob_params['video']['credentials'],
            })
        item = api_response['response']['items'][0]

        formats = []
        for file_key, file_url in item.get('files', {}).items():
            if file_key == 'external':
                # Externally hosted video: delegate to whichever extractor matches
                return self.url_result(file_url)
            ext, height = file_key.split('_')
            extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
            if not extra_key:
                continue
            formats.append({
                'format_id': f'{height}p',
                # file_url[8:] drops the first 8 characters before re-rooting the path on host_name
                'url': f'https://{host_name}/{file_url[8:]}&videos={video_id}&extra_key={extra_key}',
                'height': int_or_none(height),
                'ext': ext,
            })

        # Every non-empty 'photo_<width>' field of the item is a thumbnail
        thumbnails = [{
            'id': key.replace('photo_', ''),
            'url': value,
            'width': int_or_none(key.replace('photo_', '')),
        } for key, value in item.items() if key.startswith('photo_') and value]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'comment_count': int_or_none(item.get('comments')),
            'description': description,
            'duration': int_or_none(item.get('duration')),
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'view_count': int_or_none(item.get('views')),
        }
|
||||
@@ -7,8 +7,10 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
get_element_html_by_class,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
@@ -17,6 +19,7 @@ from ..utils import (
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -34,6 +37,25 @@ class BitChuteIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'BitChute',
|
||||
'upload_date': '20170103',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||
'channel': 'BitChute',
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
|
||||
},
|
||||
}, {
|
||||
# test case: video with different channel and uploader
|
||||
'url': 'https://www.bitchute.com/video/Yti_j9A-UZ4/',
|
||||
'md5': 'f10e6a8e787766235946d0868703f1d0',
|
||||
'info_dict': {
|
||||
'id': 'Yti_j9A-UZ4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Israel at War | Full Measure',
|
||||
'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'sharylattkisson',
|
||||
'upload_date': '20231106',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
|
||||
'channel': 'Full Measure with Sharyl Attkisson',
|
||||
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
|
||||
},
|
||||
}, {
|
||||
# video not downloadable in browser, but we can recover it
|
||||
@@ -48,6 +70,9 @@ class BitChuteIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'BitChute',
|
||||
'upload_date': '20181113',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||
'channel': 'BitChute',
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
|
||||
},
|
||||
'params': {'check_formats': None},
|
||||
}, {
|
||||
@@ -99,6 +124,11 @@ class BitChuteIE(InfoExtractor):
|
||||
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
|
||||
self.raise_geo_restricted(reason)
|
||||
|
||||
@staticmethod
def _make_url(html):
    """Return the href of the 'spa'-classed element in html, joined onto the BitChute origin.

    A missing element is replaced by '' before attribute extraction, so the
    href lookup then yields None and that None is what gets passed to urljoin().
    """
    spa_element = get_element_html_by_class('spa', html) or ''
    href = extract_attributes(spa_element).get('href')
    return urljoin('https://www.bitchute.com', href)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
@@ -121,12 +151,19 @@ class BitChuteIE(InfoExtractor):
|
||||
'Video is unavailable. Please make sure this video is playable in the browser '
|
||||
'before reporting this issue.', expected=True, video_id=video_id)
|
||||
|
||||
details = get_element_by_class('details', webpage) or ''
|
||||
uploader_html = get_element_html_by_class('creator', details) or ''
|
||||
channel_html = get_element_html_by_class('name', details) or ''
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader': clean_html(get_element_by_class('owner', webpage)),
|
||||
'uploader': clean_html(uploader_html),
|
||||
'uploader_url': self._make_url(uploader_html),
|
||||
'channel': clean_html(channel_html),
|
||||
'channel_url': self._make_url(channel_html),
|
||||
'upload_date': unified_strdate(self._search_regex(
|
||||
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
|
||||
'formats': formats,
|
||||
@@ -154,6 +191,9 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'BitChute',
|
||||
'upload_date': '20170103',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||
'channel': 'BitChute',
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
'duration': 16,
|
||||
'view_count': int,
|
||||
},
|
||||
@@ -169,7 +209,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'wV9Imujxasw9',
|
||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||
'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
|
||||
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BitwaveReplayIE(InfoExtractor):
    """Extractor for bitwave.tv replay pages (/<user>/replay/<id>)."""

    IE_NAME = 'bitwave:replay'
    _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<user>\w+)/replay/(?P<id>\w+)/?$'
    _TEST = {
        'url': 'https://bitwave.tv/RhythmicCarnage/replay/z4P6eq5L7WDrM85UCrVr',
        'only_matching': True,
    }

    def _real_extract(self, url):
        replay_id = self._match_id(url)
        # Everything needed lives under the 'data' key of the API response
        replay_data = self._download_json(
            f'https://api.bitwave.tv/v1/replays/{replay_id}', replay_id)['data']

        return {
            'id': replay_id,
            'title': replay_data['title'],
            # The API's 'name' field serves as both uploader and uploader id
            'uploader': replay_data['name'],
            'uploader_id': replay_data['name'],
            'url': replay_data['url'],
            'thumbnails': [
                {'url': thumbnail_url} for thumbnail_url in replay_data['thumbnails']],
        }
|
||||
|
||||
|
||||
class BitwaveStreamIE(InfoExtractor):
    """Extractor for bitwave.tv channel pages (/<user>), reported as live streams."""

    IE_NAME = 'bitwave:stream'
    _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<id>\w+)/?$'
    _TEST = {
        'url': 'https://bitwave.tv/doomtube',
        'only_matching': True,
    }

    def _real_extract(self, url):
        username = self._match_id(url)
        channel_data = self._download_json(
            f'https://api.bitwave.tv/v1/channels/{username}', username)['data']

        # The channel's 'url' field is fed to the HLS manifest parser
        formats = self._extract_m3u8_formats(channel_data['url'], username, 'mp4')

        return {
            'id': username,
            'title': channel_data['title'],
            'uploader': username,
            'uploader_id': username,
            'formats': formats,
            'thumbnail': channel_data['thumbnail'],
            'is_live': True,
            'view_count': channel_data['viewCount'],
        }
|
||||
@@ -22,7 +22,7 @@ class BleacherReportIE(InfoExtractor):
|
||||
'upload_date': '20150615',
|
||||
'uploader': 'Team Stream Now ',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
'skip': 'Video removed',
|
||||
}, {
|
||||
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
|
||||
'md5': '6a5cd403418c7b01719248ca97fb0692',
|
||||
@@ -70,8 +70,6 @@ class BleacherReportIE(InfoExtractor):
|
||||
video_type = video['type']
|
||||
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
|
||||
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
|
||||
elif video_type == 'ooyala.com':
|
||||
info['url'] = 'ooyala:%s' % video['id']
|
||||
elif video_type == 'youtube.com':
|
||||
info['url'] = video['id']
|
||||
elif video_type == 'vine.co':
|
||||
|
||||
@@ -1,16 +1,17 @@
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
# try_get,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BoxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)/file/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||
@@ -18,11 +19,12 @@ class BoxIE(InfoExtractor):
|
||||
'id': '510727257538',
|
||||
'ext': 'mp4',
|
||||
'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
|
||||
'uploader': 'MLS Video',
|
||||
'uploader': '',
|
||||
'timestamp': 1566320259,
|
||||
'upload_date': '20190820',
|
||||
'uploader_id': '235196876',
|
||||
}
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -58,26 +60,15 @@ class BoxIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
# for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
|
||||
# entry_url_template = try_get(
|
||||
# entry, lambda x: x['content']['url_template'])
|
||||
# if not entry_url_template:
|
||||
# continue
|
||||
# representation = entry.get('representation')
|
||||
# if representation == 'dash':
|
||||
# TODO: append query to every fragment URL
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
|
||||
# file_id, query=query))
|
||||
|
||||
authenticated_download_url = f.get('authenticated_download_url')
|
||||
if authenticated_download_url and f.get('is_download_available'):
|
||||
formats.append({
|
||||
'ext': f.get('extension') or determine_ext(title),
|
||||
'filesize': f.get('size'),
|
||||
'format_id': 'download',
|
||||
'url': update_url_query(authenticated_download_url, query),
|
||||
})
|
||||
for url_tmpl in traverse_obj(f, (
|
||||
'representations', 'entries', lambda _, v: v['representation'] == 'dash',
|
||||
'content', 'url_template', {url_or_none}
|
||||
)):
|
||||
manifest_url = update_url_query(url_tmpl.replace('{+asset_path}', 'manifest.mpd'), query)
|
||||
fmts = self._extract_mpd_formats(manifest_url, file_id)
|
||||
for fmt in fmts:
|
||||
fmt['extra_param_to_segment_url'] = urllib.parse.urlparse(manifest_url).query
|
||||
formats.extend(fmts)
|
||||
|
||||
creator = f.get('created_by') or {}
|
||||
|
||||
|
||||
@@ -1,18 +1,15 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class BRIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_DESC = 'Bayerischer Rundfunk'
|
||||
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
|
||||
|
||||
@@ -167,142 +164,3 @@ class BRIE(InfoExtractor):
|
||||
} for variant in variants.findall('variant') if xpath_text(variant, 'url')]
|
||||
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
|
||||
return thumbnails
|
||||
|
||||
|
||||
class BRMediathekIE(InfoExtractor):
    """Extractor for clips of the Bayerischer Rundfunk Mediathek (GraphQL backed)."""

    IE_DESC = 'Bayerischer Rundfunk Mediathek'
    _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek//?video/(?:[^/?&#]+?-)?(?P<id>av:[0-9a-f]{24})'

    _TESTS = [{
        'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
        'md5': 'fdc3d485835966d1622587d08ba632ec',
        'info_dict': {
            'id': 'av:5a1e6a6e8fce6d001871cc8e',
            'ext': 'mp4',
            'title': 'Die Sendung vom 28.11.2017',
            'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
            'timestamp': 1511942766,
            'upload_date': '20171129',
        },
    }, {
        'url': 'https://www.br.de/mediathek//video/av:61b0db581aed360007558c12',
        'only_matching': True,
    }]

    # GraphQL document for one clip; the single %s placeholder receives the clip id
    _CLIP_QUERY = """{
  viewer {
    clip(id: "%s") {
      title
      description
      duration
      createdAt
      ageRestriction
      videoFiles {
        edges {
          node {
            publicLocation
            fileSize
            videoProfile {
              width
              height
              bitrate
              encoding
            }
          }
        }
      }
      captionFiles {
        edges {
          node {
            publicLocation
          }
        }
      }
      teaserImages {
        edges {
          node {
            imageFiles {
              edges {
                node {
                  publicLocation
                  width
                  height
                }
              }
            }
          }
        }
      }
    }
  }
}"""

    @staticmethod
    def _iter_nodes(container, key):
        """Yield the 'node' mapping of every edge under container[key].

        Missing keys and explicit JSON nulls at any level are treated as
        empty, so a sparse API response yields nothing instead of raising.
        """
        connection = (container or {}).get(key) or {}
        for edge in connection.get('edges') or []:
            yield (edge or {}).get('node') or {}

    def _real_extract(self, url):
        clip_id = self._match_id(url)

        clip = self._download_json(
            'https://proxy-base.master.mango.express/graphql',
            clip_id, data=json.dumps({
                'query': self._CLIP_QUERY % clip_id,
            }).encode(), headers={
                'Content-Type': 'application/json',
            })['data']['viewer']['clip']
        # The title is mandatory: a clip without one fails here with KeyError
        title = clip['title']

        formats = []
        for node in self._iter_nodes(clip, 'videoFiles'):
            n_url = node.get('publicLocation')
            if not n_url:
                continue
            if determine_ext(n_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    n_url, clip_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
            # Progressive file: describe it from its video profile
            video_profile = node.get('videoProfile') or {}
            tbr = int_or_none(video_profile.get('bitrate'))
            formats.append({
                'format_id': f'http-{tbr}' if tbr else 'http',
                'url': n_url,
                'width': int_or_none(video_profile.get('width')),
                'height': int_or_none(video_profile.get('height')),
                'tbr': tbr,
                'filesize': int_or_none(node.get('fileSize')),
            })

        subtitles = {}
        for node in self._iter_nodes(clip, 'captionFiles'):
            n_url = node.get('publicLocation')
            if n_url:
                # Every caption file is filed under 'de'
                subtitles.setdefault('de', []).append({'url': n_url})

        thumbnails = [{
            'url': image_node['publicLocation'],
            'width': int_or_none(image_node.get('width')),
            'height': int_or_none(image_node.get('height')),
        } for teaser_node in self._iter_nodes(clip, 'teaserImages')
            for image_node in self._iter_nodes(teaser_node, 'imageFiles')
            if image_node.get('publicLocation')]

        return {
            'id': clip_id,
            'title': title,
            'description': clip.get('description'),
            'duration': int_or_none(clip.get('duration')),
            'timestamp': parse_iso8601(clip.get('createdAt')),
            'age_limit': int_or_none(clip.get('ageRestriction')),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
|
||||
|
||||
@@ -1,86 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
    """Extractor for break.com video pages, including pages that merely embed YouTube."""

    _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
    _TESTS = [{
        'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
        'info_dict': {
            'id': '2468056',
            'ext': 'mp4',
            'title': 'When Girls Act Like D-Bags',
            'age_limit': 13,
        },
    }, {
        # youtube embed
        'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work',
        'info_dict': {
            'id': 'RrrDLdeL2HQ',
            'ext': 'mp4',
            'title': 'Whale Watching Boat Crashing Into San Diego Dock',
            'description': 'md5:afc1b2772f0a8468be51dd80eb021069',
            'upload_date': '20160331',
            'uploader': 'Steve Holden',
            'uploader_id': 'sdholden07',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id, video_id = self._match_valid_url(url).group('display_id', 'id')
        webpage = self._download_webpage(url, display_id)

        # A YouTube embed takes precedence over the site's own player data
        youtube_url = YoutubeIE._extract_url(webpage)
        if youtube_url:
            return self.url_result(youtube_url, ie=YoutubeIE.ie_key())

        content_json = self._search_regex(
            r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage, 'content')
        content = self._parse_json(content_json, display_id)

        formats = []
        for entry in content:
            video_url = url_or_none(entry.get('url'))
            if not video_url:
                continue
            # The bitrate, when present, is encoded in the URL as "<n>_kbps"
            tbr = int_or_none(self._search_regex(
                r'(\d+)_kbps', video_url, 'tbr', default=None))
            formats.append({
                'url': video_url,
                'format_id': f'http-{tbr}' if tbr else 'http',
                'tbr': tbr,
            })

        title = self._search_regex(
            (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
             r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value')

        def search_int(key, name):
            """Return the quoted integer assigned to `key` in the page, or None."""
            return int_or_none(self._search_regex(
                r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name,
                default=None))

        age_limit = search_int('ratings', 'age limit')
        # Prefer the id from the URL, then the page's "pid", then the slug
        if not video_id:
            video_id = search_int('pid', 'video id') or display_id

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'age_limit': age_limit,
            'formats': formats,
        }
|
||||
@@ -2,7 +2,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class BreitBartIE(InfoExtractor):
|
||||
_VALID_URL = r'https?:\/\/(?:www\.)breitbart.com/videos/v/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?breitbart\.com/videos/v/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.breitbart.com/videos/v/5cOz1yup/?pl=Ij6NDOji',
|
||||
'md5': '0aa6d1d6e183ac5ca09207fe49f17ade',
|
||||
|
||||
@@ -21,10 +21,10 @@ class BrilliantpalaBaseIE(InfoExtractor):
|
||||
|
||||
def _get_logged_in_username(self, url, video_id):
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
if self._LOGIN_API == urlh.url:
|
||||
if urlh.url.startswith(self._LOGIN_API):
|
||||
self.raise_login_required()
|
||||
return self._html_search_regex(
|
||||
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', 'username')
|
||||
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_form = self._hidden_inputs(self._download_webpage(
|
||||
|
||||
123
yt_dlp/extractor/bundestag.py
Normal file
123
yt_dlp/extractor/bundestag.py
Normal file
@@ -0,0 +1,123 @@
|
||||
import re
|
||||
from functools import partial
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
format_field,
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BundestagIE(InfoExtractor):
    # Extractor for Bundestag Mediathek videos, addressed either through the
    # dbtg.tv short links or through bundestag.de/mediathek?videoid=... URLs.
    _VALID_URL = [
        r'https?://dbtg\.tv/[cf]vid/(?P<id>\d+)',
        r'https?://www\.bundestag\.de/mediathek/?\?(?:[^#]+&)?videoid=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'https://dbtg.tv/cvid/7605304',
        'info_dict': {
            'id': '7605304',
            'ext': 'mp4',
            'title': '145. Sitzung vom 15.12.2023, TOP 24 Barrierefreiheit',
            'description': 'md5:321a9dc6bdad201264c0045efc371561',
        },
    }, {
        'url': 'https://www.bundestag.de/mediathek?videoid=7602120&url=L21lZGlhdGhla292ZXJsYXk=&mod=mediathek',
        'info_dict': {
            'id': '7602120',
            'ext': 'mp4',
            'title': '130. Sitzung vom 18.10.2023, TOP 1 Befragung der Bundesregierung',
            'description': 'Befragung der Bundesregierung',
        },
    }, {
        'url': 'https://www.bundestag.de/mediathek?videoid=7604941#url=L21lZGlhdGhla292ZXJsYXk/dmlkZW9pZD03NjA0OTQx&mod=mediathek',
        'only_matching': True,
    }, {
        'url': 'http://dbtg.tv/fvid/3594346',
        'only_matching': True,
    }]

    # HTML fragment that holds the title (<h3>) and description (<p>).
    _OVERLAY_URL = 'https://www.bundestag.de/mediathekoverlay'
    # HLS playlist template; {0} is the video id.
    _INSTANCE_FORMAT = 'https://cldf-wzw-od.r53.cdn.tv1.eu/13014bundestagod/_definst_/13014bundestag/ondemand/3777parlamentsfernsehen/archiv/app144277506/145293313/{0}/{0}_playlist.smil/playlist.m3u8'

    # JSON endpoint listing direct download links; the video id is appended.
    _SHARE_URL = 'https://webtv.bundestag.de/player/macros/_x_s-144277506/shareData.json?contentId='
    # File-name patterns used to recover codec/bitrate/size from share URLs.
    _SHARE_AUDIO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<bitrate>\d+)kb_(?P<channels>\w+)_\w+_\d+\.(?P<ext>\w+)'
    _SHARE_VIDEO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<width>\w+)_(?P<height>\w+)_(?P<bitrate>\d+)kb_\w+_\w+_\d+\.(?P<ext>\w+)'

    def _bt_extract_share_formats(self, video_id):
        """Yield format dicts for the audio/download links of the share API.

        Emits a warning and yields nothing when the API status code is not 1.
        """
        share_data = self._download_json(
            f'{self._SHARE_URL}{video_id}', video_id, note='Downloading share format JSON')

        status_code = traverse_obj(share_data, ('status', 'code', {int}))
        if status_code != 1:
            api_message = format_field(
                share_data, [('status', 'message', {str})],
                'Share API response: %s', default='Unknown Share API Error')
            self.report_warning(api_message + bug_reports_message())
            return

        for name, url in share_data.items():
            # Only string keys whose value is a usable URL describe a format.
            if not isinstance(name, str) or not url_or_none(url):
                continue

            if name.startswith('audio'):
                audio_match = re.search(self._SHARE_AUDIO_REGEX, url)
                audio_fields = traverse_obj(audio_match, {
                    'acodec': 'codec',
                    'audio_channels': ('channels', {{'mono': 1, 'stereo': 2}.get}),
                    'abr': ('bitrate', {int_or_none}),
                    'ext': 'ext',
                })
                yield {
                    'format_id': name,
                    'url': url,
                    'vcodec': 'none',
                    **audio_fields,
                }

            elif name.startswith('download'):
                video_match = re.search(self._SHARE_VIDEO_REGEX, url)
                video_fields = traverse_obj(video_match, {
                    'vcodec': 'codec',
                    'tbr': ('bitrate', {int_or_none}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                    'ext': 'ext',
                })
                yield {
                    'format_id': name,
                    'url': url,
                    **video_fields,
                }

    def _real_extract(self, url):
        """Collect HLS and share-API formats, then title/description from the overlay."""
        video_id = self._match_id(url)
        formats = []

        try:
            hls_formats = self._extract_m3u8_formats(
                self._INSTANCE_FORMAT.format(video_id), video_id, m3u8_id='instance')
            formats.extend(hls_formats)
        except ExtractorError as error:
            # A 404 on the playlist is reported as an unknown id; any other
            # failure only produces a warning so the share formats can still be tried.
            if isinstance(error.cause, HTTPError) and error.cause.status == 404:
                raise ExtractorError('Could not find video id', expected=True)
            self.report_warning(f'Error extracting hls formats: {error}', video_id)

        formats.extend(self._bt_extract_share_formats(video_id))
        if not formats:
            self.raise_no_formats('Could not find suitable formats', video_id=video_id)

        overlay = self._download_webpage(
            self._OVERLAY_URL, video_id,
            query={'videoid': video_id, 'view': 'main'},
            note='Downloading metadata overlay', fatal=False,
        )
        metadata = traverse_obj(overlay, {
            'title': (
                {partial(get_element_text_and_html_by_tag, 'h3')}, 0,
                {partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
            'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
        })

        return {'id': video_id, 'formats': formats, **metadata}
|
||||
@@ -8,9 +8,9 @@ from ..utils import (
|
||||
|
||||
|
||||
class BYUtvIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
|
||||
_TESTS = [{
|
||||
# ooyalaVOD
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||
'info_dict': {
|
||||
'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
|
||||
@@ -24,7 +24,6 @@ class BYUtvIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# dvr
|
||||
'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
|
||||
@@ -63,19 +62,6 @@ class BYUtvIE(InfoExtractor):
|
||||
'x-byutv-platformkey': 'xsaaw9c7y5',
|
||||
})
|
||||
|
||||
ep = video.get('ooyalaVOD')
|
||||
if ep:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ep['providerId'],
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': ep.get('title'),
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
|
||||
info = {}
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
@@ -1,87 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CamWithHerIE(InfoExtractor):
    # Extractor for camwithher.tv "view_video.php?viewkey=..." pages (RTMP streams).
    _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)'

    _TESTS = [{
        'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=',
        'info_dict': {
            'id': '5644',
            'ext': 'flv',
            'title': 'Periscope Tease',
            'description': 'In the clouds teasing on periscope to my favorite song',
            'duration': 240,
            'view_count': int,
            'comment_count': int,
            'uploader': 'MileenaK',
            'upload_date': '20160322',
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937',
        'only_matching': True,
    }, {
        'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=',
        'only_matching': True,
    }, {
        'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the video page and return an RTMP-based info dict."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The numeric id in the download link, not the viewkey, names the stream.
        flv_id = self._html_search_regex(
            r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id')

        # Video URL construction algorithm is reverse-engineered from cwhplayer.swf
        if int(flv_id) > 2010:
            play_path = 'mp4:%s.mp4' % flv_id
        else:
            play_path = flv_id
        rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % play_path

        title = self._html_search_regex(
            r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title')
        description = self._html_search_regex(
            r'>Description:</span>(.+?)</div>', webpage, 'description', default=None)

        def optional(pattern, name):
            # Non-fatal page lookup; yields None when the field is absent.
            return self._search_regex(pattern, webpage, name, default=None)

        runtime = optional(r'Runtime\s*:\s*(.+?) \|', 'duration')
        # Whitespace and dashes are stripped from the runtime before parsing.
        duration = parse_duration(re.sub(r'[\s-]', '', runtime) if runtime else runtime)

        view_count = int_or_none(optional(r'Views\s*:\s*(\d+)', 'view count'))
        comment_count = int_or_none(optional(r'Comments\s*:\s*(\d+)', 'comment count'))
        uploader = optional(r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', 'uploader')
        upload_date = unified_strdate(optional(r'Added on\s*:\s*([\d-]+)', 'upload date'))

        return {
            'id': flv_id,
            'url': rtmp_url,
            'ext': 'flv',
            'no_resume': True,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'uploader': uploader,
            'upload_date': upload_date,
            'age_limit': 18,
        }
|
||||
@@ -1,105 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
format_field,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
from .videomore import VideomoreIE
|
||||
|
||||
|
||||
class CarambaTVIE(InfoExtractor):
    # Extractor for video1.carambatv.ru player URLs and internal "carambatv:<id>" references.
    _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://video1.carambatv.ru/v/191910501',
        'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
        'info_dict': {
            'id': '191910501',
            'ext': 'mp4',
            'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2678.31,
        },
    }, {
        'url': 'carambatv:191910501',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download videoinfo.js for the id and turn its qualities into formats."""
        video_id = self._match_id(url)
        video = self._download_json(
            'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id, video_id)

        title = video['title']

        # File names in "qualities" are relative to this base.
        base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id

        formats = []
        for quality in video['qualities']:
            file_name = quality.get('fn')
            if not file_name:
                continue
            formats.append({
                'url': base_url + quality['fn'],
                'height': int_or_none(quality.get('height')),
                'format_id': format_field(quality, 'height', '%sp'),
            })

        # The duration is taken from the end time of the first annotation.
        end_time = try_get(
            video, lambda x: x['annotations'][0]['end_time'], compat_str)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': video.get('splash'),
            'duration': float_or_none(end_time),
            'formats': formats,
        }
|
||||
|
||||
|
||||
class CarambaTVPageIE(InfoExtractor):
    # Extractor for carambatv.ru article pages; resolves to Videomore or CarambaTV.
    _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
        'md5': 'a49fb0ec2ad66503eeb46aac237d3c86',
        'info_dict': {
            'id': '475222',
            'ext': 'flv',
            'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
            'thumbnail': r're:^https?://.*\.jpg',
            # duration reported by videomore is incorrect
            'duration': int,
        },
        'add_ie': [VideomoreIE.ie_key()],
    }

    def _real_extract(self, url):
        """Resolve the page to a Videomore embed if present, otherwise to a CarambaTV video."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        videomore_url = VideomoreIE._extract_url(webpage)
        if not videomore_url:
            # Fall back to the numeric id passed to getVMCode(...) in the page.
            videomore_id = self._search_regex(
                r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
                default=None)
            videomore_url = ('videomore:%s' % videomore_id) if videomore_id else videomore_url

        if videomore_url:
            # Keep the page's own title while Videomore supplies the rest.
            return {
                '_type': 'url_transparent',
                'url': videomore_url,
                'ie_key': VideomoreIE.ie_key(),
                'title': self._og_search_title(webpage),
            }

        video_url = self._og_search_property('video:iframe', webpage, default=None)
        if not video_url:
            video_id = self._search_regex(
                r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)',
                webpage, 'video id')
            video_url = 'carambatv:%s' % video_id

        return self.url_result(video_url, CarambaTVIE.ie_key())
|
||||
@@ -1,8 +1,9 @@
|
||||
import re
|
||||
import json
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -179,6 +180,13 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
||||
'tags': 'count:8',
|
||||
'location': 'Quebec',
|
||||
'series': 'All in a Weekend Montreal',
|
||||
'season': 'Season 2015',
|
||||
'season_number': 2015,
|
||||
'media_type': 'Excerpt',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||
@@ -194,25 +202,37 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
'series': 'CBC News: Windsor at 6:00',
|
||||
'categories': ['News/Canada/Windsor'],
|
||||
'location': 'Windsor',
|
||||
'tags': ['cancer'],
|
||||
'creator': 'Allison Johnson',
|
||||
'media_type': 'Excerpt',
|
||||
},
|
||||
}, {
|
||||
# Has subtitles
|
||||
# These broadcasts expire after ~1 month, can find new test URL here:
|
||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||
'url': 'http://www.cbc.ca/player/play/2249992771553',
|
||||
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
|
||||
'url': 'http://www.cbc.ca/player/play/2284799043667',
|
||||
'md5': '9b49f0839e88b6ec0b01d840cf3d42b5',
|
||||
'info_dict': {
|
||||
'id': '2249992771553',
|
||||
'id': '2284799043667',
|
||||
'ext': 'mp4',
|
||||
'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake',
|
||||
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
|
||||
'timestamp': 1690596000,
|
||||
'duration': 2716.333,
|
||||
'title': 'The National | Hockey coach charged, Green grants, Safer drugs',
|
||||
'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa',
|
||||
'timestamp': 1700272800,
|
||||
'duration': 2718.833,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'chapters': 'count:5',
|
||||
'upload_date': '20230729',
|
||||
'upload_date': '20231118',
|
||||
'categories': 'count:4',
|
||||
'series': 'The National - Full Show',
|
||||
'tags': 'count:1',
|
||||
'creator': 'News',
|
||||
'location': 'Canada',
|
||||
'media_type': 'Full Program',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -387,7 +407,7 @@ class CBCGemIE(InfoExtractor):
|
||||
url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
|
||||
|
||||
secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
|
||||
if not secret_xml:
|
||||
if not isinstance(secret_xml, xml.etree.ElementTree.Element):
|
||||
return
|
||||
|
||||
for child in secret_xml:
|
||||
|
||||
@@ -1,252 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class Channel9IE(InfoExtractor):
    # Extractor for Channel 9 (channel9.msdn.com / s.ch9.ms) episode pages and RSS feeds.
    IE_DESC = 'Channel 9'
    IE_NAME = 'channel9'
    _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b']

    _TESTS = [{
        'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
        'md5': '32083d4eaf1946db6d454313f44510ca',
        'info_dict': {
            'id': '6c413323-383a-49dc-88f9-a22800cab024',
            'ext': 'wmv',
            'title': 'Developer Kick-Off Session: Stuff We Love',
            'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
            'duration': 4576,
            'thumbnail': r're:https?://.*\.jpg',
            'timestamp': 1377717420,
            'upload_date': '20130828',
            'session_code': 'KOS002',
            'session_room': 'Arena 1A',
            'session_speakers': 'count:5',
        },
    }, {
        'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
        'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
        'info_dict': {
            'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
            'ext': 'wmv',
            'title': 'Self-service BI with Power BI - nuclear testing',
            'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
            'duration': 1540,
            'thumbnail': r're:https?://.*\.jpg',
            'timestamp': 1386381991,
            'upload_date': '20131207',
            'authors': ['Mike Wilmot'],
        },
    }, {
        # low quality mp4 is best
        'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
        'info_dict': {
            'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
            'ext': 'mp4',
            'title': 'Ranges for the Standard Library',
            'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
            'duration': 5646,
            'thumbnail': r're:https?://.*\.jpg',
            'upload_date': '20150930',
            'timestamp': 1443640735,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
        'info_dict': {
            'id': 'Events/DEVintersection/DEVintersection-2016',
            'title': 'DEVintersection 2016 Orlando Sessions',
        },
        'playlist_mincount': 14,
    }, {
        'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
        'only_matching': True,
    }, {
        'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
        'only_matching': True,
    }]

    _RSS_URL = 'http://channel9.msdn.com/%s/RSS'

    def _extract_list(self, video_id, rss_url=None):
        """Return a playlist built from the <link> of every item in an RSS feed.

        When `rss_url` is not given, the feed URL is derived from `video_id`.
        """
        if not rss_url:
            rss_url = self._RSS_URL % video_id
        rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
        entries = [self.url_result(session_url.text, 'Channel9')
                   for session_url in rss.findall('./channel/item/link')]
        title_text = rss.find('./channel/title').text
        return self.playlist_result(entries, video_id, title_text)

    def _real_extract(self, url):
        """Extract an episode as a playlist of recording/slides/zip, or an RSS playlist.

        URLs ending in /RSS, and pages without a `data-episode` attribute, are
        treated as feeds and handled by `_extract_list`.
        """
        content_path, rss = self._match_valid_url(url).groups()

        if rss:
            return self._extract_list(content_path, url)

        webpage = self._download_webpage(
            url, content_path, 'Downloading web page')

        episode_data = self._search_regex(
            r"data-episode='([^']+)'", webpage, 'episode data', default=None)
        if not episode_data:
            return self._extract_list(content_path)

        episode_data = self._parse_json(unescapeHTML(
            episode_data), content_path)
        content_id = episode_data['contentId']
        is_session = '/Sessions(' in episode_data['api']
        content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
        # Sessions and regular posts expose different extra fields.
        if is_session:
            content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
        else:
            content_url += 'Authors,Body&$expand=Authors'
        content_data = self._download_json(content_url, content_id)
        title = content_data['Title']

        QUALITIES = (
            'mp3',
            'wmv', 'mp4',
            'wmv-low', 'mp4-low',
            'wmv-mid', 'mp4-mid',
            'wmv-high', 'mp4-high',
        )

        quality_key = qualities(QUALITIES)

        def quality(quality_id, format_url):
            # Files with "_Source." in the URL are ranked above every named quality.
            return (len(QUALITIES) if '_Source.' in format_url
                    else quality_key(quality_id))

        formats = []
        # URLs already turned into a format; the page and the API can list the same file.
        urls = set()

        SITE_QUALITIES = {
            'MP3': 'mp3',
            'MP4': 'mp4',
            'Low Quality WMV': 'wmv-low',
            'Low Quality MP4': 'mp4-low',
            'Mid Quality WMV': 'wmv-mid',
            'Mid Quality MP4': 'mp4-mid',
            'High Quality WMV': 'wmv-high',
            'High Quality MP4': 'mp4-high',
        }

        # First source: the <select name="format"> download menu in the page.
        formats_select = self._search_regex(
            r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
            'formats select', default=None)
        if formats_select:
            for mobj in re.finditer(
                    r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
                    formats_select):
                format_url = mobj.group('url')
                if format_url in urls:
                    continue
                urls.add(format_url)
                format_id = mobj.group('format')
                quality_id = SITE_QUALITIES.get(format_id, format_id)
                formats.append({
                    'url': format_url,
                    'format_id': quality_id,
                    'quality': quality(quality_id, format_url),
                    'vcodec': 'none' if quality_id == 'mp3' else None,
                })

        # NOTE(review): 'wmv-hq' does not appear in QUALITIES ('wmv-high' does);
        # confirm whether that naming difference is intended.
        API_QUALITIES = {
            'VideoMP4Low': 'mp4-low',
            'VideoWMV': 'wmv-mid',
            'VideoMP4Medium': 'mp4-mid',
            'VideoMP4High': 'mp4-high',
            'VideoWMVHQ': 'wmv-hq',
        }

        # Second source: direct URLs returned by the OData API.
        for format_id, q in API_QUALITIES.items():
            q_url = content_data.get(format_id)
            if not q_url or q_url in urls:
                continue
            urls.add(q_url)
            formats.append({
                'url': q_url,
                'format_id': q,
                'quality': quality(q, q_url),
            })

        slides = content_data.get('Slides')
        zip_file = content_data.get('ZipFile')

        if not formats and not slides and not zip_file:
            self.raise_no_formats(
                'None of recording, slides or zip are available for %s' % content_path)

        subtitles = {}
        # `or []` also covers a JSON null value, which `.get(key, [])` would pass through as None.
        for caption in content_data.get('Captions') or []:
            caption_url = caption.get('Url')
            if not caption_url:
                continue
            subtitles.setdefault(caption.get('Language', 'en'), []).append({
                'url': caption_url,
                'ext': 'vtt',
            })

        # Metadata shared by every playlist entry produced for this episode.
        common = {
            'id': content_id,
            'title': title,
            'description': clean_html(content_data.get('Description') or content_data.get('Body')),
            'thumbnail': content_data.get('VideoPlayerPreviewImage'),
            'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
            'timestamp': parse_iso8601(content_data.get('PublishedDate')),
            'avg_rating': int_or_none(content_data.get('Rating')),
            'rating_count': int_or_none(content_data.get('RatingCount')),
            'view_count': int_or_none(content_data.get('Views')),
            'comment_count': int_or_none(content_data.get('CommentCount')),
            'subtitles': subtitles,
        }
        if is_session:
            speakers = [
                s.get('FullName')
                for s in content_data.get('Speakers') or []
                if s.get('FullName')]

            common.update({
                'session_code': content_data.get('Code'),
                'session_room': content_data.get('Room'),
                'session_speakers': speakers,
            })
        else:
            common['authors'] = [
                a.get('DisplayName')
                for a in content_data.get('Authors') or []
                if a.get('DisplayName')]

        # Slides, zip archive and the recording each become their own entry.
        contents = []

        if slides:
            contents.append({**common, 'title': title + '-Slides', 'url': slides})

        if zip_file:
            contents.append({**common, 'title': title + '-Zip', 'url': zip_file})

        if formats:
            contents.append({**common, 'title': title, 'formats': formats})

        return self.playlist_result(contents)
|
||||
@@ -1,88 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class ChirbitIE(InfoExtractor):
    # Extractor for single chirb.it audio posts (plain, /wp/, /pl/ and SWF-player URLs).
    IE_NAME = 'chirbit'
    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://chirb.it/be2abG',
        'info_dict': {
            'id': 'be2abG',
            'ext': 'mp3',
            'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
            'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
            'duration': 306,
            'uploader': 'Gerryaudio',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
        'only_matching': True,
    }, {
        'url': 'https://chirb.it/wp/MN58c2',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the canonical chirb.it page and decode the obfuscated audio URL."""
        audio_id = self._match_id(url)

        # Always fetch the plain page, whatever URL variant was given.
        webpage = self._download_webpage(
            'http://chirb.it/%s' % audio_id, audio_id)

        encoded_url = self._search_regex(
            r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'data fd', group='url')

        # Reverse engineered from https://chirb.it/js/chirbit.player.js (look
        # for soundURL)
        reversed_payload = encoded_url[::-1]
        audio_url = compat_b64decode(reversed_payload).decode('utf-8')

        info = {
            'id': audio_id,
            'url': audio_url,
        }
        info['title'] = self._search_regex(
            r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
        info['description'] = self._search_regex(
            r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>',
            webpage, 'description', default=None)
        length_text = self._search_regex(
            r'class=["\']c-length["\'][^>]*>([^<]+)',
            webpage, 'duration', fatal=False)
        info['duration'] = parse_duration(length_text)
        info['uploader'] = self._search_regex(
            r'id=["\']chirbit-username["\'][^>]*>([^<]+)',
            webpage, 'uploader', fatal=False)
        return info
|
||||
|
||||
|
||||
class ChirbitProfileIE(InfoExtractor):
    # Extractor for chirbit.com profile pages; produces a playlist of chirb.it posts.
    IE_NAME = 'chirbit:profile'
    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://chirbit.com/ScarletBeauty',
        'info_dict': {
            'id': 'ScarletBeauty',
        },
        'playlist_mincount': 3,
    }

    def _real_extract(self, url):
        """Collect every post id from the profile's copy buttons into a playlist."""
        profile_id = self._match_id(url)
        webpage = self._download_webpage(url, profile_id)

        # Each post has an <input id="copy-btn-<id>">; the first group is only the quote.
        copy_buttons = re.findall(
            r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)

        entries = []
        for _quote, post_id in copy_buttons:
            post_url = self._proto_relative_url('//chirb.it/' + post_id)
            entries.append(self.url_result(post_url))

        return self.playlist_result(entries, profile_id)
|
||||
139
yt_dlp/extractor/chzzk.py
Normal file
139
yt_dlp/extractor/chzzk.py
Normal file
@@ -0,0 +1,139 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CHZZKLiveIE(InfoExtractor):
    # Extractor for live streams at chzzk.naver.com/live/<channel id>.
    IE_NAME = 'chzzk:live'
    _VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753',
        'info_dict': {
            'id': 'c68b8ef525fb3d2fa146344d84991753',
            'ext': 'mp4',
            'title': str,
            'channel': '진짜도현',
            'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
            'channel_is_verified': False,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1705510344,
            'upload_date': '20240117',
            'live_status': 'is_live',
            'view_count': int,
            'concurrent_view_count': int,
        },
        'skip': 'The channel is not currently live',
    }]

    def _real_extract(self, url):
        """Query the live-detail API and build HLS formats for the running stream.

        Raises an expected ExtractorError when the API reports status 'CLOSE'.
        """
        channel_id = self._match_id(url)
        api_response = self._download_json(
            f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
            note='Downloading channel info', errnote='Unable to download channel info')
        live_detail = api_response['content']

        if live_detail.get('status') == 'CLOSE':
            raise ExtractorError('The channel is not currently live', expected=True)

        # The playback description is itself a JSON document stored as a string.
        live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)

        thumbnail_template = traverse_obj(
            live_playback, ('thumbnail', 'snapshotThumbnailTemplate', {url_or_none}))
        thumbnails = []
        if thumbnail_template and '{type}' in thumbnail_template:
            # One thumbnail per advertised type; the type string doubles as the width.
            thumbnails = [{
                'id': width,
                'url': thumbnail_template.replace('{type}', width),
                'width': int_or_none(width),
            } for width in traverse_obj(live_playback, ('thumbnail', 'types', ..., {str}))]

        formats = []
        subtitles = {}
        playable_media = traverse_obj(
            live_playback, ('media', lambda _, v: url_or_none(v['path'])))
        for media in playable_media:
            is_low_latency = media.get('mediaId') == 'LLHLS'
            m3u8_id = 'hls-ll' if is_low_latency else 'hls'
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                media['path'], channel_id, 'mp4', fatal=False, live=True,
                m3u8_id=m3u8_id)
            for fmt in fmts:
                # Low-latency variants and the low-quality audio rendition are deprioritised.
                if is_low_latency:
                    fmt['source_preference'] = -2
                if '-afragalow.stream-audio.stream' in fmt['format_id']:
                    fmt['quality'] = -2
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        metadata = traverse_obj(live_detail, {
            'title': ('liveTitle', {str}),
            'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
            'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
            'view_count': ('accumulateCount', {int_or_none}),
            'channel': ('channel', 'channelName', {str}),
            'channel_id': ('channel', 'channelId', {str}),
            'channel_is_verified': ('channel', 'verifiedMark', {bool}),
        })

        return {
            'id': channel_id,
            'is_live': True,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            **metadata,
        }
|
||||
|
||||
|
||||
class CHZZKVideoIE(InfoExtractor):
    """Extractor for videos served under chzzk.naver.com/video/<number>."""

    IE_NAME = 'chzzk:video'
    _VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://chzzk.naver.com/video/1754',
        'md5': 'b0c0c1bb888d913b93d702b1512c7f06',
        'info_dict': {
            'id': '1754',
            'ext': 'mp4',
            'title': '치지직 테스트 방송',
            'channel': '침착맨',
            'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
            'channel_is_verified': False,
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 15577,
            'timestamp': 1702970505.417,
            'upload_date': '20231219',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Download the video metadata, then the MPD manifest it refers to.

        Returns an info dict with formats, subtitles and the metadata fields
        mapped from the API response.
        """
        video_id = self._match_id(url)

        api_response = self._download_json(
            f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
            note='Downloading video info', errnote='Unable to download video info')
        meta = api_response['content']

        # The playback endpoint is addressed by the API-side "videoId" and
        # receives the "inKey" value of the same response as its "key" parameter
        playback_url = f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{meta["videoId"]}'
        playback_query = {
            'key': meta['inKey'],
            'env': 'real',
            'lc': 'en_US',
            'cpl': 'en_US',
        }
        formats, subtitles = self._extract_mpd_formats_and_subtitles(
            playback_url, video_id, query=playback_query,
            note='Downloading video playback', errnote='Unable to download video playback')

        metadata = traverse_obj(meta, {
            'title': ('videoTitle', {str}),
            'thumbnail': ('thumbnailImageUrl', {url_or_none}),
            # publishDateAt is divided by 1000 to yield the timestamp
            'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
            'view_count': ('readCount', {int_or_none}),
            'duration': ('duration', {int_or_none}),
            'channel': ('channel', 'channelName', {str}),
            'channel_id': ('channel', 'channelId', {str}),
            'channel_is_verified': ('channel', 'verifiedMark', {bool}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
|
||||
@@ -1,56 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class CinchcastIE(InfoExtractor):
    """Extractor for player.cinchcast.com embeds (assetId / show_id URLs)."""

    _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)'
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1']

    _TESTS = [{
        'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single',
        'info_dict': {
            'id': '5258197',
            'ext': 'mp3',
            'title': 'Train Your Brain to Up Your Game with Coach Mandy',
            'upload_date': '20130816',
        },
    }, {
        # Actual test is run in generic, look for undergroundwellness
        'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read the MRSS feed for the asset and build formats from its item."""
        video_id = self._match_id(url)
        feed = self._download_xml(
            f'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId={video_id}',
            video_id)

        # XML namespaces used by the feed elements that are read below
        jw_ns = '{http://developer.longtailvideo.com/trac/}'
        mrss_ns = '{http://search.yahoo.com/mrss/}'

        entry = feed.find('.//item')
        title = xpath_text(entry, './title', fatal=True)
        upload_date = unified_strdate(
            xpath_text(entry, f'./{jw_ns}date'), day_first=False)

        # duration is present but wrong
        main_format = {
            'format_id': 'main',
            'url': entry.find(f'./{mrss_ns}content').attrib['url'],
        }
        formats = [main_format]

        backup_url = xpath_text(entry, f'./{jw_ns}backupContent')
        if backup_url:
            formats.append({
                'preference': 2,  # seems to be more reliable
                'format_id': 'backup',
                'url': backup_url,
            })

        return {
            'id': video_id,
            'title': title,
            'upload_date': upload_date,
            'formats': formats,
        }
|
||||
@@ -1,52 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands
|
||||
)
|
||||
|
||||
|
||||
class ClipsyndicateIE(InfoExtractor):
    """Extractor for clipsyndicate.com video pages (www and chic hosts)."""

    _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
        'md5': '4d7d549451bad625e0ff3d7bd56d776c',
        'info_dict': {
            'id': '4629301',
            'ext': 'mp4',
            'title': 'Brick Briscoe',
            'duration': 612,
            'thumbnail': r're:^https?://.+\.jpg',
        },
    }, {
        'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the player script, then the playlist XML describing the track.

        Returns an info dict with the direct media URL plus title, thumbnail
        and duration read from the track's <param> elements.
        """
        video_id = self._match_id(url)
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
            video_id, 'Downloading player')
        # it includes a required token
        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')

        pdoc = self._download_xml(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
            video_id, 'Downloading video info',
            transform_source=fix_xml_ampersands)

        track_doc = pdoc.find('trackList/track')

        def find_param(name):
            """Return the value of the named <param>, or None if it is absent."""
            node = find_xpath_attr(track_doc, './/param', 'name', name)
            if node is not None:
                return node.attrib['value']
            return None

        # Duration is optional metadata: a missing or non-integer value must
        # not abort an otherwise successful extraction
        try:
            duration = int(find_param('duration'))
        except (TypeError, ValueError):
            duration = None

        return {
            'id': video_id,
            'title': find_param('title'),
            'url': track_doc.find('location').text,
            'thumbnail': find_param('thumbnail'),
            'duration': duration,
        }
|
||||
@@ -46,15 +46,18 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
video_id.split('.')[1] + '==='), video_id)['sub']
|
||||
manifest_base_url = base_url + 'manifest/video.'
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
manifest_base_url + 'm3u8', video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CloudyIE(InfoExtractor):
    """Extractor for cloudy.ec watch (/v/<id>) and embed.php URLs."""

    # Spelled IE_DESC (no leading underscore), matching the attribute name
    # used by the other extractors
    IE_DESC = 'cloudy.ec'
    _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.cloudy.ec/v/af511e2527aac',
        'md5': '29832b05028ead1b58be86bf319397ca',
        'info_dict': {
            'id': 'af511e2527aac',
            'ext': 'mp4',
            'title': 'Funny Cats and Animals Compilation june 2013',
            'upload_date': '20130913',
            'view_count': int,
        },
    }, {
        'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Take the media entry from the embed page and metadata from the watch page.

        The watch page is optional: when it cannot be downloaded, or carries
        no title, the video id is used as the title.
        """
        video_id = self._match_id(url)

        embed_page = self._download_webpage(
            'https://www.cloudy.ec/embed.php', video_id, query={
                'id': video_id,
                'playerPage': 1,
                'autoplay': 1,
            })

        info = self._parse_html5_media_entries(url, embed_page, video_id)[0]

        watch_page = self._download_webpage(
            f'https://www.cloudy.ec/v/{video_id}', video_id, fatal=False)

        if watch_page:
            info.update({
                # Non-fatal so that the video_id fallback below is reachable
                # when the heading is missing
                'title': self._search_regex(
                    r'<h\d[^>]*>([^<]+)<', watch_page, 'title', fatal=False),
                'upload_date': unified_strdate(self._search_regex(
                    r'>Published at (\d{4}-\d{1,2}-\d{1,2})', watch_page,
                    'upload date', fatal=False)),
                'view_count': str_to_int(self._search_regex(
                    r'([\d,.]+) views<', watch_page, 'view count', fatal=False)),
            })

        if not info.get('title'):
            info['title'] = video_id

        info['id'] = video_id

        return info
|
||||
@@ -6,6 +6,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ClubicIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
|
||||
@@ -4,6 +4,7 @@ from .mtv import MTVIE
|
||||
|
||||
|
||||
class CMTIE(MTVIE): # XXX: Do not subclass from concrete IE
|
||||
_WORKING = False
|
||||
IE_NAME = 'cmt.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)'
|
||||
|
||||
|
||||
@@ -286,6 +286,9 @@ class InfoExtractor:
|
||||
If it is not clear whether to use timestamp or this, use the former
|
||||
release_date: The date (YYYYMMDD) when the video was released in UTC.
|
||||
If not explicitly set, calculated from release_timestamp
|
||||
release_year: Year (YYYY) as integer when the video or album was released.
|
||||
To be used if no exact release date is known.
|
||||
If not explicitly set, calculated from release_date.
|
||||
modified_timestamp: UNIX timestamp of the moment the video was last modified.
|
||||
modified_date: The date (YYYYMMDD) when the video was last modified in UTC.
|
||||
If not explicitly set, calculated from modified_timestamp
|
||||
@@ -379,6 +382,7 @@ class InfoExtractor:
|
||||
'private', 'premium_only', 'subscriber_only', 'needs_auth',
|
||||
'unlisted' or 'public'. Use 'InfoExtractor._availability'
|
||||
to set it
|
||||
media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer"
|
||||
_old_archive_ids: A list of old archive ids needed for backward compatibility
|
||||
_format_sort_fields: A list of fields to use for sorting formats
|
||||
__post_extractor: A function to be called just before the metadata is
|
||||
@@ -427,7 +431,6 @@ class InfoExtractor:
|
||||
and compilations).
|
||||
disc_number: Number of the disc or other physical medium the track belongs to,
|
||||
as an integer.
|
||||
release_year: Year (YYYY) when the album was released.
|
||||
composer: Composer of the piece
|
||||
|
||||
The following fields should only be set for clips that should be cut from the original video:
|
||||
@@ -2225,7 +2228,9 @@ class InfoExtractor:
|
||||
mpd_url, video_id,
|
||||
note='Downloading MPD VOD manifest' if note is None else note,
|
||||
errnote='Failed to download VOD manifest' if errnote is None else errnote,
|
||||
fatal=False, data=data, headers=headers, query=query) or {}
|
||||
fatal=False, data=data, headers=headers, query=query)
|
||||
if not isinstance(mpd_doc, xml.etree.ElementTree.Element):
|
||||
return None
|
||||
return int_or_none(parse_duration(mpd_doc.get('mediaPresentationDuration')))
|
||||
|
||||
@staticmethod
|
||||
@@ -2339,7 +2344,9 @@ class InfoExtractor:
|
||||
imgs_count = 0
|
||||
|
||||
srcs = set()
|
||||
media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
|
||||
media = itertools.chain.from_iterable(
|
||||
smil.findall(self._xpath_ns(arg, namespace))
|
||||
for arg in ['.//video', './/audio', './/media'])
|
||||
for medium in media:
|
||||
src = medium.get('src')
|
||||
if not src or src in srcs:
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CraftsyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.craftsy.com/class/(?P<id>[a-z0-9_-]+)/'
|
||||
_VALID_URL = r'https?://www\.craftsy\.com/class/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.craftsy.com/class/the-midnight-quilt-show-season-5/',
|
||||
'info_dict': {
|
||||
|
||||
@@ -46,6 +46,10 @@ class CWTVIE(InfoExtractor):
|
||||
'timestamp': 1444107300,
|
||||
'age_limit': 14,
|
||||
'uploader': 'CWTV',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:4',
|
||||
'episode': 'Episode 20',
|
||||
'season': 'Season 11',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
|
||||
@@ -45,7 +45,7 @@ class CybraryBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class CybraryIE(CybraryBaseIE):
|
||||
_VALID_URL = r'https?://app.cybrary.it/immersive/(?P<enrollment>[0-9]+)/activity/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://app\.cybrary\.it/immersive/(?P<enrollment>[0-9]+)/activity/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://app.cybrary.it/immersive/12487950/activity/63102',
|
||||
'md5': '9ae12d37e555cb2ed554223a71a701d0',
|
||||
@@ -105,12 +105,12 @@ class CybraryIE(CybraryBaseIE):
|
||||
'chapter': module.get('title'),
|
||||
'chapter_id': str_or_none(module.get('id')),
|
||||
'title': activity.get('title'),
|
||||
'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'http_headers': {'Referer': 'https://api.cybrary.it'}})
|
||||
'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'referer': 'https://api.cybrary.it'})
|
||||
}
|
||||
|
||||
|
||||
class CybraryCourseIE(CybraryBaseIE):
|
||||
_VALID_URL = r'https://app.cybrary.it/browse/course/(?P<id>[\w-]+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https://app\.cybrary\.it/browse/course/(?P<id>[\w-]+)/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies',
|
||||
'info_dict': {
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class DaftsexIE(InfoExtractor):
    """Extractor for daft.sex watch pages backed by the DaxabPlayer embed."""

    _VALID_URL = r'https?://(?:www\.)?daft\.sex/watch/(?P<id>-?\d+_\d+)'
    _TESTS = [{
        'url': 'https://daft.sex/watch/-35370899_456246186',
        'md5': '64c04ef7b4c7b04b308f3b0c78efe7cd',
        'info_dict': {
            'id': '-35370899_456246186',
            'ext': 'mp4',
            'title': 'just relaxing',
            'description': 'just relaxing – Watch video Watch video in high quality',
            'upload_date': '20201113',
            'timestamp': 1605261911,
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
            'duration': 15.0,
            'view_count': int,
        },
    }, {
        'url': 'https://daft.sex/watch/-156601359_456242791',
        'info_dict': {
            'id': '-156601359_456242791',
            'ext': 'mp4',
            'title': 'Skye Blue - Dinner And A Show',
            'description': 'Skye Blue - Dinner And A Show - Watch video Watch video in high quality',
            'upload_date': '20200916',
            'timestamp': 1600250735,
            'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ',
        },
        'skip': 'deleted / private',
    }]

    def _real_extract(self, url):
        """Scrape page metadata, load the player parameters and build formats.

        Formats come from the ``cdn_files`` mapping of the player parameters
        when it is non-empty; otherwise the ``video.get`` endpoint on the
        decoded server host is queried for the file list.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Fields shared by both return paths
        common_info = {
            'id': video_id,
            'title': self._html_search_meta('name', webpage, 'title'),
            'timestamp': unified_timestamp(
                self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None)),
            'description': self._html_search_meta(
                'description', webpage, 'Description', default=None),
            'duration': parse_duration(self._search_regex(
                r'Duration: ((?:[0-9]{2}:){0,2}[0-9]{2})',
                webpage, 'duration', fatal=False)),
            'view_count': parse_count(self._search_regex(
                r'Views: ([0-9 ]+)',
                webpage, 'views', fatal=False)),
            'age_limit': 18,
        }

        player_hash = self._search_regex(
            r'DaxabPlayer\.Init\({[\s\S]*hash:\s*"([0-9a-zA-Z_\-]+)"[\s\S]*}',
            webpage, 'player hash')
        player_color = self._search_regex(
            r'DaxabPlayer\.Init\({[\s\S]*color:\s*"([0-9a-z]+)"[\s\S]*}',
            webpage, 'player color', fatal=False) or ''

        embed_page = self._download_webpage(
            f'https://dxb.to/player/{player_hash}?color={player_color}',
            video_id, headers={'Referer': url})
        raw_params = self._search_regex(
            r'window\.globParams\s*=\s*({[\S\s]+})\s*;\s*<\/script>',
            embed_page, 'video parameters')
        video_params = self._parse_json(raw_params, video_id, transform_source=js_to_json)

        # The "server" value is reversed before it is base64-decoded
        server_host = compat_b64decode(video_params['server'][::-1]).decode('utf-8')
        server_domain = f'https://{server_host}'

        cdn_files = traverse_obj(video_params, ('video', 'cdn_files')) or {}
        if cdn_files:
            video_path = video_id.replace('_', '/')
            formats = []
            for format_id, format_data in cdn_files.items():
                ext, height = format_id.split('_')
                extra = format_data.split('.')[-1]
                formats.append({
                    'format_id': format_id,
                    'url': f'{server_domain}/videos/{video_path}/{height}.mp4?extra={extra}',
                    'height': int_or_none(height),
                    'ext': ext,
                })

            return {
                **common_info,
                'formats': formats,
                'thumbnail': try_get(
                    video_params,
                    lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8')),
            }

        api_query = {
            'token': video_params['video']['access_token'],
            'videos': video_id,
            'ckey': video_params['c_key'],
            'credentials': video_params['video']['credentials'],
        }
        items = self._download_json(
            f'{server_domain}/method/video.get/{video_id}', video_id,
            headers={'Referer': url}, query=api_query)['response']['items']

        if not items:
            raise ExtractorError('Video is not available', video_id=video_id, expected=True)

        item = items[0]
        formats = []
        for file_id, file_url in item.get('files', {}).items():
            # An "external" entry is handed off to another extractor as-is
            if file_id == 'external':
                return self.url_result(file_url)
            ext, height = file_id.split('_')
            extra_key = traverse_obj(video_params, ('video', 'partial', 'quality', height))
            if not extra_key:
                continue
            formats.append({
                'format_id': f'{height}p',
                'url': f'{server_domain}/{file_url[8:]}&videos={video_id}&extra_key={extra_key}',
                'height': int_or_none(height),
                'ext': ext,
            })

        # Every non-empty "photo_<width>" field of the item becomes a thumbnail
        thumbnails = [{
            'id': key.replace('photo_', ''),
            'url': value,
            'width': int_or_none(key.replace('photo_', '')),
        } for key, value in item.items() if key.startswith('photo_') and value]

        return {
            **common_info,
            'formats': formats,
            'comment_count': int_or_none(item.get('comments')),
            'thumbnails': thumbnails,
        }
|
||||
@@ -93,7 +93,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'''(?ix)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player\.html\?)?video|swf)|
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
|
||||
(?:www\.)?lequipe\.fr/video
|
||||
)
|
||||
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
@@ -107,13 +107,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'id': 'x5kesuj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
|
||||
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
|
||||
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
|
||||
'duration': 187,
|
||||
'timestamp': 1493651285,
|
||||
'upload_date': '20170501',
|
||||
'uploader': 'Deadline',
|
||||
'uploader_id': 'x1xm8ri',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
|
||||
@@ -132,7 +136,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['en_quete_d_esprit'],
|
||||
'thumbnail': 'https://s2.dmcdn.net/v/Tncwi1YGKdvFbDuDY/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
@@ -201,6 +205,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/x86gw.html?video=k46oCapRs4iikoz9DWy',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_COMMON_MEDIA_FIELDS = '''description
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class DefenseGouvFrIE(InfoExtractor):
    """Extractor for defense.gouv.fr web TV lightbox pages."""

    IE_NAME = 'defense.gouv.fr'
    _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'

    _TEST = {
        'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
        'md5': '75bba6124da7e63d2d60b5244ec9430c',
        'info_dict': {
            'id': '11213',
            'ext': 'mp4',
            'title': 'attaque-chimique-syrienne-du-21-aout-2013-1',
        },
    }

    def _real_extract(self, url):
        """Find the numeric id in the page and use the first rendition's URL.

        The URL slug matched by ``_VALID_URL`` is reported as the title; the
        returned ``id`` is the ``pvg_id`` value found in the page.
        """
        slug = self._match_id(url)
        webpage = self._download_webpage(url, slug)

        video_id = self._search_regex(
            r"flashvars.pvg_id=\"(\d+)\";", webpage, 'ID')

        config = self._download_json(
            f'http://static.videos.gouv.fr/brightcovehub/export/json/{video_id}',
            slug, 'Downloading JSON config')

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': config['renditions'][0]['url'],
            'title': slug,
        }
|
||||
@@ -3,6 +3,7 @@ from ..utils import parse_duration
|
||||
|
||||
|
||||
class DHMIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
|
||||
_VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DotsubIE(InfoExtractor):
    """Extractor for dotsub.com/view/<id> pages."""

    _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09',
        'md5': '21c7ff600f545358134fea762a6d42b6',
        'info_dict': {
            'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09',
            'ext': 'flv',
            'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever',
            'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6',
            'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p',
            'duration': 198,
            'uploader': 'liuxt',
            'timestamp': 1385778501.104,
            'upload_date': '20131130',
            'view_count': int,
        },
    }, {
        'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6',
        'md5': '2bb4a83896434d5c26be868c609429a3',
        'info_dict': {
            'id': '168006778',
            'ext': 'mp4',
            'title': 'Apartments and flats in Raipur the white symphony',
            'description': 'md5:784d0639e6b7d1bc29530878508e38fe',
            'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
            'duration': 290,
            'timestamp': 1476767794.2809999,
            'upload_date': '20161018',
            'uploader': 'parthivi001',
            'uploader_id': 'user52596202',
            'view_count': int,
        },
        'add_ie': ['Vimeo'],
    }]

    def _real_extract(self, url):
        """Resolve the media URL from the metadata API, falling back to the page.

        Lookup order: the API's ``mediaURI``, then a ``<source>`` / ``"file"``
        URL scraped from the watch page, and finally the page's ``data-setup``
        JSON, whose ``src`` is returned as a ``url_transparent`` result.
        """
        video_id = self._match_id(url)

        info = self._download_json(
            f'https://dotsub.com/api/media/{video_id}/metadata', video_id)
        video_url = info.get('mediaURI')

        if not video_url:
            webpage = self._download_webpage(url, video_id)
            video_url = self._search_regex(
                [r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
                webpage, 'video url', default=None)

        if video_url:
            # Built for the API-provided URL as well as the scraped one, so
            # the update below never runs against an unbound info_dict
            info_dict = {
                'id': video_id,
                'url': video_url,
                'ext': 'flv',
            }
        else:
            # webpage is always bound here: this branch is only reachable
            # after the page fallback above has run
            setup_data = self._parse_json(self._html_search_regex(
                r'(?s)data-setup=([\'"])(?P<content>(?!\1).+?)\1',
                webpage, 'setup data', group='content'), video_id)
            info_dict = {
                '_type': 'url_transparent',
                'url': setup_data['src'],
            }

        info_dict.update({
            'title': info['title'],
            'description': info.get('description'),
            'thumbnail': info.get('screenshotURI'),
            # duration and dateCreated are scaled down by 1000
            'duration': int_or_none(info.get('duration'), 1000),
            'uploader': info.get('user'),
            'timestamp': float_or_none(info.get('dateCreated'), 1000),
            'view_count': int_or_none(info.get('numberOfViews')),
        })

        return info_dict
|
||||
@@ -1,21 +1,17 @@
|
||||
import binascii
|
||||
import hashlib
|
||||
import re
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
parse_iso8601,
|
||||
try_call,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
SERIES_API = 'https://production-cdn.dr-massive.com/api/page?device=web_browser&item_detail_expand=all&lang=da&max_list_prefetch=3&path=%s'
|
||||
|
||||
@@ -24,7 +20,7 @@ class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?dr\.dk/(?:tv/se|nyheder|(?P<radio>radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
|
||||
(?:www\.)?dr\.dk/tv/se(?:/ondemand)?/(?:[^/?#]+/)*|
|
||||
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
|
||||
)
|
||||
(?P<id>[\da-z_-]+)
|
||||
@@ -53,22 +49,6 @@ class DRTVIE(InfoExtractor):
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
'skip': 'this video has been removed',
|
||||
}, {
|
||||
# embed
|
||||
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
|
||||
'info_dict': {
|
||||
'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
|
||||
'ext': 'mp4',
|
||||
'title': 'christiania pusher street ryddes drdkrjpo',
|
||||
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
|
||||
'timestamp': 1472800279,
|
||||
'upload_date': '20160902',
|
||||
'duration': 131.4,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
# with SignLanguage formats
|
||||
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
|
||||
@@ -87,33 +67,54 @@ class DRTVIE(InfoExtractor):
|
||||
'season': 'Historien om Danmark',
|
||||
'series': 'Historien om Danmark',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'this video has been removed',
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
|
||||
'url': 'https://www.dr.dk/drtv/se/frank-and-kastaniegaarden_71769',
|
||||
'info_dict': {
|
||||
'id': '00951930010',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bonderøven 2019 (1:8)',
|
||||
'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd',
|
||||
'timestamp': 1654856100,
|
||||
'upload_date': '20220610',
|
||||
'duration': 2576.6,
|
||||
'season': 'Bonderøven 2019',
|
||||
'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5',
|
||||
'title': 'Frank & Kastaniegaarden',
|
||||
'description': 'md5:974e1780934cf3275ef10280204bccb0',
|
||||
'release_timestamp': 1546545600,
|
||||
'release_date': '20190103',
|
||||
'duration': 2576,
|
||||
'season': 'Frank & Kastaniegaarden',
|
||||
'season_id': '67125',
|
||||
'release_year': 2019,
|
||||
'season_number': 2019,
|
||||
'series': 'Frank & Kastaniegaarden',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'episode': 'Frank & Kastaniegaarden',
|
||||
'thumbnail': r're:https?://.+',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Foreign and Regular subtitle track
|
||||
'url': 'https://www.dr.dk/drtv/se/spise-med-price_-pasta-selv_397445',
|
||||
'info_dict': {
|
||||
'id': '00212301010',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 1,
|
||||
'title': 'Spise med Price: Pasta Selv',
|
||||
'alt_title': '1. Pasta Selv',
|
||||
'release_date': '20230807',
|
||||
'description': 'md5:2da9060524fed707810d71080b3d0cd8',
|
||||
'duration': 1750,
|
||||
'season': 'Spise med Price',
|
||||
'release_timestamp': 1691438400,
|
||||
'season_id': '397440',
|
||||
'episode': 'Spise med Price: Pasta Selv',
|
||||
'thumbnail': r're:https?://.+',
|
||||
'season_number': 15,
|
||||
'series': 'Spise med Price',
|
||||
'release_year': 2022,
|
||||
'subtitles': 'mincount:2',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
|
||||
'only_matching': True,
|
||||
@@ -123,226 +124,127 @@ class DRTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/drtv/program/jagten_220924',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/lyd/p4aarhus/regionale-nyheder-ar4/regionale-nyheder-2022-05-05-12-30-3',
|
||||
'info_dict': {
|
||||
'id': 'urn:dr:mu:programcard:6265cb2571401424d0360113',
|
||||
'title': "Regionale nyheder",
|
||||
'ext': 'mp4',
|
||||
'duration': 120.043,
|
||||
'series': 'P4 Østjylland regionale nyheder',
|
||||
'timestamp': 1651746600,
|
||||
'season': 'Regionale nyheder',
|
||||
'release_year': 0,
|
||||
'season_id': 'urn:dr:mu:bundle:61c26889539f0201586b73c5',
|
||||
'description': '',
|
||||
'upload_date': '20220505',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'this video has been removed',
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/regionale-nyheder-2023-03-14-10-30-9',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '14802310112',
|
||||
'timestamp': 1678786200,
|
||||
'duration': 120.043,
|
||||
'season_id': 'urn:dr:mu:bundle:63a4f7c87140143504b6710f',
|
||||
'series': 'P4 København regionale nyheder',
|
||||
'upload_date': '20230314',
|
||||
'release_year': 0,
|
||||
'description': 'Hør seneste regionale nyheder fra P4 København.',
|
||||
'season': 'Regionale nyheder',
|
||||
'title': 'Regionale nyheder',
|
||||
},
|
||||
}]
|
||||
|
||||
SUBTITLE_LANGS = {
|
||||
'DanishLanguageSubtitles': 'da',
|
||||
'ForeignLanguageSubtitles': 'da_foreign',
|
||||
'CombinedLanguageSubtitles': 'da_combined',
|
||||
}
|
||||
|
||||
_TOKEN = None
|
||||
|
||||
def _real_initialize(self):
    """Obtain an anonymous API token and store it in ``self._TOKEN``.

    Does nothing when a token is already present. Raises ExtractorError
    when the response contains no 'UserAccount' token value.
    """
    if self._TOKEN:
        return

    # A fresh random device id is sent with every token request.
    request_body = json.dumps({
        'deviceId': str(uuid.uuid4()),
        'scopes': ['Catalog'],
        'optout': True,
    }).encode()
    request_query = {
        'device': 'web_browser',
        'ff': 'idp,ldp,rpt',
        'lang': 'da',
        'supportFallbackToken': 'true',
    }
    request_headers = {
        'content-type': 'application/json',
    }

    token_response = self._download_json(
        'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
        note='Downloading anonymous token', headers=request_headers,
        query=request_query, data=request_body)

    # Pick the first entry whose type is 'UserAccount' and keep its string value.
    token_path = (lambda _, x: x['type'] == 'UserAccount', 'value', {str})
    self._TOKEN = traverse_obj(token_response, token_path, get_all=False)
    if not self._TOKEN:
        raise ExtractorError('Unable to get anonymous token')
|
||||
|
||||
def _real_extract(self, url):
|
||||
raw_video_id, is_radio_url = self._match_valid_url(url).group('id', 'radio')
|
||||
url_slug = self._match_id(url)
|
||||
webpage = self._download_webpage(url, url_slug)
|
||||
|
||||
webpage = self._download_webpage(url, raw_video_id)
|
||||
|
||||
if '>Programmet er ikke længere tilgængeligt' in webpage:
|
||||
raise ExtractorError(
|
||||
'Video %s is not available' % raw_video_id, expected=True)
|
||||
|
||||
video_id = self._search_regex(
|
||||
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
||||
webpage, 'video id', default=None)
|
||||
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
|
||||
webpage, 'urn', default=None)
|
||||
if video_id:
|
||||
video_id = compat_urllib_parse_unquote(video_id)
|
||||
|
||||
_PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard'
|
||||
query = {'expanded': 'true'}
|
||||
|
||||
if video_id:
|
||||
programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
|
||||
json_data = self._search_json(
|
||||
r'window\.__data\s*=', webpage, 'data', url_slug, fatal=False) or {}
|
||||
item = traverse_obj(
|
||||
json_data, ('cache', 'page', ..., (None, ('entries', 0)), 'item', {dict}), get_all=False)
|
||||
if item:
|
||||
item_id = item.get('id')
|
||||
else:
|
||||
programcard_url = _PROGRAMCARD_BASE
|
||||
if is_radio_url:
|
||||
video_id = self._search_nextjs_data(
|
||||
webpage, raw_video_id)['props']['pageProps']['episode']['productionNumber']
|
||||
else:
|
||||
json_data = self._search_json(
|
||||
r'window\.__data\s*=', webpage, 'data', raw_video_id)
|
||||
video_id = traverse_obj(json_data, (
|
||||
'cache', 'page', ..., (None, ('entries', 0)), 'item', 'customId',
|
||||
{lambda x: x.split(':')[-1]}), get_all=False)
|
||||
if not video_id:
|
||||
raise ExtractorError('Unable to extract video id')
|
||||
query['productionnumber'] = video_id
|
||||
item_id = url_slug.rsplit('_', 1)[-1]
|
||||
item = self._download_json(
|
||||
f'https://production-cdn.dr-massive.com/api/items/{item_id}', item_id,
|
||||
note='Attempting to download backup item data', query={
|
||||
'device': 'web_browser',
|
||||
'expand': 'all',
|
||||
'ff': 'idp,ldp,rpt',
|
||||
'geoLocation': 'dk',
|
||||
'isDeviceAbroad': 'false',
|
||||
'lang': 'da',
|
||||
'segments': 'drtv,optedout',
|
||||
'sub': 'Anonymous',
|
||||
})
|
||||
|
||||
data = self._download_json(
|
||||
programcard_url, video_id, 'Downloading video JSON', query=query)
|
||||
|
||||
supplementary_data = {}
|
||||
if re.search(r'_\d+$', raw_video_id):
|
||||
supplementary_data = self._download_json(
|
||||
SERIES_API % f'/episode/{raw_video_id}', raw_video_id, fatal=False) or {}
|
||||
|
||||
title = str_or_none(data.get('Title')) or re.sub(
|
||||
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
|
||||
self._og_search_title(webpage))
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or data.get('Description')
|
||||
|
||||
timestamp = unified_timestamp(
|
||||
data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))
|
||||
|
||||
thumbnail = None
|
||||
duration = None
|
||||
|
||||
restricted_to_denmark = False
|
||||
video_id = try_call(lambda: item['customId'].rsplit(':', 1)[-1]) or item_id
|
||||
stream_data = self._download_json(
|
||||
f'https://production.dr-massive.com/api/account/items/{item_id}/videos', video_id,
|
||||
note='Downloading stream data', query={
|
||||
'delivery': 'stream',
|
||||
'device': 'web_browser',
|
||||
'ff': 'idp,ldp,rpt',
|
||||
'lang': 'da',
|
||||
'resolution': 'HD-1080',
|
||||
'sub': 'Anonymous',
|
||||
}, headers={'authorization': f'Bearer {self._TOKEN}'})
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for stream in traverse_obj(stream_data, (lambda _, x: x['url'])):
|
||||
format_id = stream.get('format', 'na')
|
||||
access_service = stream.get('accessService')
|
||||
preference = None
|
||||
subtitle_suffix = ''
|
||||
if access_service in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
|
||||
preference = -1
|
||||
format_id += f'-{access_service}'
|
||||
subtitle_suffix = f'-{access_service}'
|
||||
elif access_service == 'StandardVideo':
|
||||
preference = 1
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
stream.get('url'), video_id, ext='mp4', preference=preference, m3u8_id=format_id, fatal=False)
|
||||
formats.extend(fmts)
|
||||
|
||||
assets = []
|
||||
primary_asset = data.get('PrimaryAsset')
|
||||
if isinstance(primary_asset, dict):
|
||||
assets.append(primary_asset)
|
||||
secondary_assets = data.get('SecondaryAssets')
|
||||
if isinstance(secondary_assets, list):
|
||||
for secondary_asset in secondary_assets:
|
||||
if isinstance(secondary_asset, dict):
|
||||
assets.append(secondary_asset)
|
||||
api_subtitles = traverse_obj(stream, ('subtitles', lambda _, v: url_or_none(v['link']), {dict}))
|
||||
if not api_subtitles:
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
def hex_to_bytes(hex):
|
||||
return binascii.a2b_hex(hex.encode('ascii'))
|
||||
for sub_track in api_subtitles:
|
||||
lang = sub_track.get('language') or 'da'
|
||||
subtitles.setdefault(self.SUBTITLE_LANGS.get(lang, lang) + subtitle_suffix, []).append({
|
||||
'url': sub_track['link'],
|
||||
'ext': mimetype2ext(sub_track.get('format')) or 'vtt'
|
||||
})
|
||||
|
||||
def decrypt_uri(e):
|
||||
n = int(e[2:10], 16)
|
||||
a = e[10 + n:]
|
||||
data = hex_to_bytes(e[10:10 + n])
|
||||
key = hashlib.sha256(('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest()
|
||||
iv = hex_to_bytes(a)
|
||||
decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(data, key, iv))
|
||||
return decrypted.decode('utf-8').split('?')[0]
|
||||
|
||||
for asset in assets:
|
||||
kind = asset.get('Kind')
|
||||
if kind == 'Image':
|
||||
thumbnail = url_or_none(asset.get('Uri'))
|
||||
elif kind in ('VideoResource', 'AudioResource'):
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
asset_target = asset.get('Target')
|
||||
for link in asset.get('Links', []):
|
||||
uri = link.get('Uri')
|
||||
if not uri:
|
||||
encrypted_uri = link.get('EncryptedUri')
|
||||
if not encrypted_uri:
|
||||
continue
|
||||
try:
|
||||
uri = decrypt_uri(encrypted_uri)
|
||||
except Exception:
|
||||
self.report_warning(
|
||||
'Unable to decrypt EncryptedUri', video_id)
|
||||
continue
|
||||
uri = url_or_none(uri)
|
||||
if not uri:
|
||||
continue
|
||||
target = link.get('Target')
|
||||
format_id = target or ''
|
||||
if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
|
||||
preference = -1
|
||||
format_id += '-%s' % asset_target
|
||||
elif asset_target == 'Default':
|
||||
preference = 1
|
||||
else:
|
||||
preference = None
|
||||
if target == 'HDS':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||
video_id, preference, f4m_id=format_id, fatal=False)
|
||||
if kind == 'AudioResource':
|
||||
for f in f4m_formats:
|
||||
f['vcodec'] = 'none'
|
||||
formats.extend(f4m_formats)
|
||||
elif target == 'HLS':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
quality=preference, m3u8_id=format_id, fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
bitrate = link.get('Bitrate')
|
||||
if bitrate:
|
||||
format_id += '-%s' % bitrate
|
||||
formats.append({
|
||||
'url': uri,
|
||||
'format_id': format_id,
|
||||
'tbr': int_or_none(bitrate),
|
||||
'ext': link.get('FileFormat'),
|
||||
'vcodec': 'none' if kind == 'AudioResource' else None,
|
||||
'quality': preference,
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
|
||||
if isinstance(subtitles_list, list):
|
||||
LANGS = {
|
||||
'Danish': 'da',
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
if not isinstance(subs, dict):
|
||||
continue
|
||||
sub_uri = url_or_none(subs.get('Uri'))
|
||||
if not sub_uri:
|
||||
continue
|
||||
lang = subs.get('Language') or 'da'
|
||||
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
||||
'url': sub_uri,
|
||||
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
||||
})
|
||||
|
||||
if not formats and restricted_to_denmark:
|
||||
self.raise_geo_restricted(
|
||||
'Unfortunately, DR is not allowed to show this program outside Denmark.',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
if not formats and traverse_obj(item, ('season', 'customFields', 'IsGeoRestricted')):
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'series': str_or_none(data.get('SeriesTitle')),
|
||||
'season': str_or_none(data.get('SeasonTitle')),
|
||||
'season_number': int_or_none(data.get('SeasonNumber')),
|
||||
'season_id': str_or_none(data.get('SeasonUrn')),
|
||||
'episode': traverse_obj(supplementary_data, ('entries', 0, 'item', 'contextualTitle')) or str_or_none(data.get('EpisodeTitle')),
|
||||
'episode_number': traverse_obj(supplementary_data, ('entries', 0, 'item', 'episodeNumber')) or int_or_none(data.get('EpisodeNumber')),
|
||||
'release_year': int_or_none(data.get('ProductionYear')),
|
||||
**traverse_obj(item, {
|
||||
'title': 'title',
|
||||
'alt_title': 'contextualTitle',
|
||||
'description': 'description',
|
||||
'thumbnail': ('images', 'wallpaper'),
|
||||
'release_timestamp': ('customFields', 'BroadcastTimeDK', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'series': ('season', 'show', 'title'),
|
||||
'season': ('season', 'title'),
|
||||
'season_number': ('season', 'seasonNumber', {int_or_none}),
|
||||
'season_id': 'seasonId',
|
||||
'episode': 'episodeName',
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'release_year': ('releaseYear', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
@@ -412,6 +314,8 @@ class DRTVSeasonIE(InfoExtractor):
|
||||
'display_id': 'frank-and-kastaniegaarden',
|
||||
'title': 'Frank & Kastaniegaarden',
|
||||
'series': 'Frank & Kastaniegaarden',
|
||||
'season_number': 2008,
|
||||
'alt_title': 'Season 2008',
|
||||
},
|
||||
'playlist_mincount': 8
|
||||
}, {
|
||||
@@ -421,6 +325,8 @@ class DRTVSeasonIE(InfoExtractor):
|
||||
'display_id': 'frank-and-kastaniegaarden',
|
||||
'title': 'Frank & Kastaniegaarden',
|
||||
'series': 'Frank & Kastaniegaarden',
|
||||
'season_number': 2009,
|
||||
'alt_title': 'Season 2009',
|
||||
},
|
||||
'playlist_mincount': 19
|
||||
}]
|
||||
@@ -434,6 +340,7 @@ class DRTVSeasonIE(InfoExtractor):
|
||||
'url': f'https://www.dr.dk/drtv{episode["path"]}',
|
||||
'ie_key': DRTVIE.ie_key(),
|
||||
'title': episode.get('title'),
|
||||
'alt_title': episode.get('contextualTitle'),
|
||||
'episode': episode.get('episodeName'),
|
||||
'description': episode.get('shortDescription'),
|
||||
'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
|
||||
@@ -446,6 +353,7 @@ class DRTVSeasonIE(InfoExtractor):
|
||||
'id': season_id,
|
||||
'display_id': display_id,
|
||||
'title': traverse_obj(data, ('entries', 0, 'item', 'title')),
|
||||
'alt_title': traverse_obj(data, ('entries', 0, 'item', 'contextualTitle')),
|
||||
'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
|
||||
'entries': entries,
|
||||
'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber'))
|
||||
@@ -463,6 +371,7 @@ class DRTVSeriesIE(InfoExtractor):
|
||||
'display_id': 'frank-and-kastaniegaarden',
|
||||
'title': 'Frank & Kastaniegaarden',
|
||||
'series': 'Frank & Kastaniegaarden',
|
||||
'alt_title': '',
|
||||
},
|
||||
'playlist_mincount': 15
|
||||
}]
|
||||
@@ -476,6 +385,7 @@ class DRTVSeriesIE(InfoExtractor):
|
||||
'url': f'https://www.dr.dk/drtv{season.get("path")}',
|
||||
'ie_key': DRTVSeasonIE.ie_key(),
|
||||
'title': season.get('title'),
|
||||
'alt_title': season.get('contextualTitle'),
|
||||
'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
|
||||
'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber'))
|
||||
} for season in traverse_obj(data, ('entries', 0, 'item', 'show', 'seasons', 'items'))]
|
||||
@@ -485,6 +395,7 @@ class DRTVSeriesIE(InfoExtractor):
|
||||
'id': series_id,
|
||||
'display_id': display_id,
|
||||
'title': traverse_obj(data, ('entries', 0, 'item', 'title')),
|
||||
'alt_title': traverse_obj(data, ('entries', 0, 'item', 'contextualTitle')),
|
||||
'series': traverse_obj(data, ('entries', 0, 'item', 'title')),
|
||||
'entries': entries
|
||||
}
|
||||
|
||||
@@ -138,7 +138,7 @@ class DubokuIE(InfoExtractor):
|
||||
# of the video.
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(data_url, {'http_headers': headers}),
|
||||
'url': smuggle_url(data_url, {'referer': webpage_url}),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'series': series_title,
|
||||
|
||||
104
yt_dlp/extractor/duoplay.py
Normal file
104
yt_dlp/extractor/duoplay.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
str_or_none,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DuoplayIE(InfoExtractor):
    """Extractor for duoplay.ee telecast pages (episodes and movies)."""

    _VALID_URL = r'https://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?'
    _TESTS = [{
        'note': 'Siberi võmm S02E12',
        'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
        'md5': '1ff59d535310ac9c5cf5f287d8f91b2d',
        'info_dict': {
            'id': '4312_24',
            'ext': 'mp4',
            'title': 'Operatsioon "Öö"',
            'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
            'description': 'md5:8ef98f38569d6b8b78f3d350ccc6ade8',
            'upload_date': '20170523',
            'timestamp': 1495567800,
            'series': 'Siberi võmm',
            'series_id': '4312',
            'season': 'Season 2',
            'season_number': 2,
            'episode': 'Operatsioon "Öö"',
            'episode_number': 12,
            'episode_id': 24,
        },
    }, {
        'note': 'Empty title',
        'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
        'md5': '6aca68be71112314738dd17cced7f8bf',
        'info_dict': {
            'id': '17_14',
            'ext': 'mp4',
            'title': 'Ühikarotid',
            'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
            'description': 'md5:4719b418e058c209def41d48b601276e',
            'upload_date': '20100916',
            'timestamp': 1284661800,
            'series': 'Ühikarotid',
            'series_id': '17',
            'season': 'Season 2',
            'season_number': 2,
            'episode_id': 14,
            'release_year': 2010,
        },
    }, {
        'note': 'Movie without expiry',
        'url': 'https://duoplay.ee/5501/pilvede-all.-neljas-ode',
        'md5': '7abf63d773a49ef7c39f2c127842b8fd',
        'info_dict': {
            'id': '5501',
            'ext': 'mp4',
            'title': 'Pilvede all. Neljas õde',
            'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
            'description': 'md5:d86a70f8f31e82c369d4d4f4c79b1279',
            'cast': 'count:9',
            'upload_date': '20221214',
            'timestamp': 1671054000,
            'release_year': 2018,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's <video-player> element attributes."""
        mobj = self._match_valid_url(url)
        telecast_id, episode = mobj.group('id'), mobj.group('ep')
        # The episode part is only present when the URL carries an ?ep= parameter.
        video_id = join_nonempty(telecast_id, episode, delim='_')
        webpage = self._download_webpage(url, video_id)

        def read_player_attributes():
            player_html = get_element_text_and_html_by_tag('video-player', webpage)[1]
            return extract_attributes(player_html)

        # try_call turns any failure while locating/parsing the element into None.
        video_player = try_call(read_player_attributes)
        manifest_url = video_player.get('manifest-url') if video_player else None
        if not manifest_url:
            raise ExtractorError('No video found', expected=True)

        raw_episode = video_player.get(':episode') or ''
        episode_attr = self._parse_json(raw_episode, video_id, fatal=False) or {}

        info = {
            'id': video_id,
            'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4'),
        }
        info.update(traverse_obj(episode_attr, {
            'title': 'title',
            'description': 'synopsis',
            'thumbnail': ('images', 'original'),
            'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}),
            'cast': ('cast', {lambda x: x.split(', ')}),
            'release_year': ('year', {int_or_none}),
        }))

        # Series/episode fields (and the episode-based title) are skipped for movies.
        if episode_attr.get('category') != 'movies':
            info.update(traverse_obj(episode_attr, {
                'title': (None, ('subtitle', ('episode_nr', {lambda x: f'Episode {x}' if x else None}))),
                'series': 'title',
                'series_id': ('telecast_id', {str_or_none}),
                'season_number': ('season_id', {int_or_none}),
                'episode': 'subtitle',
                'episode_number': ('episode_nr', {int_or_none}),
                'episode_id': ('episode_id', {int_or_none}),
            }, get_all=False))

        return info
|
||||
@@ -1,43 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EchoMskIE(InfoExtractor):
    """Extractor for audio pages under echo.msk.ru/sounds/."""

    _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.echo.msk.ru/sounds/1464134.html',
        'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
        'info_dict': {
            'id': '1464134',
            'ext': 'mp3',
            'title': 'Особое мнение - 29 декабря 2014, 19:08',
        },
    }

    def _real_extract(self, url):
        """Return the mp3 link and a title made of programme name plus air date."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        mp3_url = self._search_regex(
            r'<a rel="mp3" href="([^"]+)">', page, 'audio URL')
        programme = self._html_search_regex(
            r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
            page, 'title')
        raw_date = self._html_search_regex(
            r'(?s)<div class="date">(.+?)</div>',
            page, 'date', fatal=False, default=None)

        # Collapse runs of the same whitespace character before appending the date.
        squeezed_date = re.sub(r'(\s)\1+', r'\1', raw_date) if raw_date else None
        if squeezed_date:
            title = f'{programme} - {squeezed_date}'
        else:
            title = programme

        return {
            'id': video_id,
            'url': mp3_url,
            'title': title,
        }
|
||||
@@ -1,36 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class EHowIE(InfoExtractor):
    """Extractor for ehow.com pages whose URL ends in ``_<numeric id>``."""

    IE_NAME = 'eHow'
    _VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
        'md5': '9809b4e3f115ae2088440bcb4efbf371',
        'info_dict': {
            'id': '12245069',
            'ext': 'flv',
            'title': 'Hardwood Flooring Basics',
            'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...',
            'uploader': 'Erick Nathan',
        }
    }

    def _real_extract(self, url):
        """Read the percent-encoded media URL and the metadata from the page."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The media URL appears percent-encoded as a file=/source= parameter.
        encoded_url = self._search_regex(
            r'(?:file|source)=(http[^\'"&]*)', page, 'video URL')

        info = {
            'id': video_id,
            'url': compat_urllib_parse_unquote(encoded_url),
        }
        info['uploader'] = self._html_search_meta('uploader', page)
        info['title'] = self._og_search_title(page).replace(' | eHow', '')
        info['thumbnail'] = self._og_search_thumbnail(page)
        info['description'] = self._og_search_description(page)
        return info
|
||||
72
yt_dlp/extractor/elementorembed.py
Normal file
72
yt_dlp/extractor/elementorembed.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import unescapeHTML, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ElementorEmbedIE(InfoExtractor):
    """Embed-only extractor for Elementor video and video-playlist widgets."""

    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        'url': 'https://capitaltv.cy/2023/12/14/υγεια-και-ζωη-14-12-2023-δρ-ξενια-κωσταντινιδο/',
        'info_dict': {
            'id': 'KgzuxwuQwM4',
            'ext': 'mp4',
            'title': 'ΥΓΕΙΑ ΚΑΙ ΖΩΗ 14 12 2023 ΔΡ  ΞΕΝΙΑ ΚΩΣΤΑΝΤΙΝΙΔΟΥ',
            'thumbnail': 'https://i.ytimg.com/vi/KgzuxwuQwM4/maxresdefault.jpg',
            'playable_in_embed': True,
            'tags': 'count:16',
            'like_count': int,
            'channel': 'Capital TV Cyprus',
            'channel_id': 'UCR8LwVKTLGEXt4ZAErpCMrg',
            'availability': 'public',
            'description': 'md5:7a3308a22881aea4612358c4ba121f77',
            'duration': 2891,
            'upload_date': '20231214',
            'uploader_id': '@capitaltvcyprus6389',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCR8LwVKTLGEXt4ZAErpCMrg',
            'uploader_url': 'https://www.youtube.com/@capitaltvcyprus6389',
            'uploader': 'Capital TV Cyprus',
            'age_limit': 0,
            'categories': ['News & Politics'],
            'view_count': int,
            'channel_follower_count': int,
        },
    }, {
        'url': 'https://elementor.com/academy/theme-builder-collection/?playlist=76011151&video=9e59909',
        'info_dict': {
            'id': '?playlist=76011151&video=9e59909',
            'title': 'Theme Builder Collection - Academy',
            'age_limit': 0,
            'timestamp': 1702196984.0,
            'upload_date': '20231210',
            'description': 'md5:7f52c52715ee9e54fd7f82210511673d',
            'thumbnail': 'https://elementor.com/academy/wp-content/uploads/2021/07/Theme-Builder-1.png',
        },
        'playlist_count': 11,
        'params': {
            'skip_download': True,
        },
    }]
    _WIDGET_REGEX = r'<div[^>]+class="[^"]*elementor-widget-video(?:-playlist)?[^"]*"[^>]*data-settings="([^"]*)"'

    def _extract_from_webpage(self, url, webpage):
        """Yield one result per video referenced in each widget's data-settings JSON."""
        for raw_settings in re.findall(self._WIDGET_REGEX, webpage):
            # The attribute value is HTML-escaped JSON; unparsable widgets yield nothing.
            settings = self._parse_json(
                raw_settings, None, fatal=False, transform_source=unescapeHTML)

            widget_youtube = traverse_obj(settings, ('youtube_url', {url_or_none}))
            if widget_youtube:
                yield self.url_result(widget_youtube, ie=YoutubeIE)

            # Playlist widgets list their items under 'tabs'; only tabs with an '_id' are used.
            playlist_tabs = traverse_obj(settings, ('tabs', lambda _, v: v['_id'], {dict}))
            for tab in playlist_tabs:
                tab_youtube = traverse_obj(tab, ('youtube_url', {url_or_none}))
                if tab_youtube:
                    yield self.url_result(tab_youtube, ie=YoutubeIE)

                tab_vimeo = traverse_obj(tab, ('vimeo_url', {url_or_none}))
                if tab_vimeo:
                    yield self.url_result(tab_vimeo, ie=VimeoIE)

                media_urls = traverse_obj(
                    tab, (('hosted_url', 'external_url'), 'url', {url_or_none}))
                for media_url in media_urls:
                    yield {
                        'id': tab['_id'],
                        'url': media_url,
                        'title': tab.get('title'),
                    }
|
||||
@@ -1,59 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ElevenSportsIE(InfoExtractor):
    """Extractor for elevensports.com event pages."""

    _VALID_URL = r'https?://(?:www\.)?elevensports\.com/view/event/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://elevensports.com/view/event/clf46yr3kenn80jgrqsjmwefk',
        'md5': 'c0958d9ff90e4503a75544358758921d',
        'info_dict': {
            'id': 'clf46yr3kenn80jgrqsjmwefk',
            'title': 'Cleveland SC vs Lionsbridge FC',
            'ext': 'mp4',
            'description': 'md5:03b5238d6549f4ea1fddadf69b5e0b58',
            'upload_date': '20230323',
            'timestamp': 1679612400,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'params': {'skip_download': 'm3u8'}
    }, {
        'url': 'https://elevensports.com/view/event/clhpyd53b06160jez74qhgkmf',
        'md5': 'c0958d9ff90e4503a75544358758921d',
        'info_dict': {
            'id': 'clhpyd53b06160jez74qhgkmf',
            'title': 'AJNLF vs ARRAF',
            'ext': 'mp4',
            'description': 'md5:c8c5e75c78f37c6d15cd6c475e43a8c1',
            'upload_date': '20230521',
            'timestamp': 1684684800,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'params': {'skip_download': 'm3u8'}
    }]

    def _real_extract(self, url):
        """Look up the event id in the page data, then fetch the event and its HLS stream."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        nextjs_data = self._search_nextjs_data(page, video_id)
        event_id = nextjs_data['props']['pageProps']['event']['mclsEventId']

        api_url = f'https://mcls-api.mycujoo.tv/bff/events/v1beta1/{event_id}'
        api_headers = {'Authorization': 'Bearer FBVKACGN37JQC5SFA0OVK8KKSIOP153G'}
        event_data = self._download_json(api_url, video_id, headers=api_headers)

        # Only the first listed stream is used.
        stream_url = event_data['streams'][0]['full_url']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            stream_url, video_id, 'mp4', m3u8_id='hls')

        metadata = traverse_obj(event_data, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('start_time', {parse_iso8601}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
        })
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
|
||||
@@ -1,130 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class EllenTubeBaseIE(InfoExtractor):
    """Helpers shared by the EllenTube extractors."""

    def _extract_data_config(self, webpage, video_id):
        """Return the parsed ``data-config`` JSON of the page's Details component.

        The element is located by its ``data-component`` attribute; a page
        without it makes ``_search_regex`` fail.
        """
        details = self._search_regex(
            r'(<[^>]+\bdata-component=(["\'])[Dd]etails.+?></div>)', webpage,
            'details')
        return self._parse_json(
            extract_attributes(details)['data-config'], video_id)

    def _extract_video(self, data, video_id):
        """Build an info dict from one item of API data.

        ``data`` must contain 'title'. Formats, subtitles and duration come
        from the first entry of ``data['media']`` whose id is 'm3u8'.
        """
        title = data['title']

        # Bind both containers up front: previously `subtitles` was only
        # assigned inside the m3u8 branch, so an item without such an entry
        # raised UnboundLocalError when the result dict was built.
        formats, subtitles = [], {}
        duration = None
        # `or []` tolerates a missing or null 'media' key instead of
        # failing with "NoneType is not iterable".
        for entry in data.get('media') or []:
            if entry.get('id') == 'm3u8':
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                    entry['url'], video_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls')
                duration = int_or_none(entry.get('duration'))
                break

        def get_insight(kind):
            # Counters are stored under pluralised keys, e.g. 'views', 'likes'.
            return int_or_none(try_get(
                data, lambda x: x['insight']['%ss' % kind]))

        return {
            'extractor_key': EllenTubeIE.ie_key(),
            'id': video_id,
            'title': title,
            'description': data.get('description'),
            'duration': duration,
            'thumbnail': data.get('thumbnail'),
            # publishTime is divided by 1000 to give the timestamp.
            'timestamp': float_or_none(data.get('publishTime'), scale=1000),
            'view_count': get_insight('view'),
            'like_count': get_insight('like'),
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class EllenTubeIE(EllenTubeBaseIE):
    """Extractor for single EllenTube items addressed by UUID."""

    _VALID_URL = r'''(?x)
                        (?:
                            ellentube:|
                            https://api-prod\.ellentube\.com/ellenapi/api/item/
                        )
                        (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
                    '''
    _TESTS = [{
        'url': 'https://api-prod.ellentube.com/ellenapi/api/item/0822171c-3829-43bf-b99f-d77358ae75e3',
        'md5': '2fabc277131bddafdd120e0fc0f974c9',
        'info_dict': {
            'id': '0822171c-3829-43bf-b99f-d77358ae75e3',
            'ext': 'mp4',
            'title': 'Ellen Meets Las Vegas Survivors Jesus Campos and Stephen Schuck',
            'description': 'md5:76e3355e2242a78ad9e3858e5616923f',
            'thumbnail': r're:^https?://.+?',
            'duration': 514,
            'timestamp': 1508505120,
            'upload_date': '20171020',
            'view_count': int,
            'like_count': int,
        }
    }, {
        'url': 'ellentube:734a3353-f697-4e79-9ca9-bfc3002dc1e0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the item JSON for the id and convert it to an info dict."""
        video_id = self._match_id(url)
        item_url = f'https://api-prod.ellentube.com/ellenapi/api/item/{video_id}'
        item = self._download_json(item_url, video_id)
        return self._extract_video(item, video_id)
|
||||
|
||||
|
||||
class EllenTubeVideoIE(EllenTubeBaseIE):
    """Extractor for ellentube.com video pages; delegates to EllenTubeIE."""

    _VALID_URL = r'https?://(?:www\.)?ellentube\.com/video/(?P<id>.+?)\.html'
    _TEST = {
        'url': 'https://www.ellentube.com/video/ellen-meets-las-vegas-survivors-jesus-campos-and-stephen-schuck.html',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Resolve the page slug to the item id and hand over via an ``ellentube:`` URL."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        config = self._extract_data_config(page, display_id)
        video_id = config['id']
        return self.url_result(
            f'ellentube:{video_id}', ie=EllenTubeIE.ie_key(),
            video_id=video_id)
|
||||
|
||||
|
||||
class EllenTubePlaylistIE(EllenTubeBaseIE):
    """Extractor for ellentube.com episode and studios pages as playlists."""

    _VALID_URL = r'https?://(?:www\.)?ellentube\.com/(?:episode|studios)/(?P<id>.+?)\.html'
    _TESTS = [{
        'url': 'https://www.ellentube.com/episode/dax-shepard-jordan-fisher-haim.html',
        'info_dict': {
            'id': 'dax-shepard-jordan-fisher-haim',
            'title': "Dax Shepard, 'DWTS' Team Jordan Fisher & Lindsay Arnold, HAIM",
            'description': 'md5:bfc982194dabb3f4e325e43aa6b2e21c',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://www.ellentube.com/studios/macey-goes-rving0.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the feed named by the page config and keep its VIDEO items."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        data = self._extract_data_config(page, display_id)['data']

        # The config's 'filter' value is used as the feed query string.
        feed_url = 'https://api-prod.ellentube.com/ellenapi/api/feed/?%s' % data['filter']
        feed = self._download_json(feed_url, display_id)

        entries = []
        for elem in feed:
            # Skip non-video feed items and items without an id.
            if elem.get('type') != 'VIDEO' or not elem.get('id'):
                continue
            entries.append(self._extract_video(elem, elem['id']))

        playlist_title = data.get('title')
        playlist_description = clean_html(data.get('description'))
        return self.playlist_result(
            entries, display_id, playlist_title, playlist_description)
|
||||
62
yt_dlp/extractor/eltrecetv.py
Normal file
62
yt_dlp/extractor/eltrecetv.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ElTreceTVIE(InfoExtractor):
    """Extractor for episode pages on eltrecetv.com.ar."""

    IE_DESC = 'El Trece TV (Argentina)'
    _VALID_URL = r'https?://(?:www\.)?eltrecetv\.com\.ar/[\w-]+/capitulos/temporada-\d+/(?P<id>[\w-]+)'
    _TESTS = [
        {
            'url': 'https://www.eltrecetv.com.ar/ahora-caigo/capitulos/temporada-2023/programa-del-061023/',
            'md5': '71a66673dc63f9a5939d97bfe4b311ba',
            'info_dict': {
                'id': 'AHCA05102023145553329621094',
                'ext': 'mp4',
                'title': 'AHORA CAIGO - Programa 06/10/23',
                'thumbnail': 'https://thumbs.vodgc.net/AHCA05102023145553329621094.JPG?649339',
            }
        },
        {
            'url': 'https://www.eltrecetv.com.ar/poco-correctos/capitulos/temporada-2023/programa-del-250923-invitada-dalia-gutmann/',
            'only_matching': True,
        },
        {
            'url': 'https://www.eltrecetv.com.ar/argentina-tierra-de-amor-y-venganza/capitulos/temporada-2023/atav-2-capitulo-121-del-250923/',
            'only_matching': True,
        },
        {
            'url': 'https://www.eltrecetv.com.ar/ahora-caigo/capitulos/temporada-2023/programa-del-250923/',
            'only_matching': True,
        },
        {
            'url': 'https://www.eltrecetv.com.ar/pasaplatos/capitulos/temporada-2023/pasaplatos-el-restaurante-del-250923/',
            'only_matching': True,
        },
        {
            'url': 'https://www.eltrecetv.com.ar/el-galpon/capitulos/temporada-2023/programa-del-160923-invitado-raul-lavie/',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Extract HLS formats plus URLs derived by stripping the track playlist path."""
        slug = self._match_id(url)
        webpage = self._download_webpage(url, slug)

        global_content = self._search_json(
            r'Fusion.globalContent\s*=', webpage, 'content', slug)
        config = global_content['promo_items']['basic']['embed']['config']
        video_url = config['m3u8']
        # Fall back to the URL slug when the manifest name carries no id.
        video_id = self._search_regex(r'/(\w+)\.m3u8', video_url, 'video id', default=slug)

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            video_url, video_id, 'mp4', m3u8_id='hls')

        # Dropping the track playlist suffix (23 characters) from a variant URL
        # gives the URL used for the http-* formats; 1080-high variants are excluded.
        track_suffix = '/tracks-v1a1/index.m3u8'
        http_formats = []
        for fmt in formats:
            if not fmt['url'].endswith(track_suffix) or fmt.get('height') == 1080:
                continue
            http_formats.append({
                'url': fmt['url'][:-len(track_suffix)],
                'format_id': fmt['format_id'].replace('hls', 'http'),
                'width': fmt.get('width'),
                'height': fmt.get('height'),
            })
        formats.extend(http_formats)

        return {
            'id': video_id,
            'title': config.get('title'),
            'thumbnail': config.get('thumbnail'),
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
@@ -106,4 +106,4 @@ class EmbedlyIE(InfoExtractor):
|
||||
return self.url_result(src, YoutubeTabIE)
|
||||
return self.url_result(smuggle_url(
|
||||
urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))),
|
||||
{'http_headers': {'Referer': url}}))
|
||||
{'referer': url}))
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EngadgetIE(InfoExtractor):
    """Redirect Engadget video pages to the ``aol-video:`` URL scheme."""

    _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)'

    _TESTS = [{
        # video with vidible ID
        'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Wrap the ID matched from *url* in an ``aol-video:`` URL result."""
        return self.url_result(f'aol-video:{self._match_id(url)}')
|
||||
107
yt_dlp/extractor/epidemicsound.py
Normal file
107
yt_dlp/extractor/epidemicsound.py
Normal file
@@ -0,0 +1,107 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class EpidemicSoundIE(InfoExtractor):
    """Extractor for single tracks on epidemicsound.com."""

    _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
        'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
        'info_dict': {
            'id': '45014',
            'display_id': 'yFfQVRpSPz',
            'ext': 'mp3',
            'title': 'Door Knock Door 1',
            'alt_title': 'Door Knock Door 1',
            'tags': ['foley', 'door', 'knock', 'glass', 'window', 'glass door knock'],
            'categories': ['Misc. Door'],
            'duration': 1,
            'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
            'timestamp': 1415320353,
            'upload_date': '20141107',
        },
    }, {
        'url': 'https://www.epidemicsound.com/track/mj8GTTwsZd/',
        'md5': 'c82b745890f9baf18dc2f8d568ee3830',
        'info_dict': {
            'id': '148700',
            'display_id': 'mj8GTTwsZd',
            'ext': 'mp3',
            'title': 'Noplace',
            'tags': ['liquid drum n bass', 'energetic'],
            'categories': ['drum and bass'],
            'duration': 237,
            'timestamp': 1694426482,
            'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/11138/3000x3000.jpg',
            'upload_date': '20230911',
            'release_timestamp': 1700535606,
            'release_date': '20231121',
        },
    }]

    @staticmethod
    def _epidemic_parse_thumbnail(url: str):
        """Build a thumbnail dict for *url*, or return None if it is not a valid URL.

        Width and height are taken from the ``width``/``height`` query
        parameters when that yields anything; otherwise ``parse_resolution``
        is applied to the whole URL.
        """
        if not url_or_none(url):
            return None

        return {
            'url': url,
            **(traverse_obj(url, ({parse_qs}, {
                'width': ('width', 0, {int_or_none}),
                'height': ('height', 0, {int_or_none}),
            })) or parse_resolution(url)),
        }

    @staticmethod
    def _epidemic_fmt_or_none(f):
        """Normalise the format dict *f* in place and return it, or None if unusable.

        ``format`` and ``format_id`` back-fill each other when only one is
        set.  The entry is rejected (None) when it has no URL or ends up
        without a format name.  A present ``format_note`` is rewritten to
        ``track ID <note>``, and every format other than ``full`` gets
        preference -2.
        """
        if not f.get('format'):
            f['format'] = f.get('format_id')
        elif not f.get('format_id'):
            f['format_id'] = f['format']
        # 'url' may be absent from the dict altogether, so look it up with .get()
        # and reject the entry explicitly instead of raising KeyError
        if not f.get('url') or not f['format']:
            return None
        if f.get('format_note'):
            f['format_note'] = f'track ID {f["format_note"]}'
        if f['format'] != 'full':
            f['preference'] = -2
        return f

    def _real_extract(self, url):
        """Download the track's JSON description and map it to an info dict."""
        video_id = self._match_id(url)
        json_data = self._download_json(f'https://www.epidemicsound.com/json/track/{video_id}', video_id)

        # Thumbnail candidates: the top-level image fields, plus every cover-art
        # size path appended to the cover-art base URL when one is given
        thumbnails = traverse_obj(json_data, [('imageUrl', 'cover')])
        thumb_base_url = traverse_obj(json_data, ('coverArt', 'baseUrl', {url_or_none}))
        if thumb_base_url:
            thumbnails.extend(traverse_obj(json_data, (
                'coverArt', 'sizes', ..., {thumb_base_url.__add__})))

        return traverse_obj(json_data, {
            'id': ('id', {str_or_none}),
            'display_id': ('publicSlug', {str}),
            'title': ('title', {str}),
            'alt_title': ('oldTitle', {str}),
            'duration': ('length', {float_or_none}),
            'timestamp': ('added', {parse_iso8601}),
            'release_timestamp': ('releaseDate', {parse_iso8601}),
            'categories': ('genres', ..., 'tag', {str}),
            'tags': ('metadataTags', ..., {str}),
            'age_limit': ('isExplicit', {lambda b: 18 if b else None}),
            # De-duplicate the collected URLs before turning each into a thumbnail dict
            'thumbnails': ({lambda _: thumbnails}, {orderedSet}, ..., {self._epidemic_parse_thumbnail}),
            # Each (name, stem) pair of 'stems' becomes one format candidate
            'formats': ('stems', {dict.items}, ..., {
                'format': (0, {str_or_none}),
                'format_note': (1, 's3TrackId', {str_or_none}),
                'format_id': (1, 'stemType', {str}),
                'url': (1, 'lqMp3Url', {url_or_none}),
            }, {self._epidemic_fmt_or_none}),
        })
|
||||
@@ -1,15 +1,20 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class EplusIbIE(InfoExtractor):
|
||||
IE_NAME = 'eplus:inbound'
|
||||
IE_DESC = 'e+ (イープラス) overseas'
|
||||
_VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
|
||||
_NETRC_MACHINE = 'eplus'
|
||||
IE_NAME = 'eplus'
|
||||
IE_DESC = 'e+ (イープラス)'
|
||||
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
|
||||
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
|
||||
'info_dict': {
|
||||
@@ -29,14 +34,97 @@ class EplusIbIE(InfoExtractor):
|
||||
'No video formats found!',
|
||||
'Requested format is not available',
|
||||
],
|
||||
}, {
|
||||
'url': 'https://live.eplus.jp/sample',
|
||||
'info_dict': {
|
||||
'id': 'stream1ng20210719-test-005',
|
||||
'title': 'Online streaming test for DRM',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20210719',
|
||||
'release_timestamp': 1626703200,
|
||||
'description': None,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Could not find the playlist URL. This event may not be accessible',
|
||||
'No video formats found!',
|
||||
'Requested format is not available',
|
||||
'This video is DRM protected',
|
||||
],
|
||||
}, {
|
||||
'url': 'https://live.eplus.jp/2053935',
|
||||
'info_dict': {
|
||||
'id': '331320-0001-001',
|
||||
'title': '丘みどり2020配信LIVE Vol.2 ~秋麗~ 【Streaming+(配信チケット)】',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20200920',
|
||||
'release_timestamp': 1600596000,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Could not find the playlist URL. This event may not be accessible',
|
||||
'No video formats found!',
|
||||
'Requested format is not available',
|
||||
],
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0'
|
||||
|
||||
def _login(self, username, password, urlh):
    """Log in to live.eplus.jp using the auth page response *urlh*.

    Requires the ``ci_session`` cookie to be set already and an
    ``X-CLTFT-Token`` header on *urlh*; raises ExtractorError when either is
    missing or when the pre-login API does not report success.
    """
    session_cookie = self._get_cookies('https://live.eplus.jp/').get('ci_session')
    if not session_cookie:
        raise ExtractorError('Unable to get ci_session cookie')

    cltft_token = urlh.headers.get('X-CLTFT-Token')
    if not cltft_token:
        raise ExtractorError('Unable to get X-CLTFT-Token')
    # The token is sent back both as a cookie and as a request header / form field
    self._set_cookie('live.eplus.jp', 'X-CLTFT-Token', cltft_token)

    prelogin_headers = {
        'Content-Type': 'application/json; charset=UTF-8',
        'Referer': urlh.url,
        'X-Cltft-Token': cltft_token,
        'Accept': '*/*',
    }
    prelogin_payload = json.dumps({
        'loginId': username,
        'loginPassword': password,
    }).encode()
    login_json = self._download_json(
        'https://live.eplus.jp/member/api/v1/FTAuth/idpw', None,
        note='Sending pre-login info', errnote='Unable to send pre-login info',
        headers=prelogin_headers, data=prelogin_payload)
    if not login_json.get('isSuccess'):
        raise ExtractorError('Login failed: Invalid id or password', expected=True)

    # Second step: submit the same credentials as a form to the auth page itself
    login_form = urlencode_postdata({
        'loginId': username,
        'loginPassword': password,
        'Token.Default': cltft_token,
        'op': 'nextPage',
    })
    self._request_webpage(
        urlh.url, None, note='Logging in', errnote='Unable to log in',
        data=login_form, headers={'Referer': urlh.url})
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
url, video_id, headers={'User-Agent': self._USER_AGENT})
|
||||
if urlh.url.startswith('https://live.eplus.jp/member/auth'):
|
||||
username, password = self._get_login_info()
|
||||
if not username:
|
||||
self.raise_login_required()
|
||||
self._login(username, password, urlh)
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, headers={'User-Agent': self._USER_AGENT})
|
||||
|
||||
data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
|
||||
|
||||
if data_json.get('drm_mode') == 'ON':
|
||||
self.report_drm(video_id)
|
||||
|
||||
delivery_status = data_json.get('delivery_status')
|
||||
archive_mode = data_json.get('archive_mode')
|
||||
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
|
||||
@@ -64,7 +152,7 @@ class EplusIbIE(InfoExtractor):
|
||||
formats = []
|
||||
|
||||
m3u8_playlist_urls = self._search_json(
|
||||
r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
|
||||
r'var\s+listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
|
||||
if not m3u8_playlist_urls:
|
||||
if live_status == 'is_upcoming':
|
||||
self.raise_no_formats(
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
def _decrypt_config(key, string):
|
||||
a = ''
|
||||
i = ''
|
||||
r = ''
|
||||
|
||||
while len(a) < (len(string) / 2):
|
||||
a += key
|
||||
|
||||
a = a[0:int(len(string) / 2)]
|
||||
|
||||
t = 0
|
||||
while t < len(string):
|
||||
i += chr(int(string[t] + string[t + 1], 16))
|
||||
t += 2
|
||||
|
||||
icko = [s for s in i]
|
||||
|
||||
for t, c in enumerate(a):
|
||||
r += chr(ord(c) ^ ord(icko[t]))
|
||||
|
||||
return r
|
||||
|
||||
|
||||
class EscapistIE(InfoExtractor):
    """Extractor for video pages on escapistmagazine.com."""

    _VALID_URL = r'https?://?(?:(?:www|v1)\.)?escapistmagazine\.com/videos/view/[^/]+/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
        'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
        'info_dict': {
            'id': '6618',
            'ext': 'mp4',
            'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
            'title': "Breaking Down Baldur's Gate",
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 264,
            'uploader': 'The Escapist',
        }
    }, {
        'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
        'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf',
        'info_dict': {
            'id': '10044',
            'ext': 'mp4',
            'description': 'This week, Zero Punctuation reviews Evolve.',
            'title': 'Evolve - One vs Multiplayer',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 304,
            'uploader': 'The Escapist',
        }
    }, {
        'url': 'http://escapistmagazine.com/videos/view/the-escapist-presents/6618',
        'only_matching': True,
    }, {
        'url': 'https://v1.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the page, decrypt its video config and build the info dict."""
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        # The argument object of the imsVideo.play(...) call carries the video ID and hash
        player_args = self._search_regex(
            r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo')
        ims_video = self._parse_json(player_args, page_id)
        video_id, key = ims_video['videoID'], ims_video['hash']

        encrypted_config = self._download_webpage(
            'http://www.escapistmagazine.com/videos/vidconfig.php',
            video_id, 'Downloading video config',
            headers={'Referer': url},
            query={'videoID': video_id, 'hash': key})

        # The hash doubles as the key for decrypting the config
        data = self._parse_json(_decrypt_config(key, encrypted_config), video_id)
        video_data = data['videoData']
        title = clean_html(video_data['title'])

        formats = []
        for video in data['files']['videos']:
            source_url = video['src']
            formats.append({
                'url': source_url,
                'format_id': f'{determine_ext(source_url)}-{video["res"]}p',
                'height': int_or_none(video.get('res')),
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage) or data.get('poster'),
            'description': self._og_search_description(webpage),
            # scale 1000: the config duration is divided by 1000
            'duration': float_or_none(video_data.get('duration'), 1000),
            'uploader': video_data.get('publisher'),
            'series': video_data.get('show'),
        }
|
||||
@@ -1,70 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class EsriVideoIE(InfoExtractor):
    """Extractor for video.esri.com watch pages."""

    _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications',
        'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc',
        'info_dict': {
            'id': '1124',
            'ext': 'mp4',
            'title': 'ArcGIS Online - Developing Applications',
            'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 185,
            'upload_date': '20120419',
        }
    }

    def _real_extract(self, url):
        """Scrape the download list and the meta tags of the watch page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # One <li> per resolution, each holding one download link per container format
        resolution_items = re.findall(
            r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage)
        formats = []
        for width, height, links_html in resolution_items:
            download_links = re.findall(
                r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', links_html)
            for href, label, size_text in download_links:
                extension = label.lower()
                formats.append({
                    'url': compat_urlparse.urljoin(url, href),
                    'ext': extension,
                    'format_id': f'{extension}-{height}',
                    'width': int(width),
                    'height': int(height),
                    'filesize_approx': parse_filesize(size_text),
                })

        title = self._html_search_meta('title', webpage, 'title')
        description = self._html_search_meta(
            'description', webpage, 'description', fatal=False)

        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False)
        # Swap the "_s"/"_t" thumbnail variant for the "_x" one
        thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail) if thumbnail else thumbnail

        duration_text = self._search_regex(
            [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"],
            webpage, 'duration', fatal=False)
        duration = int_or_none(duration_text)

        last_modified = self._html_search_meta(
            'last-modified', webpage, 'upload date', fatal=False)
        upload_date = unified_strdate(last_modified)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'formats': formats
        }
|
||||
@@ -1,74 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ExpoTVIE(InfoExtractor):
    """Extractor for review videos on expotv.com."""

    _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
    _TEST = {
        'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
        'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
        'info_dict': {
            'id': '667916',
            'ext': 'mp4',
            'title': 'NYX Butter Lipstick Little Susie',
            'description': 'Goes on like butter, but looks better!',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Stephanie S.',
            'upload_date': '20150520',
            'view_count': int,
        }
    }

    def _real_extract(self, url):
        """Combine the player configuration with metadata scraped from the page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The config endpoint is addressed by the video ID plus the page's player key
        player_key = self._search_regex(
            r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
        config = self._download_json(
            f'http://client.expotv.com/video/config/{video_id}/{player_key}',
            video_id, 'Downloading video configuration')

        formats = []
        for source in config['sources']:
            media_url = source.get('file')
            if not media_url:
                continue
            if fcfg_is_hls := source.get('type') == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
                continue
            # Prefer the extension found in the URL's filename parameter over the declared type
            url_extension = self._search_regex(
                r'filename=.*\.([a-z0-9_A-Z]+)&', media_url,
                'file extension', default=None)
            formats.append({
                'url': media_url,
                'height': int_or_none(source.get('height')),
                'format_id': source.get('label'),
                'ext': url_extension or source.get('type'),
            })

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = config.get('image')
        play_count = self._search_regex(
            r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts')
        view_count = int_or_none(play_count)
        uploader = self._search_regex(
            r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
            fatal=False)
        review_date = self._search_regex(
            r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
            fatal=False)
        upload_date = unified_strdate(review_date, day_first=False)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'view_count': view_count,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
        }
|
||||
@@ -1,48 +0,0 @@
|
||||
from ..utils import str_to_int
|
||||
from .keezmovies import KeezMoviesIE
|
||||
|
||||
|
||||
class ExtremeTubeIE(KeezMoviesIE):  # XXX: Do not subclass from concrete IE
    """Extractor for extremetube.com, built on the inherited ``_extract_info``."""

    _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
    _TESTS = [{
        'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
        'md5': '92feaafa4b58e82f261e5419f39c60cb',
        'info_dict': {
            'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
            'ext': 'mp4',
            'title': 'Music Video 14 british euro brit european cumshots swallow',
            'uploader': 'anonim',
            'view_count': int,
            'age_limit': 18,
        }
    }, {
        'url': 'http://www.extremetube.com/gay/video/abcde-1234',
        'only_matching': True,
    }, {
        'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick',
        'only_matching': True,
    }, {
        'url': 'http://www.extremetube.com/video/652431',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extend the inherited info dict with title fallback, uploader and view count."""
        webpage, info = self._extract_info(url)

        # Only scrape the heading when the inherited extraction produced no title
        if not info['title']:
            info['title'] = self._search_regex(
                r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')

        uploader_name = self._html_search_regex(
            r'Uploaded by:\s*</[^>]+>\s*<a[^>]+>(.+?)</a>',
            webpage, 'uploader', fatal=False)
        views_text = self._search_regex(
            r'Views:\s*</[^>]+>\s*<[^>]+>([\d,\.]+)</',
            webpage, 'view count', fatal=False)

        info.update(uploader=uploader_name, view_count=str_to_int(views_text))
        return info
|
||||
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
error_to_compat_str,
|
||||
float_or_none,
|
||||
format_field,
|
||||
get_element_by_id,
|
||||
get_first,
|
||||
int_or_none,
|
||||
@@ -51,12 +52,12 @@ class FacebookIE(InfoExtractor):
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
groups/[^/]+/permalink/|
|
||||
groups/[^/]+/(?:permalink|posts)/|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
(?P<id>pfbid[A-Za-z0-9]+|\d+)
|
||||
'''
|
||||
_EMBED_REGEX = [
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
|
||||
@@ -231,6 +232,39 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader_id': '100013949973717',
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
|
||||
'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
|
||||
'info_dict': {
|
||||
'id': '1569199726448814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pence MUST GO!',
|
||||
'description': 'Vickie Gentry shared a memory.',
|
||||
'timestamp': 1511548260,
|
||||
'upload_date': '20171124',
|
||||
'uploader': 'Vickie Gentry',
|
||||
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 148.435,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
|
||||
'info_dict': {
|
||||
'id': '6968553779868435',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:2f2fcf93e97ac00244fe64521bbdb0cb',
|
||||
'uploader': 'ATTN:',
|
||||
'upload_date': '20231207',
|
||||
'title': 'ATTN:',
|
||||
'duration': 132.675,
|
||||
'uploader_id': '100064451419378',
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'timestamp': 1701975646,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@@ -420,6 +454,29 @@ class FacebookIE(InfoExtractor):
|
||||
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
|
||||
post = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
subs_data = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')))
|
||||
is_video_broadcast = get_first(subs_data, 'is_video_broadcast', expected_type=bool)
|
||||
captions = get_first(subs_data, 'video_available_captions_locales', 'captions_url')
|
||||
if url_or_none(captions): # if subs_data only had a 'captions_url'
|
||||
locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
||||
subtitles[locale] = [{'url': captions}]
|
||||
# or else subs_data had 'video_available_captions_locales', a list of dicts
|
||||
for caption in traverse_obj(captions, (
|
||||
{lambda x: sorted(x, key=lambda c: c['locale'])}, lambda _, v: v['captions_url'])
|
||||
):
|
||||
lang = caption.get('localized_language') or ''
|
||||
subs = {
|
||||
'url': caption['captions_url'],
|
||||
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
||||
}
|
||||
if caption.get('localized_creation_method') or is_video_broadcast:
|
||||
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
||||
else:
|
||||
subtitles.setdefault(caption['locale'], []).append(subs)
|
||||
|
||||
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
||||
title = get_first(media, ('title', 'text'))
|
||||
@@ -463,6 +520,8 @@ class FacebookIE(InfoExtractor):
|
||||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
'automatic_captions': automatic_captions,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
@@ -586,9 +645,11 @@ class FacebookIE(InfoExtractor):
|
||||
nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
|
||||
attachments = traverse_obj(nodes, (
|
||||
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
|
||||
..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or []
|
||||
..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
|
||||
'attachment', {dict}))
|
||||
for attachment in attachments:
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
|
||||
('target', 'attachments', ..., 'styles', 'attachment', {dict}))
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
@@ -611,7 +672,7 @@ class FacebookIE(InfoExtractor):
|
||||
if len(entries) > 1:
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
video_info = entries[0]
|
||||
video_info = entries[0] if entries else {'id': video_id}
|
||||
webpage_info = extract_metadata(webpage)
|
||||
# honor precise duration in video info
|
||||
if video_info.get('duration'):
|
||||
|
||||
@@ -2,11 +2,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..dependencies import websockets
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
WebSocketsWrapper,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
@@ -167,8 +165,6 @@ class FC2LiveIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
if not websockets:
|
||||
raise ExtractorError('websockets library is not available. Please install it.', expected=True)
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id)
|
||||
|
||||
@@ -199,13 +195,9 @@ class FC2LiveIE(InfoExtractor):
|
||||
ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})
|
||||
playlist_data = None
|
||||
|
||||
self.to_screen('%s: Fetching HLS playlist info via WebSocket' % video_id)
|
||||
ws = WebSocketsWrapper(ws_url, {
|
||||
'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
|
||||
ws = self._request_webpage(Request(ws_url, headers={
|
||||
'Origin': 'https://live.fc2.com',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': self.get_param('http_headers')['User-Agent'],
|
||||
})
|
||||
}), video_id, note='Fetching HLS playlist info via WebSocket')
|
||||
|
||||
self.write_debug('Sending HLS server request')
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FifaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://www\.fifa\.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
|
||||
'info_dict': {
|
||||
|
||||
@@ -3,7 +3,7 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class FilmmoduIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?filmmodu.org/(?P<id>[^/]+-(?:turkce-dublaj-izle|altyazili-izle))'
|
||||
_VALID_URL = r'https?://(?:www\.)?filmmodu\.org/(?P<id>[^/]+-(?:turkce-dublaj-izle|altyazili-izle))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.filmmodu.org/f9-altyazili-izle',
|
||||
'md5': 'aeefd955c2a508a5bdaa3bcec8eeb0d4',
|
||||
|
||||
268
yt_dlp/extractor/floatplane.py
Normal file
268
yt_dlp/extractor/floatplane.py
Normal file
@@ -0,0 +1,268 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_codecs,
|
||||
parse_iso8601,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FloatplaneIE(InfoExtractor):
    """Extractor for a single Floatplane post (``/post/<id>``).

    A post can carry several video and/or audio attachments.  When more than
    one attachment is found the result is a playlist of the attachments,
    otherwise the single attachment is merged into the post metadata.
    """
    _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.floatplane.com/post/2Yf3UedF7C',
        'info_dict': {
            'id': 'yuleLogLTT',
            'ext': 'mp4',
            'display_id': '2Yf3UedF7C',
            'title': '8K Yule Log Fireplace with Crackling Fire Sounds - 10 Hours',
            'description': 'md5:adf2970e0de1c5e3df447818bb0309f6',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'duration': 36035,
            'comment_count': int,
            'like_count': int,
            'dislike_count': int,
            'release_date': '20191206',
            'release_timestamp': 1575657000,
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': 'Linus Tech Tips',
            'channel_id': '63fe42c309e691e4e36de93d',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/main',
            'availability': 'subscriber_only',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.floatplane.com/post/j2jqG3JmgJ',
        'info_dict': {
            'id': 'j2jqG3JmgJ',
            'title': 'TJM: Does Anyone Care About Avatar: The Way of Water?',
            'description': 'md5:00bf17dc5733e4031e99b7fd6489f274',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'comment_count': int,
            'like_count': int,
            'dislike_count': int,
            'release_timestamp': 1671915900,
            'release_date': '20221224',
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': "They're Just Movies",
            'channel_id': '64135f82fc76ab7f9fbdc876',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/tajm',
            'availability': 'subscriber_only',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://www.floatplane.com/post/3tK2tInhoN',
        'info_dict': {
            'id': '3tK2tInhoN',
            'title': 'Extras - How Linus Communicates with Editors (Compensator 4)',
            'description': 'md5:83cd40aae1ce124df33769600c80ca5b',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'comment_count': int,
            'like_count': int,
            'dislike_count': int,
            'release_timestamp': 1700529120,
            'release_date': '20231121',
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': 'FP Exclusives',
            'channel_id': '6413623f5b12cca228a28e78',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/fpexclusive',
            'availability': 'subscriber_only',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://beta.floatplane.com/post/d870PEFXS1',
        'info_dict': {
            'id': 'bg9SuYKEww',
            'ext': 'mp4',
            'display_id': 'd870PEFXS1',
            'title': 'LCS Drama, TLOU 2 Remaster, Destiny 2 Player Count Drops, + More!',
            'description': 'md5:80d612dcabf41b17487afcbe303ec57d',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'release_timestamp': 1700622000,
            'release_date': '20231122',
            'duration': 513,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'uploader': 'LinusTechTips',
            'uploader_id': '59f94c0bdd241b70349eb72b',
            'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home',
            'channel': 'GameLinked',
            'channel_id': '649dbade3540dbc3945eeda7',
            'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/gamelinked',
            'availability': 'subscriber_only',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_initialize(self):
        """Require the ``sails.sid`` cookie for floatplane.com, else ask for login."""
        if not self._get_cookies('https://www.floatplane.com').get('sails.sid'):
            self.raise_login_required()

    @staticmethod
    def _fill_path_template(path_template, params):
        """Replace each ``{qualityLevelParams.<key>}`` placeholder in *path_template*.

        *params* maps placeholder keys to their values; ``None`` or an empty
        mapping leaves the template unchanged.  Values are converted with
        ``str`` so that non-string values cannot break ``str.replace``.
        """
        path = path_template
        for key, val in (params or {}).items():
            path = path.replace(f'{{qualityLevelParams.{key}}}', str(val))
        return path

    def _build_formats(self, stream):
        """Build the formats list from a ``cdn/delivery`` API response.

        Returns an empty list when the response carries no path template.
        Quality levels for which no URL can be built are skipped.
        """
        path_template = traverse_obj(stream, ('resource', 'uri', {str}))
        if not path_template:
            # Without a template there is nothing to substitute into
            return []

        formats = []
        for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
            format_url = urljoin(stream['cdn'], self._fill_path_template(
                path_template, traverse_obj(
                    stream, ('resource', 'data', 'qualityLevelParams', quality['name']))))
            if not format_url:
                # No usable URL for this quality level; `.partition` below needs a str
                continue
            formats.append({
                **traverse_obj(quality, {
                    'format_id': 'name',
                    'format_note': 'label',
                    'width': ('width', {int}),
                    'height': ('height', {int}),
                }),
                **parse_codecs(quality.get('codecs')),
                'url': format_url,
                # Determine the extension from the part before '/chunk.m3u8';
                # default to mp4
                'ext': determine_ext(format_url.partition('/chunk.m3u8')[0], 'mp4'),
            })
        return formats

    def _real_extract(self, url):
        """Extract the post at *url*.

        Returns a playlist result when the post has more than one attachment,
        otherwise a single info dict whose ``id`` is the attachment id and
        whose ``display_id`` is the post id.

        Raises ExtractorError if the post reports neither video nor audio, or
        if it lists no attachments at all.
        """
        post_id = self._match_id(url)

        post_data = self._download_json(
            'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id},
            note='Downloading post data', errnote='Unable to download post data')

        if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
            raise ExtractorError('Post does not contain a video or audio track', expected=True)

        items = []
        for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
            media_id = media['id']
            media_typ = media.get('type') or 'video'

            metadata = self._download_json(
                f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id},
                note=f'Downloading {media_typ} metadata')

            stream = self._download_json(
                'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={
                    'type': 'vod' if media_typ == 'video' else 'aod',
                    'guid': metadata['guid']
                }, note=f'Downloading {media_typ} stream data')

            items.append({
                'id': media_id,
                **traverse_obj(metadata, {
                    'title': 'title',
                    'duration': ('duration', {int_or_none}),
                    'thumbnail': ('thumbnail', 'path'),
                }),
                'formats': self._build_formats(stream),
            })

        uploader_url = format_field(
            post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
        channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname')))

        post_info = {
            'id': post_id,
            'display_id': post_id,
            **traverse_obj(post_data, {
                'title': 'title',
                'description': ('text', {clean_html}),
                'uploader': ('creator', 'title'),
                'uploader_id': ('creator', 'id'),
                'channel': ('channel', 'title'),
                'channel_id': ('channel', 'id'),
                'like_count': ('likes', {int_or_none}),
                'dislike_count': ('dislikes', {int_or_none}),
                'comment_count': ('comments', {int_or_none}),
                'release_timestamp': ('releaseDate', {parse_iso8601}),
                'thumbnail': ('thumbnail', 'path'),
            }),
            'uploader_url': uploader_url,
            'channel_url': channel_url,
            'availability': self._availability(needs_subscription=True),
        }

        if not items:
            # The metadata flags promised media but no attachment was listed;
            # fail with a clear message instead of an IndexError on items[0]
            raise ExtractorError('Post does not contain any video or audio attachments')

        if len(items) > 1:
            return self.playlist_result(items, **post_info)

        # Single attachment: its fields (including 'id') override the post-level ones
        post_info.update(items[0])
        return post_info
|
||||
|
||||
|
||||
class FloatplaneChannelIE(InfoExtractor):
    """Extractor for a Floatplane creator home page, optionally narrowed to one channel.

    The result is a lazily paged playlist whose entries are delegated to
    FloatplaneIE.
    """
    _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
    _PAGE_SIZE = 20
    _TESTS = [{
        'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo',
        'info_dict': {
            'id': 'linustechtips/ltxexpo',
            'title': 'LTX Expo',
            'description': 'md5:9819002f9ebe7fd7c75a3a1d38a59149',
        },
        'playlist_mincount': 51,
    }, {
        'url': 'https://www.floatplane.com/channel/ShankMods/home',
        'info_dict': {
            'id': 'ShankMods',
            'title': 'Shank Mods',
            'description': 'md5:6dff1bb07cad8e5448e04daad9be1b30',
        },
        'playlist_mincount': 14,
    }, {
        'url': 'https://beta.floatplane.com/channel/bitwit_ultra/home',
        'info_dict': {
            'id': 'bitwit_ultra',
            'title': 'Bitwit Ultra',
            'description': 'md5:1452f280bb45962976d4789200f676dd',
        },
        'playlist_mincount': 200,
    }]

    def _fetch_page(self, display_id, creator_id, channel_id, page):
        """Yield url results for the posts on zero-based page number *page*.

        The ``channel`` query parameter is only sent when *channel_id* is truthy.
        """
        query = {
            'id': creator_id,
            'limit': self._PAGE_SIZE,
            'fetchAfter': page * self._PAGE_SIZE,
        }
        if channel_id:
            query['channel'] = channel_id

        posts = self._download_json(
            'https://www.floatplane.com/api/v3/content/creator', display_id,
            query=query, note=f'Downloading page {page + 1}') or []

        for post in posts:
            post_id = post['id']
            yield self.url_result(
                f'https://www.floatplane.com/post/{post_id}',
                FloatplaneIE, id=post_id, title=post.get('title'),
                release_timestamp=parse_iso8601(post.get('releaseDate')))

    def _real_extract(self, url):
        """Return a paged playlist for the creator (and channel, if any) named in *url*."""
        creator, channel = self._match_valid_url(url).group('id', 'channel')
        display_id = join_nonempty(creator, channel, delim='/')

        creators = self._download_json(
            'https://www.floatplane.com/api/v3/creator/named',
            display_id, query={'creatorURL[0]': creator})
        creator_data = creators[0]

        # First channel whose urlname equals the one in the URL, or {} if none matches
        channel_data = traverse_obj(
            creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {}

        page_func = functools.partial(
            self._fetch_page, display_id, creator_data['id'], channel_data.get('id'))
        entries = OnDemandPagedList(page_func, self._PAGE_SIZE)

        # Channel-level title/description win over the creator-level ones
        return self.playlist_result(
            entries, display_id,
            title=channel_data.get('title') or creator_data.get('title'),
            description=channel_data.get('about') or creator_data.get('about'))
|
||||
@@ -1,106 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj, unified_timestamp
|
||||
|
||||
|
||||
class FourZeroStudioArchiveIE(InfoExtractor):
    """Extractor for 0000.studio broadcast archives (``/<uploader>/broadcasts/<id>/archive``)."""
    _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/broadcasts/(?P<id>[^/]+)/archive'
    IE_NAME = '0000studio:archive'
    _TESTS = [{
        'url': 'https://0000.studio/mumeijiten/broadcasts/1290f433-fce0-4909-a24a-5f7df09665dc/archive',
        'info_dict': {
            'id': '1290f433-fce0-4909-a24a-5f7df09665dc',
            'title': 'noteで『canape』様へのファンレターを執筆します。(数秘術その2)',
            'timestamp': 1653802534,
            'release_timestamp': 1653796604,
            'thumbnails': 'count:1',
            'comments': 'count:7',
            'uploader': '『中崎雄心』の執務室。',
            'uploader_id': 'mumeijiten',
        }
    }]

    def _real_extract(self, url):
        """Extract formats, subtitles, comments and uploader data from the page's NUXT data."""
        mobj = self._match_valid_url(url)
        video_id, uploader_id = mobj.group('id', 'uploader_id')
        webpage = self._download_webpage(url, video_id)
        nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)

        # First ssrRefs entries of the broadcast type and of the user type
        pcb = traverse_obj(
            nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'),
            get_all=False)
        uploader_internal_id = traverse_obj(
            nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'id'),
            get_all=False)

        formats, subs = self._extract_m3u8_formats_and_subtitles(
            pcb['archiveUrl'], video_id, ext='mp4')

        thumbnails = None
        if pcb.get('thumbnailUrl'):
            thumbnails = [{
                'url': pcb['thumbnailUrl'],
                'ext': 'png',
            }]

        raw_comments = traverse_obj(nuxt_data, (
            'ssrRefs', ..., lambda _, v: v['__typename'] == 'PublicCreatorBroadcastComment')) or []
        comments = []
        for c in raw_comments:
            comments.append({
                'author': c.get('username'),
                'author_id': c.get('postedUserId'),
                'author_thumbnail': c.get('userThumbnailUrl'),
                'id': c.get('id'),
                'text': c.get('body'),
                'timestamp': unified_timestamp(c.get('createdAt')),
                'like_count': c.get('likeCount'),
                'is_favorited': c.get('isLikedByOwner'),
                # Comment was posted by the user whose internal id was found above
                'author_is_uploader': c.get('postedUserId') == uploader_internal_id,
            })

        uploader = traverse_obj(nuxt_data, (
            'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False)

        return {
            'id': video_id,
            'title': pcb.get('title'),
            'age_limit': 18 if pcb.get('isAdult') else None,
            'timestamp': unified_timestamp(pcb.get('finishTime')),
            'release_timestamp': unified_timestamp(pcb.get('createdAt')),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subs,
            'comments': comments,
            'uploader_id': uploader_id,
            'uploader': uploader,
        }
|
||||
|
||||
|
||||
class FourZeroStudioClipIE(InfoExtractor):
    """Extractor for 0000.studio archive clips (``/<uploader>/archive-clip/<id>``)."""
    _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/archive-clip/(?P<id>[^/]+)'
    IE_NAME = '0000studio:clip'
    _TESTS = [{
        'url': 'https://0000.studio/soeji/archive-clip/e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
        'info_dict': {
            'id': 'e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
            'title': 'わたベーさんからイラスト差し入れいただきました。ありがとうございました!',
            'timestamp': 1652109105,
            'like_count': 1,
            'uploader': 'ソエジマケイタ',
            'uploader_id': 'soeji',
        }
    }]

    def _real_extract(self, url):
        """Extract the clip, preferring an HTML5 media entry that has an mp4 format.

        If no such entry exists, a warning is reported and the formats are
        built from the ``mediaFiles`` list in the NUXT data.
        """
        mobj = self._match_valid_url(url)
        video_id, uploader_id = mobj.group('id', 'uploader_id')
        webpage = self._download_webpage(url, video_id)
        nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)

        clip_info = traverse_obj(
            nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorArchivedClip'),
            get_all=False)

        # Take the first media entry that offers an mp4 format
        info = None
        for entry in self._parse_html5_media_entries(url, webpage, video_id):
            if 'mp4' in traverse_obj(entry, ('formats', ..., 'ext')):
                info = entry
                break

        if not info:
            self.report_warning('Failed to find a desired media element. Falling back to using NUXT data.')
            media_files = clip_info.get('mediaFiles') or []
            info = {
                'formats': [{
                    'ext': 'mp4',
                    'url': media_url,
                } for media_url in media_files if media_url],
            }

        uploader = traverse_obj(nuxt_data, (
            'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False)

        return {
            **info,
            'id': video_id,
            'title': clip_info.get('clipComment'),
            'timestamp': unified_timestamp(clip_info.get('createdAt')),
            'like_count': clip_info.get('likeCount'),
            'uploader_id': uploader_id,
            'uploader': uploader,
        }
|
||||
@@ -1,58 +0,0 @@
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class FoxgayIE(InfoExtractor):
    """Extractor for foxgay.com video pages; the video data is read from an embedded iframe."""
    _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
    _TEST = {
        'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
        'md5': '344558ccfea74d33b7adbce22e577f54',
        'info_dict': {
            'id': '2582',
            'ext': 'mp4',
            'title': 'Fuck Turkish-style',
            'description': 'md5:6ae2d9486921891efe89231ace13ffdf',
            'age_limit': 18,
            'thumbnail': r're:https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Download the page, follow its first iframe and parse ``video_data`` from it."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        page_title = self._html_extract_title(webpage)
        title = remove_end(page_title, ' - Foxgay.com')
        description = get_element_by_id('inf_tit', webpage)

        # The default user-agent with foxgay cookies leads to pages without videos
        self.cookiejar.clear('.foxgay.com')

        # Find the URL for the iFrame which contains the actual video.
        iframe_url = self._html_search_regex(
            r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage,
            'video frame', group='url')
        iframe = self._download_webpage(
            iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'},
            note='Downloading video frame')

        raw_video_data = self._search_regex(
            r'video_data\s*=\s*([^;]+);', iframe, 'video data')
        video_data = self._parse_json(raw_video_data, video_id)

        # Pair each source with its resolution; with no 'resolutions' key every height is None
        sources = video_data['sources']
        resolutions = video_data.get('resolutions', itertools.repeat(None))
        formats = []
        for source, resolution in zip(sources, resolutions):
            formats.append({
                'url': source,
                'height': int_or_none(resolution),
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnail': video_data.get('act_vid', {}).get('thumb'),
            'age_limit': 18,
        }
|
||||
@@ -1,12 +1,14 @@
|
||||
from .common import InfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
@@ -82,6 +84,8 @@ class FranceTVIE(InfoExtractor):
|
||||
videos = []
|
||||
title = None
|
||||
subtitle = None
|
||||
episode_number = None
|
||||
season_number = None
|
||||
image = None
|
||||
duration = None
|
||||
timestamp = None
|
||||
@@ -112,7 +116,9 @@ class FranceTVIE(InfoExtractor):
|
||||
if meta:
|
||||
if title is None:
|
||||
title = meta.get('title')
|
||||
# XXX: what is meta['pre_title']?
|
||||
# meta['pre_title'] contains season and episode number for series in format "S<ID> E<ID>"
|
||||
season_number, episode_number = self._search_regex(
|
||||
r'S(\d+)\s*E(\d+)', meta.get('pre_title'), 'episode info', group=(1, 2), default=(None, None))
|
||||
if subtitle is None:
|
||||
subtitle = meta.get('additional_title')
|
||||
if image is None:
|
||||
@@ -191,19 +197,19 @@ class FranceTVIE(InfoExtractor):
|
||||
} for sheet in spritesheets]
|
||||
})
|
||||
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
title = title.strip()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': join_nonempty(title, subtitle, delim=' - ').strip(),
|
||||
'thumbnail': image,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'is_live': is_live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'episode': subtitle if episode_number else None,
|
||||
'series': title if episode_number else None,
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'season_number': int_or_none(season_number),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -230,14 +236,31 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||
'timestamp': 1502623500,
|
||||
'duration': 2580,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
}, {
|
||||
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
|
||||
'info_dict': {
|
||||
'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
|
||||
'ext': 'mp4',
|
||||
'title': 'Foot2Rue - Duel au vieux port',
|
||||
'episode': 'Duel au vieux port',
|
||||
'series': 'Foot2Rue',
|
||||
'episode_number': 1,
|
||||
'season_number': 1,
|
||||
'timestamp': 1642761360,
|
||||
'upload_date': '20220121',
|
||||
'season': 'Season 1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1441,
|
||||
},
|
||||
}, {
|
||||
# france3
|
||||
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class FusionIE(InfoExtractor):
    """Extractor for fusion.net / fusion.tv videos.

    Formats are taken from the ``src`` mapping of the platform API response.
    When that yields no formats, the result is a URL result pointing at the
    Anvato extractor.
    """
    _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
        'info_dict': {
            'id': '3145868',
            'ext': 'mp4',
            'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
            'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
            'duration': 140.0,
            'timestamp': 1442589635,
            'uploader': 'UNIVISON',
            'upload_date': '20150918',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Anvato'],
    }, {
        'url': 'http://fusion.tv/video/201781',
        'only_matching': True,
    }, {
        'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return an info dict with formats, or a URL result for the Anvato extractor."""
        video_id = self._match_id(url)
        video = self._download_json(
            'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id)

        info = {
            'id': video_id,
            'title': video['title'],
            'description': video.get('excerpt'),
            'timestamp': parse_iso8601(video.get('published')),
            'series': video.get('show'),
        }

        formats = []
        src = video.get('src') or {}
        for f_id, f in src.items():
            for q_id, q in f.items():
                q_url = q.get('url')
                if not q_url:
                    continue
                ext = determine_ext(q_url, mimetype2ext(q.get('type')))
                if ext == 'smil':
                    formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False))
                elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'):
                    formats.extend(self._extract_m3u8_formats(
                        q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
                else:
                    formats.append({
                        'format_id': '-'.join([f_id, q_id]),
                        'url': q_url,
                        'width': int_or_none(q.get('width')),
                        'height': int_or_none(q.get('height')),
                        # The bitrate is optional metadata: default=None keeps a URL
                        # without a "_<digits>.mp4/.m3u8" suffix from aborting extraction
                        'tbr': int_or_none(self._search_regex(
                            r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate', default=None)),
                        'ext': 'mp4' if ext == 'm3u8' else ext,
                        'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
                    })
        if formats:
            info['formats'] = formats
        else:
            # No formats from 'src': hand the video over to the Anvato extractor
            info.update({
                '_type': 'url',
                'url': 'anvato:uni:' + video['video_ids']['anvato'],
                'ie_key': 'Anvato',
            })

        return info
|
||||
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
determine_protocol,
|
||||
dict_get,
|
||||
extract_basic_auth,
|
||||
filter_dict,
|
||||
format_field,
|
||||
int_or_none,
|
||||
is_html,
|
||||
@@ -35,6 +36,7 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
urljoin,
|
||||
variadic,
|
||||
xpath_attr,
|
||||
@@ -372,46 +374,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
},
|
||||
# ooyala video
|
||||
{
|
||||
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||
'md5': '166dd577b433b4d4ebfee10b0824d8ff',
|
||||
'info_dict': {
|
||||
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
|
||||
'ext': 'mp4',
|
||||
'title': '2cc213299525360.mov', # that's what we get
|
||||
'duration': 238.231,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
},
|
||||
{
|
||||
# ooyala video embedded with http://player.ooyala.com/iframe.js
|
||||
'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
|
||||
'info_dict': {
|
||||
'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
|
||||
'ext': 'mp4',
|
||||
'title': '"Steve Jobs: Man in the Machine" trailer',
|
||||
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
|
||||
'duration': 135.427,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'movie expired',
|
||||
},
|
||||
# ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
|
||||
{
|
||||
'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
|
||||
'info_dict': {
|
||||
'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Steampunk Fest Comes to Honesdale',
|
||||
'duration': 43.276,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# embed.ly video
|
||||
{
|
||||
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
|
||||
@@ -504,7 +466,8 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Ужастики, русский трейлер (2015)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 153,
|
||||
}
|
||||
},
|
||||
'skip': 'Site dead',
|
||||
},
|
||||
# XHamster embed
|
||||
{
|
||||
@@ -776,14 +739,16 @@ class GenericIE(InfoExtractor):
|
||||
'playlist_mincount': 1,
|
||||
'add_ie': ['Youtube'],
|
||||
},
|
||||
# Cinchcast embed
|
||||
# Libsyn embed
|
||||
{
|
||||
'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
|
||||
'info_dict': {
|
||||
'id': '7141703',
|
||||
'id': '3793998',
|
||||
'ext': 'mp3',
|
||||
'upload_date': '20141126',
|
||||
'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
|
||||
'title': 'Underground Wellness Radio - Jack Tips: 5 Steps to Permanent Gut Healing',
|
||||
'thumbnail': 'https://assets.libsyn.com/secure/item/3793998/?height=90&width=90',
|
||||
'duration': 3989.0,
|
||||
}
|
||||
},
|
||||
# Cinerama player
|
||||
@@ -1565,16 +1530,6 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
|
||||
},
|
||||
},
|
||||
{
|
||||
# vzaar embed
|
||||
'url': 'http://help.vzaar.com/article/165-embedding-video',
|
||||
'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
|
||||
'info_dict': {
|
||||
'id': '8707641',
|
||||
'ext': 'mp4',
|
||||
'title': 'Building A Business Online: Principal Chairs Q & A',
|
||||
},
|
||||
},
|
||||
{
|
||||
# multiple HTML5 videos on one page
|
||||
'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
|
||||
@@ -2434,10 +2389,10 @@ class GenericIE(InfoExtractor):
|
||||
# to accept raw bytes and being able to download only a chunk.
|
||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||
# after a HEAD request, but not sure if we can rely on this.
|
||||
full_response = self._request_webpage(url, video_id, headers={
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
**smuggled_data.get('http_headers', {})
|
||||
})
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}))
|
||||
new_url = full_response.url
|
||||
url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
|
||||
if new_url != extract_basic_auth(url)[0]:
|
||||
@@ -2457,9 +2412,9 @@ class GenericIE(InfoExtractor):
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
self.report_detected('direct video link')
|
||||
headers = smuggled_data.get('http_headers', {})
|
||||
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
||||
format_id = str(m.group('format_id'))
|
||||
ext = determine_ext(url)
|
||||
ext = determine_ext(url, default_ext=None) or urlhandle_detect_ext(full_response)
|
||||
subtitles = {}
|
||||
if format_id.endswith('mpegurl') or ext == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
|
||||
@@ -2471,6 +2426,7 @@ class GenericIE(InfoExtractor):
|
||||
formats = [{
|
||||
'format_id': format_id,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}]
|
||||
info_dict['direct'] = True
|
||||
@@ -2708,7 +2664,7 @@ class GenericIE(InfoExtractor):
|
||||
'url': smuggle_url(json_ld['url'], {
|
||||
'force_videoid': video_id,
|
||||
'to_generic': True,
|
||||
'http_headers': {'Referer': url},
|
||||
'referer': url,
|
||||
}),
|
||||
}, json_ld)]
|
||||
|
||||
|
||||
179
yt_dlp/extractor/getcourseru.py
Normal file
179
yt_dlp/extractor/getcourseru.py
Normal file
@@ -0,0 +1,179 @@
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GetCourseRuPlayerIE(InfoExtractor):
    """Extractor for the GetCourse signed player page (``player02.getcourse.ru/sign-player``)."""
    _VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
    _TESTS = [{
        'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
        'info_dict': {
            'id': '513573381',
            'title': '190bdf93f1b29735309853a7a19e24b3',
            'ext': 'mp4',
            'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
            'duration': 1693
        },
        'skip': 'JWT expired',
    }]

    def _real_extract(self, url):
        """Read ``window.configs`` from the player page and extract its master playlist."""
        # The video id is only known after parsing the page, hence None here
        webpage = self._download_webpage(url, None, 'Downloading player page')
        window_configs = self._search_json(
            r'window\.configs\s*=', webpage, 'config', None)
        video_id = str(window_configs['gcFileId'])

        master_playlist_url = window_configs['masterPlaylistUrl']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            master_playlist_url, video_id)

        info = traverse_obj(window_configs, {
            'title': ('videoHash', {str}),
            'thumbnail': ('previewUrl', {url_or_none}),
            'duration': ('videoDuration', {int_or_none}),
        })
        info.update({
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        })
        return info
|
||||
|
||||
|
||||
class GetCourseRuIE(InfoExtractor):
    """Extractor for lesson/landing pages hosted on GetCourse.

    Matches ``*.getcourse.ru`` / ``*.getcourse.io`` subdomains (except the
    ``player02`` player host) and the custom domains listed in ``_DOMAINS``.
    Every embedded player iframe found on the page becomes one entry of the
    returned playlist, delegated to ``GetCourseRuPlayerIE``.
    """

    _NETRC_MACHINE = 'getcourseru'
    _DOMAINS = [
        'academymel.online',
        'marafon.mani-beauty.com',
        'on.psbook.ru'
    ]
    _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
    _VALID_URL = [
        rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
        # `(?:pl/)?` is an optional non-capturing prefix; the previous `(:?pl/)?`
        # was a typo that created a capture group and accepted a stray colon
        rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'http://academymel.online/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693
            },
        }]
    }, {
        'url': 'https://academymel.getcourse.ru/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693
            },
        }]
    }, {
        'url': 'https://academymel.getcourse.ru/pl/teach/control/lesson/view?id=319141781&editMode=0',
        'info_dict': {
            'id': '319141781',
            'title': '1. Разминка у стены',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '4919601',
                'ext': 'mp4',
                'title': '1. Разминка у стены',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/5a521788e7dc25b4f70c3dff6512d90e/preview.jpg?version=1703223532&host=vh-81',
                'duration': 704
            },
        }],
        'skip': 'paid lesson'
    }, {
        'url': 'https://manibeauty.getcourse.ru/pl/teach/control/lesson/view?id=272499894',
        'info_dict': {
            'id': '272499894',
            'title': 'Мотивация к тренировкам',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '447479687',
                'ext': 'mp4',
                'title': 'Мотивация к тренировкам',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/70ed5b9f489dd03b4aff55bfdff71a26/preview.jpg?version=1685115787&host=vh-71',
                'duration': 30
            },
        }],
        'skip': 'paid lesson'
    }, {
        'url': 'https://gaismasmandalas.getcourse.io/ATLAUTSEVBUT',
        'only_matching': True,
    }]

    _LOGIN_URL_PATH = '/cms/system/login'

    def _login(self, hostname, username, password):
        """Log in to ``hostname`` unless a ``PHPSESSID5`` cookie is already set.

        The login form's ``data-xdget-id`` and the page's
        ``window.requestSimpleSign`` value are scraped from the login page and
        posted back together with the credentials.
        """
        if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
            return
        login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
        webpage = self._download_webpage(login_url, None)

        self._request_webpage(
            login_url, None, 'Logging in', 'Failed to log in',
            data=urlencode_postdata({
                'action': 'processXdget',
                'xdgetId': self._html_search_regex(
                    r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
                    webpage, 'xdgetId'),
                'params[action]': 'login',
                'params[url]': login_url,
                'params[object_type]': 'cms_page',
                'params[object_id]': -1,
                'params[email]': username,
                'params[password]': password,
                'requestTime': int(time.time()),
                # The dot is escaped so only the literal `window.requestSimpleSign` matches
                'requestSimpleSign': self._html_search_regex(
                    r'window\.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
            }))

    def _real_extract(self, url):
        """Return a playlist of the GetCourse player iframes embedded in the page.

        Raises ExtractorError (expected) when the response is the login page or
        a 404, which is treated as "registered users only".
        """
        hostname = urllib.parse.urlparse(url).hostname
        # Credentials are looked up per host
        username, password = self._get_login_info(netrc_machine=hostname)
        if username:
            self._login(hostname, username, password)

        display_id = self._match_id(url)
        # NB: 404 is returned due to yt-dlp not properly following redirects #9020
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404)
        if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404:
            raise ExtractorError(
                f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
                expected=True)

        # Fall back to the URL-derived id when the page exposes no numeric id
        playlist_id = self._search_regex(
            r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
        title = self._og_search_title(webpage) or self._html_extract_title(webpage)

        return self.playlist_from_matches(
            re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
            playlist_id, title, display_id=display_id, ie=GetCourseRuPlayerIE, video_kwargs={
                'url_transparent': True,
                'title': title,
            })
|
||||
@@ -1,145 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
qualities,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class GfycatIE(InfoExtractor):
    """Extractor for gfycat.com clips, backed by the ``api.gfycat.com`` JSON API."""

    _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?i:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)'
    _EMBED_REGEX = [rf'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
        'info_dict': {
            'id': 'DeadlyDecisiveGermanpinscher',
            'ext': 'mp4',
            'title': 'Ghost in the Shell',
            'timestamp': 1410656006,
            'upload_date': '20140914',
            'uploader': 'anonymous',
            'duration': 10.4,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 0,
            'uploader_id': 'anonymous',
            'description': '',
        }
    }, {
        'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
        'info_dict': {
            'id': 'JauntyTimelyAmazontreeboa',
            'ext': 'mp4',
            'title': 'JauntyTimelyAmazontreeboa',
            'timestamp': 1411720126,
            'upload_date': '20140926',
            'uploader': 'anonymous',
            'duration': 3.52,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 0,
            'uploader_id': 'anonymous',
            'description': '',
        }
    }, {
        'url': 'https://gfycat.com/alienatedsolidgreathornedowl',
        'info_dict': {
            'id': 'alienatedsolidgreathornedowl',
            'ext': 'mp4',
            'upload_date': '20211226',
            'uploader_id': 'reactions',
            'timestamp': 1640536930,
            'like_count': int,
            'description': '',
            'title': 'Ingrid Michaelson, Zooey Deschanel - Merry Christmas Happy New Year',
            'categories': list,
            'age_limit': 0,
            'duration': 2.9583333333333335,
            'uploader': 'Reaction GIFs',
            'view_count': int,
        }
    }, {
        'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
        'only_matching': True
    }, {
        'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
        'only_matching': True
    }, {
        'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
        'only_matching': True
    }, {
        'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
        'only_matching': True
    }, {
        'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
        'only_matching': True
    }, {
        'url': 'http://gfycat.com/IFR/JauntyTimelyAmazontreeboa',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Query the gfycat API for the clip and map its fields to an info dict.

        Raises ExtractorError (expected) when the API response has an ``error`` key.
        """
        video_id = self._match_id(url)

        response = self._download_json(
            f'https://api.gfycat.com/v1/gfycats/{video_id}',
            video_id, 'Downloading video info')
        if 'error' in response:
            raise ExtractorError('Gfycat said: ' + response['error'], expected=True)
        item = response['gfyItem']

        title = item.get('title') or item['gfyName']
        uploader_id = item.get('userName') or item.get('username')

        width = int_or_none(item.get('width'))
        height = int_or_none(item.get('height'))
        fps = int_or_none(item.get('frameRate'))
        frame_count = int_or_none(item.get('numFrames'))

        # Duration is derived from the frame count; skipped when either value is missing or zero
        duration = None
        if frame_count and fps:
            duration = float_or_none(frame_count, fps)

        # Listed from least to most preferred
        FORMATS = ('gif', 'webm', 'mp4')
        preference = qualities(FORMATS)

        available = ((fid, item.get(f'{fid}Url')) for fid in FORMATS)
        formats = [{
            'url': media_url,
            'format_id': fid,
            'width': width,
            'height': height,
            'fps': fps,
            'filesize': int_or_none(item.get(f'{fid}Size')),
            'quality': preference(fid),
        } for fid, media_url in available if media_url]

        return {
            'id': video_id,
            'title': title,
            'description': item.get('description'),
            'timestamp': int_or_none(item.get('createDate')),
            'uploader': item.get('userDisplayName') or uploader_id,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': int_or_none(item.get('views')),
            'like_count': int_or_none(item.get('likes')),
            'dislike_count': int_or_none(item.get('dislikes')),
            'categories': item.get('tags') or item.get('extraLemmas') or [],
            'age_limit': 18 if item.get('nsfw') == '1' else 0,
            'formats': formats,
        }
|
||||
@@ -19,9 +19,9 @@ class GoogleDriveIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:docs|drive)\.google\.com/
|
||||
(?:docs|drive|drive\.usercontent)\.google\.com/
|
||||
(?:
|
||||
(?:uc|open)\?.*?id=|
|
||||
(?:uc|open|download)\?.*?id=|
|
||||
file/d/
|
||||
)|
|
||||
video\.google\.com/get_player\?.*?docid=
|
||||
@@ -53,6 +53,9 @@ class GoogleDriveIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://drive.usercontent.google.com/download?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FORMATS_EXT = {
|
||||
'5': 'flv',
|
||||
@@ -205,9 +208,10 @@ class GoogleDriveIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
|
||||
source_url = update_url_query(
|
||||
'https://drive.google.com/uc', {
|
||||
'https://drive.usercontent.google.com/download', {
|
||||
'id': video_id,
|
||||
'export': 'download',
|
||||
'confirm': 't',
|
||||
})
|
||||
|
||||
def request_source_file(source_url, kind, data=None):
|
||||
|
||||
@@ -57,8 +57,8 @@ class GoProIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
metadata = self._parse_json(
|
||||
self._html_search_regex(r'window\.__reflectData\s*=\s*([^;]+)', webpage, 'metadata'), video_id)
|
||||
metadata = self._search_json(
|
||||
r'window\.__reflectData\s*=', webpage, 'metadata', video_id)
|
||||
|
||||
video_info = metadata['collectionMedia'][0]
|
||||
media_data = self._download_json(
|
||||
@@ -99,7 +99,7 @@ class GoProIE(InfoExtractor):
|
||||
'duration': int_or_none(
|
||||
video_info.get('source_duration')),
|
||||
'artist': str_or_none(
|
||||
video_info.get('music_track_artist')),
|
||||
video_info.get('music_track_artist')) or None,
|
||||
'track': str_or_none(
|
||||
video_info.get('music_track_name')),
|
||||
video_info.get('music_track_name')) or None,
|
||||
}
|
||||
|
||||
@@ -31,7 +31,6 @@ class GrouponIE(InfoExtractor):
|
||||
}
|
||||
|
||||
_PROVIDERS = {
|
||||
'ooyala': ('ooyala:%s', 'Ooyala'),
|
||||
'youtube': ('%s', 'Youtube'),
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class HarpodeonIE(InfoExtractor):
|
||||
@@ -14,7 +14,7 @@ class HarpodeonIE(InfoExtractor):
|
||||
'title': 'The Smoking Out of Bella Butts',
|
||||
'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77',
|
||||
'creator': 'Vitagraph Company of America',
|
||||
'release_date': '19150101'
|
||||
'release_year': 1915,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.harpodeon.com/preview/The_Smoking_Out_of_Bella_Butts/268068288',
|
||||
@@ -25,7 +25,7 @@ class HarpodeonIE(InfoExtractor):
|
||||
'title': 'The Smoking Out of Bella Butts',
|
||||
'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77',
|
||||
'creator': 'Vitagraph Company of America',
|
||||
'release_date': '19150101'
|
||||
'release_year': 1915,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.harpodeon.com/preview/Behind_the_Screen/421838710',
|
||||
@@ -36,7 +36,7 @@ class HarpodeonIE(InfoExtractor):
|
||||
'title': 'Behind the Screen',
|
||||
'description': 'md5:008972a3dc51fba3965ee517d2ba9155',
|
||||
'creator': 'Lone Star Corporation',
|
||||
'release_date': '19160101'
|
||||
'release_year': 1916,
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -66,5 +66,5 @@ class HarpodeonIE(InfoExtractor):
|
||||
'http_headers': {'Referer': url},
|
||||
'description': self._html_search_meta('description', webpage, fatal=False),
|
||||
'creator': creator,
|
||||
'release_date': unified_strdate(f'{release_year}0101')
|
||||
'release_year': int_or_none(release_year),
|
||||
}
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class HelsinkiIE(InfoExtractor):
    """Extractor for the video archive at video.helsinki.fi."""

    IE_DESC = 'helsinki.fi'
    _VALID_URL = r'https?://video\.helsinki\.fi/Arkisto/flash\.php\?id=(?P<id>\d+)'
    _TEST = {
        'url': 'http://video.helsinki.fi/Arkisto/flash.php?id=20258',
        'info_dict': {
            'id': '20258',
            'ext': 'mp4',
            'title': 'Tietotekniikkafoorumi-iltapäivä',
            'description': 'md5:f5c904224d43c133225130fe156a5ee0',
        },
        'params': {
            'skip_download': True,  # RTMP
        }
    }

    def _real_extract(self, url):
        """Parse the jwplayer setup object on the page and list its sources as formats."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The setup argument is a JS object literal, so it is converted to JSON before parsing
        player_code = self._html_search_regex(
            r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);',
            webpage, 'player code')
        player_setup = self._parse_json(
            player_code, video_id, transform_source=js_to_json)

        formats = []
        for source in player_setup['sources']:
            formats.append({
                'url': source['file'],
                'ext': 'mp4',
            })

        og_title = self._og_search_title(webpage)
        return {
            'id': video_id,
            'title': og_title.replace('Video: ', ''),
            'description': self._og_search_description(webpage),
            'formats': formats,
        }
|
||||
@@ -1,209 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class HitboxIE(InfoExtractor):
    """Extractor for recorded videos on hitbox.tv / smashcast.tv."""

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
        'only_matching': True,
    }]

    def _extract_metadata(self, url, video_id):
        """Download ``{url}/{video_id}`` and map the first media entry to info-dict fields.

        The entry is read from the ``video`` list when ``media_type`` is
        ``video`` and from the ``livestream`` list otherwise. The returned
        dict has no ``formats``; callers add them.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            f'{url}/{video_id}', video_id, 'Downloading metadata JSON')

        # The date field name differs between recorded videos and live streams
        if metadata.get('media_type') == 'video':
            media_type, date_key = 'video', 'media_date_added'
        else:
            media_type, date_key = 'livestream', 'media_live_since'

        video_meta = metadata.get(media_type, [])[0]

        # Only list thumbnails whose path is actually present; concatenating a
        # missing (None) path onto the base URL would raise TypeError
        thumbnail_specs = (
            (video_meta.get('media_thumbnail'), 320, 180),
            (video_meta.get('media_thumbnail_large'), 768, 432),
        )
        thumbs = [{
            'url': thumb_base + path,
            'width': width,
            'height': height,
        } for path, width, height in thumbnail_specs if path]

        return {
            'id': video_id,
            'title': video_meta.get('media_status'),
            'alt_title': video_meta.get('media_title'),
            'description': clean_html(
                video_meta.get('media_description')
                or video_meta.get('media_description_md')),
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': float_or_none(video_meta.get('media_duration')),
            'uploader': video_meta.get('media_user_name'),
            'view_count': int_or_none(video_meta.get('media_views')),
            'timestamp': parse_iso8601(video_meta.get(date_key), ' '),
            'categories': [video_meta.get('category_name')],
        }

    def _real_extract(self, url):
        """Collect formats from the player config and merge them into the metadata."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            f'https://www.smashcast.tv/api/player/config/video/{video_id}',
            video_id, 'Downloading video JSON')

        formats = []
        for video in player_config['clip']['bitrates']:
            label = video.get('label')
            video_url = video.get('url')
            # 'Auto' entries and entries without a URL are skipped
            if label == 'Auto' or not video_url:
                continue
            fmt = {
                'url': video_url,
                'tbr': int_or_none(video.get('bitrate')),
                'format_note': label,
            }
            if determine_ext(video_url) == 'm3u8':
                # Only absolute http(s) playlist URLs are kept
                if not video_url.startswith('http'):
                    continue
                fmt.update({
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                })
            formats.append(fmt)

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/video', video_id)
        metadata['formats'] = formats

        return metadata
|
||||
|
||||
|
||||
class HitboxLiveIE(HitboxIE):  # XXX: Do not subclass from concrete IE
    """Extractor for live channels on hitbox.tv / smashcast.tv."""

    IE_NAME = 'hitbox:live'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/dimak',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match only URLs that the recorded-video extractor (HitboxIE) does not claim."""
        return False if HitboxIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        """Collect formats from every playable CDN and merge them into the live metadata."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            f'https://www.smashcast.tv/api/player/config/live/{video_id}',
            video_id)

        formats = []
        # A missing/null 'cdns' or 'bitrates' is treated as an empty list instead of crashing
        for cdn in player_config.get('cdns') or []:
            # Subscribe URLs are not playable
            if cdn.get('rtmpSubscribe') is True:
                continue
            base_url = cdn.get('netConnectionUrl')
            # The last two labels of the host name are used as the format note;
            # left as None when the URL is missing or does not match
            host_match = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url or '')
            host = host_match.group(1) if host_match else None
            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                stream_url = stream.get('url')
                if label == 'Auto' or not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    # Only absolute http(s) playlist URLs are kept
                    if not stream_url.startswith('http'):
                        continue
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        'rtmp_live': True,
                    })
                elif base_url:
                    # Stream paths are relative to the CDN's connection URL,
                    # so they cannot be built without one
                    formats.append({
                        'url': f'{base_url}/{stream_url}',
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/live', video_id)
        metadata['formats'] = formats
        metadata['is_live'] = True

        return metadata
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user