mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-27 01:51:24 +00:00
Merge branch 'master' into ke/refactor-move-files-pp
This commit is contained in:
@@ -4,6 +4,7 @@ import copy
|
||||
import datetime as dt
|
||||
import errno
|
||||
import fileinput
|
||||
import functools
|
||||
import http.cookiejar
|
||||
import io
|
||||
import itertools
|
||||
@@ -24,7 +25,7 @@ import traceback
|
||||
import unicodedata
|
||||
|
||||
from .cache import Cache
|
||||
from .compat import functools, urllib # isort: split
|
||||
from .compat import urllib # isort: split
|
||||
from .compat import compat_os_name, urllib_req_to_req
|
||||
from .cookies import LenientSimpleCookie, load_cookies
|
||||
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
||||
@@ -158,7 +159,7 @@ from .utils import (
|
||||
write_json_file,
|
||||
write_string,
|
||||
)
|
||||
from .utils._utils import _YDLLogger
|
||||
from .utils._utils import _UnsafeExtensionError, _YDLLogger
|
||||
from .utils.networking import (
|
||||
HTTPHeaderDict,
|
||||
clean_headers,
|
||||
@@ -171,6 +172,20 @@ if compat_os_name == 'nt':
|
||||
import ctypes
|
||||
|
||||
|
||||
def _catch_unsafe_extension_error(func):
    """Decorate a method so that `_UnsafeExtensionError` is reported instead of propagated

    The wrapped callable is invoked unchanged. If it raises `_UnsafeExtensionError`,
    the error is passed to `self.report_error` with a message naming the offending
    extension; when that call returns, the wrapper falls through and yields None.

    @param func  Method taking `self` as its first argument
    @returns     The wrapping function (metadata copied via functools.wraps)
    """
    @functools.wraps(func)
    def guarded(self, *args, **kwargs):
        try:
            result = func(self, *args, **kwargs)
        except _UnsafeExtensionError as error:
            message = (
                f'The extracted extension ({error.extension!r}) is unusual '
                'and will be skipped for safety reasons. '
                f'If you believe this is an error{bug_reports_message(",")}')
            self.report_error(message)
        else:
            return result

    return guarded
|
||||
|
||||
|
||||
class YoutubeDL:
|
||||
"""YoutubeDL class.
|
||||
|
||||
@@ -453,8 +468,9 @@ class YoutubeDL:
|
||||
Set the value to 'native' to use the native downloader
|
||||
compat_opts: Compatibility options. See "Differences in default behavior".
|
||||
The following options do not work when used through the API:
|
||||
filename, abort-on-error, multistreams, no-live-chat, format-sort
|
||||
no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
|
||||
filename, abort-on-error, multistreams, no-live-chat,
|
||||
format-sort, no-clean-infojson, no-playlist-metafiles,
|
||||
no-keep-subs, no-attach-info-json, allow-unsafe-ext.
|
||||
Refer __init__.py for their implementation
|
||||
progress_template: Dictionary of templates for progress outputs.
|
||||
Allowed keys are 'download', 'postprocess',
|
||||
@@ -581,8 +597,9 @@ class YoutubeDL:
|
||||
'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
|
||||
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
|
||||
'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
|
||||
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
|
||||
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
|
||||
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url',
|
||||
'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
|
||||
'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
|
||||
}
|
||||
_deprecated_multivalue_fields = {
|
||||
'album_artist': 'album_artists',
|
||||
@@ -1398,6 +1415,7 @@ class YoutubeDL:
|
||||
outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
|
||||
return self.escape_outtmpl(outtmpl) % info_dict
|
||||
|
||||
@_catch_unsafe_extension_error
|
||||
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
|
||||
assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
|
||||
if outtmpl is None:
|
||||
@@ -1925,6 +1943,8 @@ class YoutubeDL:
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_channel': ie_result.get('channel'),
|
||||
'playlist_channel_id': ie_result.get('channel_id'),
|
||||
**kwargs,
|
||||
}
|
||||
if strict:
|
||||
@@ -3188,6 +3208,7 @@ class YoutubeDL:
|
||||
os.remove(file)
|
||||
return None
|
||||
|
||||
@_catch_unsafe_extension_error
|
||||
def process_info(self, info_dict):
|
||||
"""Process a single resolved IE result. (Modifies it in-place)"""
|
||||
|
||||
|
||||
@@ -64,6 +64,7 @@ from .utils import (
|
||||
write_string,
|
||||
)
|
||||
from .utils.networking import std_headers
|
||||
from .utils._utils import _UnsafeExtensionError
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
_IN_CLI = False
|
||||
@@ -593,6 +594,13 @@ def validate_options(opts):
|
||||
if opts.ap_username is not None and opts.ap_password is None:
|
||||
opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
|
||||
|
||||
# compat option changes global state destructively; only allow from cli
|
||||
if 'allow-unsafe-ext' in opts.compat_opts:
|
||||
warnings.append(
|
||||
'Using allow-unsafe-ext opens you up to potential attacks. '
|
||||
'Use with great care!')
|
||||
_UnsafeExtensionError.sanitize_extension = lambda x, prepend=False: x
|
||||
|
||||
return warnings, deprecation_warnings
|
||||
|
||||
|
||||
|
||||
@@ -1,16 +1,22 @@
|
||||
tests = {
|
||||
'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
|
||||
'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
|
||||
'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
|
||||
'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
|
||||
}
|
||||
|
||||
|
||||
def what(file=None, h=None):
    """Detect format of image (Currently supports jpeg, png, webp, gif only)

    Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py
    Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf

    @param file  Path of the file to inspect; only opened when `h` is None
    @param h     Leading bytes of the image, if the caller already has them
    @returns     'webp', 'png', 'jpeg' or 'gif'; None if no signature matches
    """
    if h is None:
        # 12 bytes are enough for the longest signature checked below (RIFF....WEBP)
        with open(file, 'rb') as f:
            h = f.read(12)

    if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8):
        return 'webp'

    if h.startswith(b'\x89PNG'):
        return 'png'

    # Match on the SOI marker followed by the first byte of the next marker,
    # rather than requiring a JFIF/Exif tag at a fixed offset
    if h.startswith(b'\xFF\xD8\xFF'):
        return 'jpeg'

    if h.startswith(b'GIF'):
        return 'gif'

    return None
|
||||
|
||||
@@ -2,7 +2,9 @@ import base64
|
||||
import collections
|
||||
import contextlib
|
||||
import datetime as dt
|
||||
import functools
|
||||
import glob
|
||||
import hashlib
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
import io
|
||||
@@ -17,14 +19,12 @@ import tempfile
|
||||
import time
|
||||
import urllib.request
|
||||
from enum import Enum, auto
|
||||
from hashlib import pbkdf2_hmac
|
||||
|
||||
from .aes import (
|
||||
aes_cbc_decrypt_bytes,
|
||||
aes_gcm_decrypt_and_verify_bytes,
|
||||
unpad_pkcs7,
|
||||
)
|
||||
from .compat import functools # isort: split
|
||||
from .compat import compat_os_name
|
||||
from .dependencies import (
|
||||
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
||||
@@ -740,40 +740,38 @@ def _get_linux_desktop_environment(env, logger):
|
||||
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
|
||||
desktop_session = env.get('DESKTOP_SESSION', None)
|
||||
if xdg_current_desktop is not None:
|
||||
xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
|
||||
|
||||
if xdg_current_desktop == 'Unity':
|
||||
if desktop_session is not None and 'gnome-fallback' in desktop_session:
|
||||
for part in map(str.strip, xdg_current_desktop.split(':')):
|
||||
if part == 'Unity':
|
||||
if desktop_session is not None and 'gnome-fallback' in desktop_session:
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.UNITY
|
||||
elif part == 'Deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif part == 'GNOME':
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.UNITY
|
||||
elif xdg_current_desktop == 'Deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif xdg_current_desktop == 'GNOME':
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif xdg_current_desktop == 'X-Cinnamon':
|
||||
return _LinuxDesktopEnvironment.CINNAMON
|
||||
elif xdg_current_desktop == 'KDE':
|
||||
kde_version = env.get('KDE_SESSION_VERSION', None)
|
||||
if kde_version == '5':
|
||||
return _LinuxDesktopEnvironment.KDE5
|
||||
elif kde_version == '6':
|
||||
return _LinuxDesktopEnvironment.KDE6
|
||||
elif kde_version == '4':
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif xdg_current_desktop == 'Pantheon':
|
||||
return _LinuxDesktopEnvironment.PANTHEON
|
||||
elif xdg_current_desktop == 'XFCE':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif xdg_current_desktop == 'UKUI':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
elif xdg_current_desktop == 'LXQt':
|
||||
return _LinuxDesktopEnvironment.LXQT
|
||||
else:
|
||||
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||
elif part == 'X-Cinnamon':
|
||||
return _LinuxDesktopEnvironment.CINNAMON
|
||||
elif part == 'KDE':
|
||||
kde_version = env.get('KDE_SESSION_VERSION', None)
|
||||
if kde_version == '5':
|
||||
return _LinuxDesktopEnvironment.KDE5
|
||||
elif kde_version == '6':
|
||||
return _LinuxDesktopEnvironment.KDE6
|
||||
elif kde_version == '4':
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif part == 'Pantheon':
|
||||
return _LinuxDesktopEnvironment.PANTHEON
|
||||
elif part == 'XFCE':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif part == 'UKUI':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
elif part == 'LXQt':
|
||||
return _LinuxDesktopEnvironment.LXQT
|
||||
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||
|
||||
elif desktop_session is not None:
|
||||
if desktop_session == 'deepin':
|
||||
@@ -1001,7 +999,7 @@ def _get_windows_v10_key(browser_root, logger):
|
||||
|
||||
|
||||
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key with PBKDF2 using HMAC-SHA1

    @param password    Password as bytes
    @param salt        Salt as bytes
    @param iterations  Number of PBKDF2 iterations
    @param key_length  Length of the derived key in bytes
    @returns           The derived key as bytes
    """
    return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
||||
|
||||
|
||||
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import enum
|
||||
import functools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -9,7 +10,6 @@ import time
|
||||
import uuid
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import functools
|
||||
from ..networking import Request
|
||||
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
|
||||
from ..utils import (
|
||||
@@ -108,7 +108,7 @@ class ExternalFD(FragmentFD):
|
||||
return all((
|
||||
not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
|
||||
'+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
|
||||
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'),
|
||||
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'),
|
||||
all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
|
||||
))
|
||||
|
||||
|
||||
@@ -160,10 +160,12 @@ class HlsFD(FragmentFD):
|
||||
extra_state = ctx.setdefault('extra_state', {})
|
||||
|
||||
format_index = info_dict.get('format_index')
|
||||
extra_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
if extra_param_to_segment_url:
|
||||
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
|
||||
extra_segment_query = None
|
||||
if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'):
|
||||
extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url)
|
||||
extra_key_query = None
|
||||
if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'):
|
||||
extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url)
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
@@ -190,8 +192,8 @@ class HlsFD(FragmentFD):
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
frag_url = urljoin(man_url, line)
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
if extra_segment_query:
|
||||
frag_url = update_url_query(frag_url, extra_segment_query)
|
||||
|
||||
fragments.append({
|
||||
'frag_index': frag_index,
|
||||
@@ -212,8 +214,8 @@ class HlsFD(FragmentFD):
|
||||
frag_index += 1
|
||||
map_info = parse_m3u8_attributes(line[11:])
|
||||
frag_url = urljoin(man_url, map_info.get('URI'))
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
if extra_segment_query:
|
||||
frag_url = update_url_query(frag_url, extra_segment_query)
|
||||
|
||||
if map_info.get('BYTERANGE'):
|
||||
splitted_byte_range = map_info.get('BYTERANGE').split('@')
|
||||
@@ -244,8 +246,10 @@ class HlsFD(FragmentFD):
|
||||
decrypt_info['KEY'] = external_aes_key
|
||||
else:
|
||||
decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
if extra_key_query or extra_segment_query:
|
||||
# Fall back to extra_segment_query to key for backwards compat
|
||||
decrypt_info['URI'] = update_url_query(
|
||||
decrypt_info['URI'], extra_key_query or extra_segment_query)
|
||||
if decrypt_url != decrypt_info['URI']:
|
||||
decrypt_info['KEY'] = None
|
||||
|
||||
|
||||
@@ -76,6 +76,7 @@ from .aenetworks import (
|
||||
)
|
||||
from .aeonco import AeonCoIE
|
||||
from .afreecatv import (
|
||||
AfreecaTVCatchStoryIE,
|
||||
AfreecaTVIE,
|
||||
AfreecaTVLiveIE,
|
||||
AfreecaTVUserIE,
|
||||
@@ -779,6 +780,7 @@ from .gopro import GoProIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .gotostage import GoToStageIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .graspop import GraspopIE
|
||||
from .gronkh import (
|
||||
GronkhFeedIE,
|
||||
GronkhIE,
|
||||
@@ -969,6 +971,10 @@ from .la7 import (
|
||||
LA7PodcastEpisodeIE,
|
||||
LA7PodcastIE,
|
||||
)
|
||||
from .laracasts import (
|
||||
LaracastsIE,
|
||||
LaracastsPlaylistIE,
|
||||
)
|
||||
from .lastfm import (
|
||||
LastFMIE,
|
||||
LastFMPlaylistIE,
|
||||
@@ -1113,12 +1119,15 @@ from .meipai import MeipaiIE
|
||||
from .melonvod import MelonVODIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgtv import MGTVIE
|
||||
from .microsoftembed import MicrosoftEmbedIE
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .microsoftvirtualacademy import (
|
||||
MicrosoftVirtualAcademyCourseIE,
|
||||
MicrosoftVirtualAcademyIE,
|
||||
from .microsoftembed import (
|
||||
MicrosoftBuildIE,
|
||||
MicrosoftEmbedIE,
|
||||
MicrosoftLearnEpisodeIE,
|
||||
MicrosoftLearnPlaylistIE,
|
||||
MicrosoftLearnSessionIE,
|
||||
MicrosoftMediusIE,
|
||||
)
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .mildom import (
|
||||
MildomClipIE,
|
||||
MildomIE,
|
||||
@@ -1603,6 +1612,7 @@ from .qqmusic import (
|
||||
QQMusicPlaylistIE,
|
||||
QQMusicSingerIE,
|
||||
QQMusicToplistIE,
|
||||
QQMusicVideoIE,
|
||||
)
|
||||
from .r7 import (
|
||||
R7IE,
|
||||
@@ -1928,6 +1938,10 @@ from .spreaker import (
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .sproutvideo import (
|
||||
SproutVideoIE,
|
||||
VidsIoIE,
|
||||
)
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
SRGSSRPlayIE,
|
||||
@@ -2310,6 +2324,7 @@ from .vidio import (
|
||||
)
|
||||
from .vidlii import VidLiiIE
|
||||
from .vidly import VidlyIE
|
||||
from .vidyard import VidyardIE
|
||||
from .viewlift import (
|
||||
ViewLiftEmbedIE,
|
||||
ViewLiftIE,
|
||||
@@ -2375,6 +2390,10 @@ from .vrt import (
|
||||
VrtNUIE,
|
||||
)
|
||||
from .vtm import VTMIE
|
||||
from .vtv import (
|
||||
VTVIE,
|
||||
VTVGoIE,
|
||||
)
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import (
|
||||
VVVVIDIE,
|
||||
|
||||
@@ -368,6 +368,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
info['episode_number'] = epis if epis < 2000 else None
|
||||
|
||||
is_live, m3u8_url = False, None
|
||||
availability = 'public'
|
||||
if video_type == 'now-on-air':
|
||||
is_live = True
|
||||
channel_url = 'https://api.abema.io/v1/channels'
|
||||
@@ -389,6 +390,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
if 3 not in ondemand_types:
|
||||
# cannot acquire decryption key for these streams
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
info.update(traverse_obj(api_response, {
|
||||
'series': ('series', 'title'),
|
||||
'season': ('season', 'name'),
|
||||
@@ -408,6 +410,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
headers=headers)
|
||||
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
|
||||
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
|
||||
else:
|
||||
@@ -425,6 +428,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
'availability': availability,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
@@ -72,7 +72,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
(?P<id>\d+)/?(?:$|[?#&])
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
@@ -189,7 +189,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
headers={'Referer': url}, data=urlencode_postdata({
|
||||
'nTitleNo': video_id,
|
||||
'nApiLevel': 10,
|
||||
}))['data']
|
||||
}), impersonate=True)['data']
|
||||
|
||||
error_code = traverse_obj(data, ('code', {int}))
|
||||
if error_code == -6221:
|
||||
@@ -253,6 +253,43 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
|
||||
|
||||
|
||||
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
    # Extracts every "catch" clip of a catch story as one playlist
    IE_NAME = 'afreecatv:catchstory'
    IE_DESC = 'afreecatv.com catch story'
    _VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
    _TESTS = [{
        'url': 'https://vod.afreecatv.com/player/103247/catchstory',
        'info_dict': {
            'id': '103247',
        },
        'playlist_count': 2,
    }]

    def _real_extract(self, url):
        story_id = self._match_id(url)
        # The story id from the URL is sent as nStoryIdx; the page URL doubles as Referer
        response = self._download_json(
            'https://api.m.afreecatv.com/catchstory/a/view', story_id,
            headers={'Referer': url}, impersonate=True,
            query={'aStoryListIdx': '', 'nStoryIdx': story_id})

        return self.playlist_result(self._entries(response), story_id)

    @staticmethod
    def _entries(data):
        # 'files' is always a list with 1 element
        entry_fields = {
            'id': ('files', 0, 'file_info_key', {str}),
            'url': ('files', 0, 'file', {url_or_none}),
            'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
            'title': ('title', {str}),
            'uploader': ('writer_nick', {str}),
            'uploader_id': ('writer_id', {str}),
            'thumbnail': ('thumb', {url_or_none}),
            'timestamp': ('write_timestamp', {int_or_none}),
        }
        # Keep only stories of type 'catch', and only clips whose first file has a 'file' value
        catch_clips = traverse_obj(data, (
            'data', lambda _, v: v['story_type'] == 'catch',
            'catch_list', lambda _, v: v['files'][0]['file'], entry_fields))
        yield from catch_clips
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:live'
|
||||
IE_DESC = 'afreecatv.com livestreams'
|
||||
|
||||
@@ -4,6 +4,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
@@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
else:
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': f'{stream_type}-{vbr}' if vbr else stream_type,
|
||||
'format_id': join_nonempty(stream_type, vbr),
|
||||
'vbr': vbr,
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
|
||||
@@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, video_id, mpd_id='dash', fatal=False))
|
||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
href, video_id, ism_id='mss', fatal=False))
|
||||
|
||||
@@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor):
|
||||
]
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == code:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
@@ -55,7 +47,9 @@ class AtresPlayerIE(InfoExtractor):
|
||||
'password': password,
|
||||
}))['targetUrl']
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 400)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
self._request_webpage(target_url, None, 'Following Target URL')
|
||||
|
||||
@@ -66,7 +60,12 @@ class AtresPlayerIE(InfoExtractor):
|
||||
episode = self._download_json(
|
||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 403)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
title = episode['titulo']
|
||||
|
||||
|
||||
@@ -4,9 +4,13 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
determine_ext,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,6 +34,7 @@ class BanByeBaseIE(InfoExtractor):
|
||||
class BanByeIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
# ['src']['mp4']['levels'] direct mp4 urls only
|
||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||
'info_dict': {
|
||||
@@ -58,6 +63,7 @@ class BanByeIE(BanByeBaseIE):
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
# ['src']['mp4']['levels'] direct mp4 urls only
|
||||
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
|
||||
'info_dict': {
|
||||
'id': 'v_kb6_o1Kyq-CD',
|
||||
@@ -77,6 +83,48 @@ class BanByeIE(BanByeBaseIE):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# ['src']['hls']['levels'] variant m3u8 urls only; master m3u8 is 404
|
||||
'url': 'https://banbye.com/watch/v_a_gPFuC9LoW5',
|
||||
'info_dict': {
|
||||
'id': 'v_a_gPFuC9LoW5',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:183524056bebdfa245fd6d214f63c0fe',
|
||||
'description': 'md5:943ac87287ca98d28d8b8797719827c6',
|
||||
'uploader': 'wRealu24',
|
||||
'channel_id': 'ch_wrealu24',
|
||||
'channel_url': 'https://banbye.com/channel/ch_wrealu24',
|
||||
'upload_date': '20231113',
|
||||
'timestamp': 1699874062,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_a_gPFuC9LoW5/96.webp',
|
||||
'tags': ['jaszczur', 'sejm', 'lewica', 'polska', 'ukrainizacja', 'pierwszeposiedzeniesejmu'],
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8'],
|
||||
}, {
|
||||
# ['src']['hls']['masterPlaylist'] m3u8 only
|
||||
'url': 'https://banbye.com/watch/v_B0rsKWsr-aaa',
|
||||
'info_dict': {
|
||||
'id': 'v_B0rsKWsr-aaa',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:00b254164b82101b3f9e5326037447ed',
|
||||
'description': 'md5:3fd8b48aa81954ba024bc60f5de6e167',
|
||||
'uploader': 'PSTV Piotr Szlachtowicz ',
|
||||
'channel_id': 'ch_KV9EVObkB9wB',
|
||||
'channel_url': 'https://banbye.com/channel/ch_KV9EVObkB9wB',
|
||||
'upload_date': '20240629',
|
||||
'timestamp': 1719646816,
|
||||
'duration': 2377,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_B0rsKWsr-aaa/96.webp',
|
||||
'tags': ['Biden', 'Trump', 'Wybory', 'USA'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -91,11 +139,24 @@ class BanByeIE(BanByeBaseIE):
|
||||
'id': f'{quality}p',
|
||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
|
||||
} for quality in [48, 96, 144, 240, 512, 1080]]
|
||||
formats = [{
|
||||
'format_id': f'http-{quality}p',
|
||||
'quality': quality,
|
||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
|
||||
} for quality in data['quality']]
|
||||
|
||||
formats = []
|
||||
url_data = self._download_json(f'{self._API_BASE}/videos/{video_id}/url', video_id, data=b'')
|
||||
if master_url := traverse_obj(url_data, ('src', 'hls', 'masterPlaylist', {url_or_none})):
|
||||
formats = self._extract_m3u8_formats(master_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
|
||||
for format_id, format_url in traverse_obj(url_data, (
|
||||
'src', ('mp4', 'hls'), 'levels', {dict.items}, lambda _, v: url_or_none(v[1]))):
|
||||
ext = determine_ext(format_url)
|
||||
is_hls = ext == 'm3u8'
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'ext': 'mp4' if is_hls else ext,
|
||||
'format_id': join_nonempty(is_hls and 'hls', format_id),
|
||||
'protocol': 'm3u8_native' if is_hls else 'https',
|
||||
'height': int_or_none(format_id),
|
||||
})
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -41,7 +41,7 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader_id': 'youtube-dl',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
|
||||
@@ -31,12 +31,12 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_count,
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
smuggle_url,
|
||||
srt_subtitles_timecode,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
@@ -47,6 +47,23 @@ from ..utils import (
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
|
||||
_wbi_key_cache = {}
|
||||
|
||||
@property
def is_logged_in(self):
    """Truthiness of the `SESSDATA` cookie entry for https://api.bilibili.com"""
    api_cookies = self._get_cookies('https://api.bilibili.com')
    session_cookie = api_cookies.get('SESSDATA')
    return bool(session_cookie)
|
||||
|
||||
def _check_missing_formats(self, play_info, formats):
    """Print a notice listing the advertised qualities that were not extracted

    Every entry of `play_info['support_formats']` whose 'quality' is absent from
    the 'quality' values of `formats` is named by the first value found among its
    'new_description', 'display_desc' and 'quality' fields. Nothing is printed
    when the resulting list is empty.

    @param play_info  Play info mapping that may carry 'support_formats'
    @param formats    Formats already extracted
    """
    parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
    unparsed = traverse_obj(play_info, (
        'support_formats', lambda _, v: v['quality'] not in parsed_qualities))
    labels = [
        traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
        for fmt in unparsed
    ]
    missing_formats = join_nonempty(*labels, delim=', ')
    if not missing_formats:
        return

    self.to_screen(
        f'Format(s) {missing_formats} are missing; you have to login or '
        f'become a premium member to download them. {self._login_hint()}')
|
||||
|
||||
def extract_formats(self, play_info):
|
||||
format_names = {
|
||||
@@ -86,18 +103,75 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'format': format_names.get(video.get('id')),
|
||||
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
||||
|
||||
missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
|
||||
if missing_formats:
|
||||
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
|
||||
f'you have to login or become premium member to download them. {self._login_hint()}')
|
||||
if formats:
|
||||
self._check_missing_formats(play_info, formats)
|
||||
|
||||
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}))
|
||||
if fragments:
|
||||
formats.append({
|
||||
'url': fragments[0]['url'],
|
||||
'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
|
||||
**({
|
||||
'fragments': fragments,
|
||||
'protocol': 'http_dash_segments',
|
||||
} if len(fragments) > 1 else {}),
|
||||
**traverse_obj(play_info, {
|
||||
'quality': ('quality', {int_or_none}),
|
||||
'format_id': ('quality', {str_or_none}),
|
||||
'format_note': ('quality', {lambda x: format_names.get(x)}),
|
||||
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
|
||||
}),
|
||||
**parse_resolution(format_names.get(play_info.get('quality'))),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _download_playinfo(self, video_id, cid, headers=None):
|
||||
def _get_wbi_key(self, video_id):
    """Return the WBI mixin key, downloading it when the cached one has expired

    The key and the time it was obtained are stored in `self._wbi_key_cache`;
    a cached key is reused while it is younger than `_WBI_KEY_CACHE_TIMEOUT` seconds.

    @param video_id  ID passed to `_download_json` for the nav request
    @returns         The first 32 characters of the shuffled lookup string
    """
    cache = self._wbi_key_cache
    if time.time() < cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
        return cache['key']

    session_data = self._download_json(
        'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')

    def _file_stem(url):
        # Last path component of the URL, cut at its first '.'
        return url.rpartition('/')[2].partition('.')[0]

    stems = traverse_obj(session_data, (
        'data', 'wbi_img', ('img_url', 'sub_url'), {_file_stem}))
    lookup = ''.join(stems)

    # from getMixinKey() in the vendor js
    mixin_key_enc_tab = [
        46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
        33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
        61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
        36, 20, 34, 44, 52,
    ]

    shuffled = ''.join(lookup[i] for i in mixin_key_enc_tab)
    cache.update({
        'key': shuffled[:32],
        'ts': time.time(),
    })
    return cache['key']
|
||||
|
||||
def _sign_wbi(self, params, video_id):
    """Return a WBI-signed copy of the query parameters ``params``.

    A ``wts`` entry holding the current time rounded to whole seconds is
    added, the entries are sorted by key, every value is converted to ``str``
    with the characters ``!'()*`` removed, and ``w_rid`` is set to the MD5
    hex digest of the URL-encoded query followed by the key returned by
    ``self._get_wbi_key(video_id)``.

    The mapping passed in is not modified; callers must use the returned
    dict as the request query.
    """
    strip_unsafe = str.maketrans('', '', "!'()*")
    stamped = {**params, 'wts': round(time.time())}
    signed = {
        key: str(value).translate(strip_unsafe)
        for key, value in sorted(stamped.items())
    }
    # The digest covers the query exactly as encoded here, before w_rid is added
    query = urllib.parse.urlencode(signed)
    signed['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
    return signed
|
||||
|
||||
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
    """Download the play info for one part of a video and return its ``data``.

    ``bvid`` and ``cid`` identify the video and the part, ``headers`` is
    forwarded to the request, and ``qn`` (when truthy) is added to the query
    as the ``qn`` parameter. The query is signed with ``self._sign_wbi``
    before it is sent to the WBI ``playurl`` endpoint.
    """
    # NOTE(review): 4048 looks like a bit mask of requested stream types - confirm against the API docs
    params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
    if qn:
        params['qn'] = qn
    return self._download_json(
        'https://api.bilibili.com/x/player/wbi/playurl', bvid,
        query=self._sign_wbi(params, bvid), headers=headers,
        note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
@@ -115,15 +189,15 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
}],
|
||||
}
|
||||
|
||||
subtitle_info = traverse_obj(self._download_json(
|
||||
video_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
|
||||
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
|
||||
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
|
||||
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
|
||||
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
|
||||
for s in subs_list:
|
||||
note=f'Extracting subtitle info {cid}')
|
||||
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||
self.report_warning(
|
||||
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
|
||||
for s in traverse_obj(video_info, (
|
||||
'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
|
||||
subtitles.setdefault(s['lan'], []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
|
||||
@@ -203,15 +277,15 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
|
||||
return cid_edges
|
||||
|
||||
def _get_interactive_entries(self, video_id, cid, metainfo):
|
||||
def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
|
||||
graph_version = traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
|
||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
|
||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||
for cid, edges in cid_edges.items():
|
||||
play_info = self._download_playinfo(video_id, cid)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
yield {
|
||||
**metainfo,
|
||||
'id': f'{video_id}_{cid}',
|
||||
@@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'timestamp': 1488353834,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'old av URL version',
|
||||
'url': 'http://www.bilibili.com/video/av1074402/',
|
||||
'info_dict': {
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||
'id': 'BV11x411K7CN',
|
||||
'ext': 'mp4',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
'id': 'BV11x411K7CN',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308.36,
|
||||
'upload_date': '20140420',
|
||||
'timestamp': 1397983878,
|
||||
@@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||
'_old_archive_ids': ['bilibili 1074402_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
@@ -288,6 +364,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||
'duration': 90.314,
|
||||
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
@@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||
'duration': 90.314,
|
||||
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'video has subtitles',
|
||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||
'info_dict': {
|
||||
'id': 'BV12N4y1M7rh',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
||||
'tags': list,
|
||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'subtitles': 'count:2',
|
||||
},
|
||||
'params': {'listsubtitles': True},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/av8903802/',
|
||||
'info_dict': {
|
||||
@@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 463665680_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
@@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 893839363_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'newer festival video',
|
||||
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
||||
@@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 778246196_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'legacy flv/mp4 video',
|
||||
'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
|
||||
'info_dict': {
|
||||
'id': 'BV1ms411Q7vw_p4',
|
||||
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||
'timestamp': 1458222815,
|
||||
'upload_date': '20160317',
|
||||
'description': '云南方言快乐生产线出品',
|
||||
'duration': float,
|
||||
'uploader': '一笑颠天',
|
||||
'uploader_id': '3916081',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 4120229_part4'],
|
||||
},
|
||||
'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
|
||||
'playlist_count': 19,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'BV1ms411Q7vw_p4_0',
|
||||
'ext': 'flv',
|
||||
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||
'duration': 399.102,
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'note': 'legacy mp4-only video',
|
||||
'url': 'https://www.bilibili.com/video/BV1nx411u79K',
|
||||
'info_dict': {
|
||||
'id': 'BV1nx411u79K',
|
||||
'ext': 'mp4',
|
||||
'title': '【练习室】201603声乐练习《No Air》with VigoVan',
|
||||
'timestamp': 1508893551,
|
||||
'upload_date': '20171025',
|
||||
'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
|
||||
'duration': 80.384,
|
||||
'uploader': '伯远',
|
||||
'uploader_id': '10584494',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 15700301_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'interactive/split-path video',
|
||||
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
|
||||
@@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||
},
|
||||
'playlist_count': 33,
|
||||
'playlist': [{
|
||||
@@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
@@ -465,6 +575,29 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'upload_date': '20191021',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}, {
|
||||
'note': 'video has subtitles, which requires login',
|
||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||
'info_dict': {
|
||||
'id': 'BV12N4y1M7rh',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
||||
'tags': list,
|
||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'subtitles': 'count:2', # login required for CC subtitle
|
||||
'_old_archive_ids': ['bilibili 898179753_part1'],
|
||||
},
|
||||
'params': {'listsubtitles': True},
|
||||
'skip': 'login required for subtitle',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
|
||||
'info_dict': {
|
||||
@@ -498,8 +631,9 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
if not self._match_valid_url(urlh.url):
|
||||
return self.url_result(urlh.url)
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
headers['Referer'] = url
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
@@ -548,7 +682,6 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
aid = video_data.get('aid')
|
||||
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
|
||||
|
||||
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
||||
|
||||
festival_info = {}
|
||||
@@ -586,18 +719,65 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
|
||||
if is_interactive:
|
||||
return self.playlist_result(
|
||||
self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
|
||||
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
|
||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
__post_extractor=self.extract_comments(aid))
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'formats': self.extract_formats(play_info),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if not traverse_obj(play_info, ('dash')):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
@@ -968,7 +1148,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
|
||||
}))
|
||||
|
||||
|
||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||
class BilibiliSpaceBaseIE(BilibiliBaseIE):
|
||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||
first_page = fetch_page(0)
|
||||
metadata = get_metadata(first_page)
|
||||
@@ -988,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
'id': '3985676',
|
||||
},
|
||||
'playlist_mincount': 178,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/313580179/video',
|
||||
'info_dict': {
|
||||
'id': '313580179',
|
||||
},
|
||||
'playlist_mincount': 92,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _extract_signature(self, playlist_id):
    """Build the signing key for the space search API.

    The ``nav`` endpoint is queried non-fatally. The file-name stems of its
    ``img_url`` and ``sub_url`` entries (or the hard-coded fallbacks when an
    entry is missing or empty) are concatenated, reordered by a fixed
    position table, and cut to 32 characters.
    """
    nav_info = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

    def url_stem(link):
        # 'https://host/dir/abc123.png' -> 'abc123'
        return link[link.rfind('/') + 1:].split('.')[0]

    fallback_keys = {
        'img_url': '34478ba821254d9d93542680e3b86100',
        'sub_url': '7e16a90d190a4355a78fd00b32a38de6',
    }
    session_key = ''.join(
        traverse_obj(nav_info, ('data', 'wbi_img', field, {url_stem})) or fallback
        for field, fallback in fallback_keys.items())

    position_table = (
        46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
        12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
        57, 62, 11, 36, 20, 34, 44, 52,
    )
    # Positions beyond the end of the key are skipped rather than treated as errors
    picked = [session_key[position] for position in position_table if position < len(session_key)]
    return ''.join(picked)[:32]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
||||
if not is_video_url:
|
||||
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
||||
'To download audios, add a "/audio" to the URL')
|
||||
|
||||
signature = self._extract_signature(playlist_id)
|
||||
|
||||
def fetch_page(page_idx):
    """Download one page of the space's video list and return its ``data``.

    ``page_idx`` is zero-based; the API's ``pn`` parameter is one-based.
    The sort order is taken from the ``order`` query parameter of the
    requested URL and defaults to ``pubdate``. The query is signed with
    ``self._sign_wbi`` before the request is made.

    Raises ExtractorError when the server answers HTTP 412, when the API
    code is -401, or for any other non-zero API code; asks for login when
    the code is -352 and no user is logged in.
    """
    query = {
        'keyword': '',
        'mid': playlist_id,
        'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
        'order_avoided': 'true',
        'platform': 'web',
        'pn': page_idx + 1,
        'ps': 30,
        'tid': 0,
        'web_location': 1550101,
    }

    try:
        response = self._download_json(
            'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
            query=self._sign_wbi(query, playlist_id),
            note=f'Downloading space page {page_idx}', headers={'Referer': url})
    except ExtractorError as e:
        if isinstance(e.cause, HTTPError) and e.cause.status == 412:
            raise ExtractorError(
                'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
        raise

    status_code = response['code']
    if status_code == -401:
        raise ExtractorError(
            'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
    elif status_code == -352 and not self.is_logged_in:
        self.raise_login_required('Request is rejected, you need to login to access playlist')
    elif status_code != 0:
        # Also reached for -352 when a user is logged in
        raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
    return response['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
@@ -1280,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/watchlater/#/list',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'info_dict': {
|
||||
'id': r're:\d+',
|
||||
'title': '稍后再看',
|
||||
},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
@@ -1356,14 +1519,19 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'skip': 'redirect url',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'info_dict': {
|
||||
'id': r're:2_\d+',
|
||||
'title': '稍后再看',
|
||||
'uploader': str,
|
||||
'uploader_id': str,
|
||||
},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
'skip': 'redirect url & login required',
|
||||
}]
|
||||
|
||||
def _extract_medialist(self, query, list_id):
|
||||
@@ -1414,7 +1582,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
@@ -1808,7 +1976,8 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
public_key = Cryptodome.RSA.importKey(key_data['key'])
|
||||
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
|
||||
login_post = self._download_json(
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
|
||||
data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': base64.b64encode(password_hash).decode('ascii'),
|
||||
'keep_me': 'true',
|
||||
@@ -2140,7 +2309,8 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
||||
series_info = self._call_api(
|
||||
f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
||||
return self.playlist_result(
|
||||
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
|
||||
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
|
||||
|
||||
@@ -24,7 +24,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
|
||||
@@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@@ -132,7 +135,7 @@ class BitChuteIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||
|
||||
self._raise_if_restricted(webpage)
|
||||
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
|
||||
@@ -171,13 +174,13 @@ class BitChuteIE(InfoExtractor):
|
||||
|
||||
|
||||
class BitChuteChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
'info_dict': {
|
||||
'id': 'bitchute',
|
||||
'title': 'BitChute',
|
||||
'description': 'md5:5329fb3866125afa9446835594a9b138',
|
||||
'description': 'md5:2134c37d64fc3a4846787c402956adac',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
@@ -210,6 +213,9 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
||||
@@ -230,7 +236,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
|
||||
@staticmethod
def _make_url(playlist_id, playlist_type):
    """Return the ``old.bitchute.com`` page URL for a channel or playlist.

    ``playlist_type`` becomes the first path segment and ``playlist_id``
    the second.
    """
    return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/'
|
||||
|
||||
def _fetch_page(self, playlist_id, playlist_type, page_num):
|
||||
playlist_url = self._make_url(playlist_id, playlist_type)
|
||||
|
||||
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
@@ -386,7 +387,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
@classmethod
def _make_brightcove_url(cls, params):
    """Return the HTTPS ``htmlFederated`` viewer URL combined with ``params``.

    The combination is done by ``update_url_query``.
    """
    return update_url_query(
        'https://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_url(cls, webpage):
|
||||
@@ -470,7 +471,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
'https://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
if player_page:
|
||||
player_key = self._search_regex(
|
||||
@@ -480,7 +481,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
brightcove_new_url = f'http://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
|
||||
brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
|
||||
if referer:
|
||||
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
@@ -538,12 +539,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
|
||||
})
|
||||
|
||||
def build_format_id(kind):
    """Build a format id from ``kind`` and the enclosing ``tbr`` / ``height``.

    The bitrate contributes ``<int(tbr)>k`` and the height ``<height>p``;
    a part whose source value is falsy is passed to ``join_nonempty`` as
    that falsy value instead.
    """
    # presumably join_nonempty drops the falsy parts and joins the rest with '-' - confirm in utils
    return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
|
||||
|
||||
if src or streaming_src:
|
||||
f.update({
|
||||
@@ -801,7 +797,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
# Look for iframe embeds [1]
|
||||
for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||
entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)
|
||||
|
||||
# Look for <video> tags [2] and embed_in_page embeds [3]
|
||||
# [2] looks like:
|
||||
@@ -830,7 +826,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
player_id = player_id or attrs.get('data-player') or 'default'
|
||||
embed = embed or attrs.get('data-embed') or 'default'
|
||||
|
||||
bc_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
|
||||
bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
|
||||
|
||||
# Some brightcove videos may be embedded with video tag only and
|
||||
# without script tag or any mentioning of brightcove at all. Such
|
||||
@@ -867,7 +863,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
|
||||
|
||||
def extract_policy_key():
|
||||
base_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/'
|
||||
base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/'
|
||||
config = self._download_json(
|
||||
base_url + 'config.json', video_id, fatal=False) or {}
|
||||
policy_key = try_get(
|
||||
|
||||
@@ -455,10 +455,8 @@ class CBCGemIE(InfoExtractor):
|
||||
|
||||
def claims_token_expired(self):
    """Return True when the claims token has expired or will within 10 seconds."""
    seconds_left = self._get_claims_token_expiry() - time.time()
    # It will expire in less than 10 seconds, or has already expired
    return seconds_left < 10
|
||||
|
||||
def claims_token_valid(self):
    """Return True when a claims token is stored and has not expired."""
    if self._claims_token is None:
        return False
    return not self.claims_token_expired()
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import base64
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import zlib
|
||||
|
||||
|
||||
@@ -1,63 +1,50 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj
|
||||
from .vidyard import VidyardBaseIE, VidyardIE
|
||||
from ..utils import ExtractorError, make_archive_id, url_basename
|
||||
|
||||
|
||||
# Extractor for cellebrite.com pages: the page is only used to locate the
# embedded Vidyard player, and the video itself is handled by the
# VidyardBaseIE helpers.
class CellebriteIE(VidyardBaseIE):
    _VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
        'info_dict': {
            'id': 'ZqmUss3dQfEMGpauambPuH',
            'display_id': '16025876',
            'ext': 'mp4',
            'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
            'description': 'md5:dee48fe12bbae5c01fe6a053f7676da4',
            'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
            'duration': 455.979,
            '_old_archive_ids': ['cellebrite 16025876'],
        },
    }, {
        'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
        'info_dict': {
            'id': 'QV1U8a2yzcxigw7VFnqKyg',
            'display_id': '29018255',
            'ext': 'mp4',
            'title': 'How to Lawfully Collect the Maximum Amount of Data From Android Devices',
            'description': 'md5:0e943a9ac14c374d5d74faed634d773c',
            'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
            'duration': 134.315,
            '_old_archive_ids': ['cellebrite 29018255'],
        },
    }]

    def _real_extract(self, url):
        """Extract the first Vidyard video embedded in a cellebrite.com page.

        Raises ExtractorError when the page contains no Vidyard embed.
        """
        slug = self._match_id(url)
        webpage = self._download_webpage(url, slug)
        vidyard_url = next(VidyardIE._extract_embed_urls(url, webpage), None)
        if not vidyard_url:
            raise ExtractorError('No Vidyard video embeds found on page')

        # The last path component of the embed URL is used as the video id
        video_id = url_basename(vidyard_url)
        # NOTE(review): assumes the fetched JSON always has at least one chapter - confirm in VidyardBaseIE
        info = self._process_video_json(self._fetch_video_json(video_id)['chapters'][0], video_id)
        if info.get('display_id'):
            # Record an archive id built from display_id under _old_archive_ids
            info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])]
        if thumbnail := self._og_search_thumbnail(webpage, default=None):
            info.setdefault('thumbnails', []).append({'url': thumbnail})

        return {
            # Page description is only a fallback; a description in info overrides it
            'description': self._og_search_description(webpage, default=None),
            **info,
        }
|
||||
|
||||
@@ -36,7 +36,7 @@ class CHZZKLiveIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
live_detail = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
|
||||
f'https://api.chzzk.naver.com/service/v3/channels/{channel_id}/live-detail', channel_id,
|
||||
note='Downloading channel info', errnote='Unable to download channel info')['content']
|
||||
|
||||
if live_detail.get('status') == 'CLOSE':
|
||||
@@ -106,12 +106,45 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
'upload_date': '20231219',
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Replay video is expired',
|
||||
}, {
|
||||
# Manually uploaded video
|
||||
'url': 'https://chzzk.naver.com/video/1980',
|
||||
'info_dict': {
|
||||
'id': '1980',
|
||||
'ext': 'mp4',
|
||||
'title': '※시청주의※한번보면 잊기 힘든 영상',
|
||||
'channel': '라디유radiyu',
|
||||
'channel_id': '68f895c59a1043bc5019b5e08c83a5c5',
|
||||
'channel_is_verified': False,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 95,
|
||||
'timestamp': 1703102631.722,
|
||||
'upload_date': '20231220',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# Partner channel replay video
|
||||
'url': 'https://chzzk.naver.com/video/2458',
|
||||
'info_dict': {
|
||||
'id': '2458',
|
||||
'ext': 'mp4',
|
||||
'title': '첫 방송',
|
||||
'channel': '강지',
|
||||
'channel_id': 'b5ed5db484d04faf4d150aedd362f34b',
|
||||
'channel_is_verified': True,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 4433,
|
||||
'timestamp': 1703307460.214,
|
||||
'upload_date': '20231223',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_meta = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
|
||||
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
|
||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
||||
|
||||
@@ -6,11 +6,11 @@ from .common import InfoExtractor
|
||||
class CloudflareStreamIE(InfoExtractor):
|
||||
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
|
||||
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
|
||||
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_EMBED_REGEX = [
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
|
||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||
]
|
||||
_TESTS = [{
|
||||
@@ -24,6 +24,14 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e',
|
||||
'info_dict': {
|
||||
'id': '0e8e040aec776862e1d632a699edf59e',
|
||||
'ext': 'mp4',
|
||||
'title': '0e8e040aec776862e1d632a699edf59e',
|
||||
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||
'only_matching': True,
|
||||
@@ -36,6 +44,9 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJraWQiOiJmYTA0YjViMzQ2NDkwYTM5NWJiNzQ1NWFhZTA2YzYwZSIsInN1YiI6Ijg4ZDQxMDhhMzY0MjA3M2VhYmFhZjg3ZGExODJkMjYzIiwiZXhwIjoxNjAwNjA5MzE5fQ.xkRJwLGkt0nZ%5F0BlPiwU7iW4pqb4lKkznbKfAhGg0tGcxSS6ZBA3lcTUwu7W%2DyCFbnAl%2Dhqk3Fn%5FqeQS%5FQydP27qTHpB9iIFFsMtk1tqzGZV5v4yrYDnwLSKzEKvVd6QwJnfABtxH2JdpSNuWlMUiVXFxGWgjOw6QeTNDDklTQYXV%5FNLV7sErSn5CeOPeRRkdXb%2D8ip%5FVOcfk1nDsFoOo4fctFtGP0wYMyY5ae8nhhatydHwevuvJCcEvEfh%2D4qjq9mCZOodevmtSQ4YWmggf4BxtWnDWYrGW8Otp6oqezrR8oY4%2DbKdV6PaqBj49aJdcls6xK7PmM8%5Fvjy3xfm0Mg',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -35,6 +37,20 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'duration': 1205,
|
||||
'upload_date': '20221130',
|
||||
},
|
||||
}, {
|
||||
# Video-only m3u8 formats need manual fixup
|
||||
'url': 'https://embed.cloudycdn.services/ltv/media/08j_d24-6000-074',
|
||||
'md5': 'fc472e40f6e6238446509be411c920e2',
|
||||
'info_dict': {
|
||||
'id': '08j_d24-6000-074',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240620',
|
||||
'duration': 1673,
|
||||
'title': 'D24-6000-074-cetstud',
|
||||
'timestamp': 1718902233,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
|
||||
@@ -63,6 +79,9 @@ class CloudyCDNIE(InfoExtractor):
|
||||
formats, subtitles = [], {}
|
||||
for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
|
||||
for fmt in fmts:
|
||||
if re.search(r'chunklist_b\d+_vo_', fmt['url']):
|
||||
fmt['acodec'] = 'none'
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
|
||||
@@ -234,7 +234,14 @@ class InfoExtractor:
|
||||
'maybe' if the format may have DRM and has to be tested before download.
|
||||
* extra_param_to_segment_url A query string to append to each
|
||||
fragment's URL, or to update each existing query string
|
||||
with. Only applied by the native HLS/DASH downloaders.
|
||||
with. If it is an HLS stream with an AES-128 decryption key,
|
||||
the query parameters will be passed to the key URI as well,
|
||||
unless there is an `extra_param_to_key_url` given,
|
||||
or unless an external key URI is provided via `hls_aes`.
|
||||
Only applied by the native HLS/DASH downloaders.
|
||||
* extra_param_to_key_url A query string to append to the URL
|
||||
of the format's HLS AES-128 decryption key.
|
||||
Only applied by the native HLS downloader.
|
||||
* hls_aes A dictionary of HLS AES-128 decryption information
|
||||
used by the native HLS downloader to override the
|
||||
values in the media playlist when an '#EXT-X-KEY' tag
|
||||
@@ -2215,6 +2222,11 @@ class InfoExtractor:
|
||||
'quality': quality,
|
||||
'has_drm': has_drm,
|
||||
}
|
||||
|
||||
# YouTube-specific
|
||||
if yt_audio_content_id := last_stream_inf.get('YT-EXT-AUDIO-CONTENT-ID'):
|
||||
f['language'] = yt_audio_content_id.split('.')[0]
|
||||
|
||||
resolution = last_stream_inf.get('RESOLUTION')
|
||||
if resolution:
|
||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||
|
||||
@@ -2,6 +2,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
@@ -52,7 +53,7 @@ class DailyMailIE(InfoExtractor):
|
||||
is_hls = container == 'M2TS'
|
||||
protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
|
||||
formats.append({
|
||||
'format_id': ('hls' if is_hls else protocol) + (f'-{tbr}' if tbr else ''),
|
||||
'format_id': join_nonempty('hls' if is_hls else protocol, tbr),
|
||||
'url': rendition_url,
|
||||
'width': int_or_none(rendition.get('frameWidth')),
|
||||
'height': int_or_none(rendition.get('frameHeight')),
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_resolution,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DigitalConcertHallIE(InfoExtractor):
|
||||
IE_DESC = 'DigitalConcertHall extractor'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
|
||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||
_ACCESS_TOKEN = None
|
||||
_NETRC_MACHINE = 'digitalconcerthall'
|
||||
@@ -26,7 +26,8 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'upload_date': '20210624',
|
||||
'timestamp': 1624548600,
|
||||
'duration': 2798,
|
||||
'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler',
|
||||
'album_artists': ['Members of the Berliner Philharmoniker', 'Simon Rössler'],
|
||||
'composers': ['Kurt Weill'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
@@ -34,8 +35,9 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'url': 'https://www.digitalconcerthall.com/en/concert/53785',
|
||||
'info_dict': {
|
||||
'id': '53785',
|
||||
'album_artist': 'Berliner Philharmoniker / Kirill Petrenko',
|
||||
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
|
||||
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
|
||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'playlist_count': 3,
|
||||
@@ -49,9 +51,20 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||
'upload_date': '20220714',
|
||||
'timestamp': 1657785600,
|
||||
'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff',
|
||||
'album_artists': ['Frank Peter Zimmermann', 'Benedikt von Bernstorff', 'Jakob von Bernstorff'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'Concert with several works and an interview',
|
||||
'url': 'https://www.digitalconcerthall.com/en/work/53785-1',
|
||||
'info_dict': {
|
||||
'id': '53785',
|
||||
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
|
||||
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
|
||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@@ -97,15 +110,14 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'Accept-Language': language,
|
||||
})
|
||||
|
||||
m3u8_url = traverse_obj(
|
||||
stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
|
||||
formats = []
|
||||
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False))
|
||||
|
||||
yield {
|
||||
'id': video_id,
|
||||
'title': item.get('title'),
|
||||
'composer': item.get('name_composer'),
|
||||
'url': m3u8_url,
|
||||
'formats': formats,
|
||||
'duration': item.get('duration_total'),
|
||||
'timestamp': traverse_obj(item, ('date', 'published')),
|
||||
@@ -119,31 +131,32 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id')
|
||||
language, type_, video_id, part = self._match_valid_url(url).group('language', 'type', 'id', 'part')
|
||||
if not language:
|
||||
language = 'en'
|
||||
|
||||
thumbnail_url = self._html_search_regex(
|
||||
r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)',
|
||||
self._download_webpage(url, video_id), 'thumbnail')
|
||||
thumbnails = [{
|
||||
'url': thumbnail_url,
|
||||
**parse_resolution(thumbnail_url),
|
||||
}]
|
||||
|
||||
api_type = 'concert' if type_ == 'work' else type_
|
||||
vid_info = self._download_json(
|
||||
f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={
|
||||
f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Accept-Language': language,
|
||||
})
|
||||
album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')
|
||||
album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name'))
|
||||
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
|
||||
|
||||
if type_ == 'work':
|
||||
videos = [videos[int(part) - 1]]
|
||||
|
||||
thumbnail = traverse_obj(vid_info, (
|
||||
'image', ..., {self._proto_relative_url}, {url_or_none},
|
||||
{lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': video_id,
|
||||
'title': vid_info.get('title'),
|
||||
'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_),
|
||||
'thumbnails': thumbnails,
|
||||
'album_artist': album_artist,
|
||||
'entries': self._entries(
|
||||
videos, language, type_, thumbnail=thumbnail, album_artists=album_artists),
|
||||
'thumbnail': thumbnail,
|
||||
'album_artists': album_artists,
|
||||
}
|
||||
|
||||
@@ -24,8 +24,9 @@ from ..utils import (
|
||||
class DouyuBaseIE(InfoExtractor):
|
||||
def _download_cryptojs_md5(self, video_id):
|
||||
for url in [
|
||||
# XXX: Do NOT use cdn.bootcdn.net; ref: https://sansec.io/research/polyfill-supply-chain-attack
|
||||
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
'https://unpkg.com/cryptojslib@3.1.2/rollups/md5.js',
|
||||
]:
|
||||
js_code = self._download_webpage(
|
||||
url, video_id, note='Downloading signing dependency', fatal=False)
|
||||
@@ -35,7 +36,8 @@ class DouyuBaseIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
|
||||
|
||||
def _get_cryptojs_md5(self, video_id):
|
||||
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
|
||||
return self.cache.load(
|
||||
'douyu', 'crypto-js-md5', min_ver='2024.07.04') or self._download_cryptojs_md5(video_id)
|
||||
|
||||
def _calc_sign(self, sign_func, video_id, a):
|
||||
b = uuid.uuid4().hex
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..utils import float_or_none, int_or_none, parse_iso8601
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
|
||||
@@ -37,12 +42,9 @@ class EitbIE(InfoExtractor):
|
||||
if not video_url:
|
||||
continue
|
||||
tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
|
||||
format_id = 'http'
|
||||
if tbr:
|
||||
format_id += f'-{int(tbr)}'
|
||||
formats.append({
|
||||
'url': rendition['PMD_URL'],
|
||||
'format_id': format_id,
|
||||
'format_id': join_nonempty('http', int_or_none(tbr)),
|
||||
'width': int_or_none(rendition.get('FRAME_WIDTH')),
|
||||
'height': int_or_none(rendition.get('FRAME_HEIGHT')),
|
||||
'tbr': tbr,
|
||||
|
||||
@@ -29,9 +29,6 @@ class EpornerIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'proxy': '127.0.0.1:8118',
|
||||
},
|
||||
}, {
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
|
||||
@@ -621,6 +621,9 @@ class FacebookIE(InfoExtractor):
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
if not formats:
|
||||
# Do not append false positive entry w/o any formats
|
||||
return
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
|
||||
|
||||
@@ -2167,7 +2167,15 @@ class GenericIE(InfoExtractor):
|
||||
urllib.parse.urlparse(fragment_query).query or fragment_query
|
||||
or urllib.parse.urlparse(manifest_url).query or None)
|
||||
|
||||
hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
|
||||
key_query = self._configuration_arg('key_query', [None], casesense=True)[0]
|
||||
if key_query is not None:
|
||||
info['extra_param_to_key_url'] = (
|
||||
urllib.parse.urlparse(key_query).query or key_query
|
||||
or urllib.parse.urlparse(manifest_url).query or None)
|
||||
|
||||
def hex_or_none(value):
    # Return `value` unchanged when the whole string consists of hex digits
    # (case-insensitive, with an optional leading '0x'); otherwise return None.
    if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE):
        return value
    return None
|
||||
|
||||
info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
|
||||
'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
|
||||
}) or None
|
||||
|
||||
@@ -5,6 +5,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
remove_start,
|
||||
@@ -287,7 +288,7 @@ class GoIE(AdobePassIE):
|
||||
if mobj:
|
||||
height = int(mobj.group(2))
|
||||
f.update({
|
||||
'format_id': (f'{format_id}-' if format_id else '') + f'{height}P',
|
||||
'format_id': join_nonempty(format_id, f'{height}P'),
|
||||
'width': int(mobj.group(1)),
|
||||
'height': height,
|
||||
})
|
||||
|
||||
32
yt_dlp/extractor/graspop.py
Normal file
32
yt_dlp/extractor/graspop.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import update_url, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GraspopIE(InfoExtractor):
    # Handles vod.graspop.be video pages; stream metadata is fetched from
    # the tv.proximus.be festivals endpoint using the numeric ID in the URL.
    _VALID_URL = r'https?://vod\.graspop\.be/[a-z]{2}/(?P<id>\d+)/'
    _TESTS = [{
        'url': 'https://vod.graspop.be/fr/101556/thy-art-is-murder-concert/',
        'info_dict': {
            'id': '101556',
            'ext': 'mp4',
            'title': 'Thy Art Is Murder',
            'thumbnail': r're:https://cdn-mds\.pickx\.be/festivals/v3/global/original/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        metadata = self._download_json(
            f'https://tv.proximus.be/MWC/videocenter/festivals/{video_id}/stream', video_id)

        # Downgrade manifest request to avoid incomplete certificate chain error
        manifest_url = update_url(metadata['source']['assetUri'], scheme='http')

        info = {
            'id': video_id,
            'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4'),
        }
        # Optional fields are only added when present with the expected type
        info.update(traverse_obj(metadata, {
            'title': ('name', {str}),
            'thumbnail': ('source', 'poster', {url_or_none}),
        }))
        return info
|
||||
@@ -3,6 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
urljoin,
|
||||
xpath_element,
|
||||
@@ -69,7 +70,7 @@ class HBOBaseIE(InfoExtractor):
|
||||
height = format_info.get('height')
|
||||
fmt = {
|
||||
'url': path,
|
||||
'format_id': 'http{}'.format(f'-{height}p' if height else ''),
|
||||
'format_id': join_nonempty('http'. height and f'{height}p'),
|
||||
'width': format_info.get('width'),
|
||||
'height': height,
|
||||
}
|
||||
|
||||
@@ -44,9 +44,6 @@ class HKETVIE(InfoExtractor):
|
||||
'duration': 907,
|
||||
'subtitles': {},
|
||||
},
|
||||
'params': {
|
||||
'geo_verification_proxy': '<HK proxy here>',
|
||||
},
|
||||
'skip': 'Geo restricted to HK',
|
||||
}]
|
||||
|
||||
|
||||
@@ -453,7 +453,7 @@ class InstagramIE(InstagramBaseIE):
|
||||
else:
|
||||
self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
|
||||
webpage = self._download_webpage(
|
||||
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False)
|
||||
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False) or ''
|
||||
additional_data = self._search_json(
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
|
||||
if not additional_data and not media:
|
||||
|
||||
@@ -2,7 +2,6 @@ import functools
|
||||
import hashlib
|
||||
import json
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
@@ -364,20 +364,25 @@ class JioCinemaSeriesIE(JioCinemaBaseIE):
|
||||
'title': 'naagin',
|
||||
},
|
||||
'playlist_mincount': 120,
|
||||
}, {
|
||||
'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820',
|
||||
'info_dict': {
|
||||
'id': '3499820',
|
||||
'title': 'mtv-splitsvilla-x5',
|
||||
},
|
||||
'playlist_mincount': 310,
|
||||
}]
|
||||
|
||||
def _entries(self, series_id):
|
||||
seasons = self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id,
|
||||
'Downloading series metadata JSON', query={
|
||||
'sort': 'season:asc',
|
||||
'id': series_id,
|
||||
'responseType': 'common',
|
||||
})
|
||||
seasons = traverse_obj(self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id,
|
||||
'Downloading series metadata JSON', query={'responseType': 'common'}), (
|
||||
'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter',
|
||||
'trayTabs', lambda _, v: v['id']))
|
||||
|
||||
for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1):
|
||||
for season_num, season in enumerate(seasons, start=1):
|
||||
season_id = season['id']
|
||||
label = season.get('season') or season_num
|
||||
label = season.get('label') or season_num
|
||||
for page_num in itertools.count(1):
|
||||
episodes = traverse_obj(self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
|
||||
|
||||
@@ -158,7 +158,7 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
|
||||
|
||||
class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
|
||||
'info_dict': {
|
||||
@@ -173,6 +173,13 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||
'title': 'Mood Hindi',
|
||||
},
|
||||
'playlist_mincount': 801,
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
|
||||
'info_dict': {
|
||||
'id': 'Me5RridRfDk_',
|
||||
'title': 'Taaza Tunes',
|
||||
},
|
||||
'playlist_mincount': 301,
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
|
||||
@@ -3,43 +3,52 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class KhanAcademyBaseIE(InfoExtractor):
|
||||
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||
|
||||
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70'
|
||||
|
||||
def _parse_video(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video['youtubeId'],
|
||||
'id': video.get('slug'),
|
||||
'title': video.get('title'),
|
||||
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'description': video.get('description'),
|
||||
'id': video['youtubeId'],
|
||||
'ie_key': 'Youtube',
|
||||
**traverse_obj(video, {
|
||||
'display_id': ('id', {str_or_none}),
|
||||
'title': ('translatedTitle', {str}),
|
||||
'thumbnail': ('thumbnailUrls', ..., 'url', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'description': ('description', {str}),
|
||||
}, get_all=False),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
content = self._download_json(
|
||||
'https://www.khanacademy.org/api/internal/graphql/FetchContentData',
|
||||
display_id, query={
|
||||
'https://www.khanacademy.org/api/internal/graphql/ContentForPath', display_id,
|
||||
query={
|
||||
'fastly_cacheable': 'persist_until_publish',
|
||||
'hash': '4134764944',
|
||||
'lang': 'en',
|
||||
'pcv': self._PUBLISHED_CONTENT_VERSION,
|
||||
'hash': '1242644265',
|
||||
'variables': json.dumps({
|
||||
'path': display_id,
|
||||
'queryParams': 'lang=en',
|
||||
'isModal': False,
|
||||
'followRedirects': True,
|
||||
'countryCode': 'US',
|
||||
'kaLocale': 'en',
|
||||
'clientPublishedContentVersion': self._PUBLISHED_CONTENT_VERSION,
|
||||
}),
|
||||
})['data']['contentJson']
|
||||
return self._parse_component_props(self._parse_json(content, display_id)['componentProps'])
|
||||
'lang': 'en',
|
||||
})['data']['contentRoute']['listedPathData']
|
||||
return self._parse_component_props(content, display_id)
|
||||
|
||||
|
||||
class KhanAcademyIE(KhanAcademyBaseIE):
|
||||
@@ -47,64 +56,98 @@ class KhanAcademyIE(KhanAcademyBaseIE):
|
||||
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
|
||||
_TEST = {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
|
||||
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
|
||||
'md5': '1d5c2e70fa6aa29c38eca419f12515ce',
|
||||
'info_dict': {
|
||||
'id': 'FlIG3TvQCBQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'The one-time pad',
|
||||
'description': 'The perfect cipher',
|
||||
'display_id': '716378217',
|
||||
'duration': 176,
|
||||
'uploader': 'Brit Cruise',
|
||||
'uploader_id': 'khanacademy',
|
||||
'uploader': 'Khan Academy',
|
||||
'uploader_id': '@khanacademy',
|
||||
'uploader_url': 'https://www.youtube.com/@khanacademy',
|
||||
'upload_date': '20120411',
|
||||
'timestamp': 1334170113,
|
||||
'license': 'cc-by-nc-sa',
|
||||
'live_status': 'not_live',
|
||||
'channel': 'Khan Academy',
|
||||
'channel_id': 'UC4a-Gbdw7vOaccHmFo40b9g',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC4a-Gbdw7vOaccHmFo40b9g',
|
||||
'channel_is_verified': True,
|
||||
'playable_in_embed': True,
|
||||
'categories': ['Education'],
|
||||
'creators': ['Brit Cruise'],
|
||||
'tags': [],
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int,
|
||||
'thumbnail': str,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'heatmap': list,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}
|
||||
|
||||
def _parse_component_props(self, component_props):
|
||||
video = component_props['tutorialPageData']['contentModel']
|
||||
info = self._parse_video(video)
|
||||
author_names = video.get('authorNames')
|
||||
info.update({
|
||||
'uploader': ', '.join(author_names) if author_names else None,
|
||||
'timestamp': parse_iso8601(video.get('dateAdded')),
|
||||
'license': video.get('kaUserLicense'),
|
||||
})
|
||||
return info
|
||||
def _parse_component_props(self, component_props, display_id):
    # Build the result for a single video page: start from the common
    # fields produced by _parse_video() and layer the video-only fields
    # on top. `display_id` is accepted for signature parity but unused here.
    video = component_props['content']
    info = self._parse_video(video)
    info.update(traverse_obj(video, {
        'creators': ('authorNames', ..., {str}),
        'timestamp': ('dateAdded', {parse_iso8601}),
        'license': ('kaUserLicense', {str}),
    }))
    return info
|
||||
|
||||
|
||||
class KhanAcademyUnitIE(KhanAcademyBaseIE):
|
||||
IE_NAME = 'khanacademy:unit'
|
||||
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
|
||||
_TEST = {
|
||||
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('1,2', '')) + '/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
|
||||
'info_dict': {
|
||||
'id': 'cryptography',
|
||||
'id': 'x48c910b6',
|
||||
'title': 'Cryptography',
|
||||
'description': 'How have humans protected their secret messages through history? What has changed today?',
|
||||
'display_id': 'computing/computer-science/cryptography',
|
||||
'_old_archive_ids': ['khanacademyunit cryptography'],
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science',
|
||||
'info_dict': {
|
||||
'id': 'x301707a0',
|
||||
'title': 'Computer science theory',
|
||||
'description': 'md5:4b472a4646e6cf6ec4ccb52c4062f8ba',
|
||||
'display_id': 'computing/computer-science',
|
||||
'_old_archive_ids': ['khanacademyunit computer-science'],
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}]
|
||||
|
||||
def _parse_component_props(self, component_props):
|
||||
curation = component_props['curation']
|
||||
def _parse_component_props(self, component_props, display_id):
|
||||
course = component_props['course']
|
||||
selected_unit = traverse_obj(course, (
|
||||
'unitChildren', lambda _, v: v['relativeUrl'] == f'/{display_id}', any)) or course
|
||||
|
||||
entries = []
|
||||
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
|
||||
for tutorial_number, tutorial in enumerate(tutorials, 1):
|
||||
chapter_info = {
|
||||
'chapter': tutorial.get('title'),
|
||||
'chapter_number': tutorial_number,
|
||||
'chapter_id': tutorial.get('id'),
|
||||
}
|
||||
for content_item in (tutorial.get('contentItems') or []):
|
||||
if content_item.get('kind') == 'Video':
|
||||
info = self._parse_video(content_item)
|
||||
info.update(chapter_info)
|
||||
entries.append(info)
|
||||
def build_entry(entry):
|
||||
return self.url_result(urljoin(
|
||||
'https://www.khanacademy.org', entry['canonicalUrl']),
|
||||
KhanAcademyIE, title=entry.get('translatedTitle'))
|
||||
|
||||
entries = traverse_obj(selected_unit, (
|
||||
(('unitChildren', ...), None), 'allOrderedChildren', ..., 'curatedChildren',
|
||||
lambda _, v: v['contentKind'] == 'Video' and v['canonicalUrl'], {build_entry}))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, curation.get('unit'), curation.get('title'),
|
||||
curation.get('description'))
|
||||
entries,
|
||||
display_id=display_id,
|
||||
**traverse_obj(selected_unit, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('translatedTitle', {str}),
|
||||
'description': ('translatedDescription', {str}),
|
||||
'_old_archive_ids': ('slug', {str}, {lambda x: [make_archive_id(self, x)] if x else None}),
|
||||
}))
|
||||
|
||||
114
yt_dlp/extractor/laracasts.py
Normal file
114
yt_dlp/extractor/laracasts.py
Normal file
@@ -0,0 +1,114 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LaracastsBaseIE(InfoExtractor):
    """Helpers shared by the Laracasts episode and series extractors."""

    def _get_prop_data(self, url, display_id):
        """Download the page at ``url`` and return its embedded ``props`` data.

        The HTML element with id ``app`` is located, its ``data-page``
        attribute is parsed as JSON, and the ``props`` member of that
        document is returned.
        """
        page = self._download_webpage(url, display_id)
        app_html = get_element_html_by_id('app', page)
        props_path = ({extract_attributes}, 'data-page', {json.loads}, 'props')
        return traverse_obj(app_html, props_path)

    def _parse_episode(self, episode):
        """Turn one episode object into a ``url_transparent`` Vimeo result.

        Episodes without a ``vimeoId`` are reported through
        ``raise_login_required``.
        """
        has_video = traverse_obj(episode, 'vimeoId')
        if not has_video:
            self.raise_login_required('This video is only available for subscribers.')

        # The Vimeo player URL is smuggled together with the Laracasts referrer
        player_url = VimeoIE._smuggle_referrer(
            f'https://player.vimeo.com/video/{episode["vimeoId"]}', 'https://laracasts.com/')

        field_map = {
            'id': ('id', {int}, {str_or_none}),
            'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
            'title': ('title', {clean_html}),
            'season_number': ('chapter', {int_or_none}),
            'episode_number': ('position', {int_or_none}),
            'description': ('body', {clean_html}),
            'thumbnail': ('largeThumbnail', {url_or_none}),
            'duration': ('length', {int_or_none}),
            'date': ('dateSegments', 'published', {unified_strdate}),
        }
        return self.url_result(
            player_url, VimeoIE, url_transparent=True,
            **traverse_obj(episode, field_map))
|
||||
|
||||
|
||||
class LaracastsIE(LaracastsBaseIE):
    """Extractor for a single episode of a Laracasts series."""

    IE_NAME = 'laracasts'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+/episodes/\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1',
        'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc',
        'info_dict': {
            'id': '922040563',
            'title': 'Hello, Laravel',
            'ext': 'mp4',
            'duration': 519,
            'date': '20240312',
            'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png',
            'description': 'md5:ddd658bb241975871d236555657e1dd1',
            'season_number': 1,
            'season': 'Season 1',
            'episode_number': 1,
            'episode': 'Episode 1',
            'uploader': 'Laracasts',
            'uploader_id': 'user20182673',
            'uploader_url': 'https://vimeo.com/user20182673',
        },
        'expected_warnings': ['Failed to parse XML'],  # TODO: Remove when vimeo extractor is fixed
    }]

    def _real_extract(self, url):
        """Return the result for the ``lesson`` object found in the page props."""
        episode_path = self._match_id(url)
        page_props = self._get_prop_data(url, episode_path)
        return self._parse_episode(page_props['lesson'])
|
||||
|
||||
|
||||
class LaracastsPlaylistIE(LaracastsBaseIE):
    """Extractor for a whole Laracasts series, returned as a playlist."""

    IE_NAME = 'laracasts:series'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11',
        'info_dict': {
            'title': '30 Days to Learn Laravel',
            'id': '210',
            'thumbnail': 'https://laracasts.s3.amazonaws.com/series/thumbnails/social-cards/30-days-to-learn-laravel-11.png?v=2',
            'duration': 30600.0,
            'modified_date': '20240511',
            'description': 'md5:27c260a1668a450984e8f901579912dd',
            'categories': ['Frameworks'],
            'tags': ['Laravel'],
            'display_id': '30-days-to-learn-laravel-11',
        },
        'playlist_count': 30,
    }]

    def _real_extract(self, url):
        """Build a playlist from the ``series`` object found in the page props.

        Only episodes carrying a ``vimeoId`` are turned into entries.
        """
        series_slug = self._match_id(url)
        series = self._get_prop_data(url, series_slug)['series']

        series_fields = traverse_obj(series, {
            'title': ('title', {str}),
            'id': ('id', {int}, {str_or_none}),
            'description': ('body', {clean_html}),
            'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
            'duration': ('runTime', {parse_duration}),
            'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}),
            'tags': ('topics', ..., 'name', {str}),
            'modified_date': ('lastUpdated', {unified_strdate}),
        })

        episodes = traverse_obj(
            series, ('chapters', ..., 'episodes', lambda _, v: v['vimeoId'], {self._parse_episode}))

        return self.playlist_result(episodes, display_id=series_slug, **series_fields)
|
||||
@@ -1,51 +1,35 @@
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import xpath_text
|
||||
|
||||
|
||||
class MatchTVIE(InfoExtractor):
    """Extractor for the Match TV live channel.

    Both the public on-air page and the embeddable channel iframe are
    accepted; extraction always reads the channel iframe page.
    """

    _VALID_URL = [
        r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
        r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
    ]
    _TESTS = [{
        'url': 'http://matchtv.ru/on-air/',
        'info_dict': {
            'id': 'matchtv-live',
            'ext': 'mp4',
            'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://video.matchtv.ru/iframe/channel/106',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the live stream info dict with formats from the HLS manifest."""
        # There is a single live channel, so the id is fixed
        video_id = 'matchtv-live'
        webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)
        # The player config URL is rewritten from its '/feed/' form to the
        # '/media/' form and given an '.m3u8' suffix to obtain the manifest URL
        video_url = self._html_search_regex(
            r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8'
        return {
            'id': video_id,
            'title': 'Матч ТВ - Прямой эфир',
            'is_live': True,
            'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
        }
|
||||
|
||||
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
|
||||
|
||||
@@ -212,13 +213,14 @@ class MediasiteIE(InfoExtractor):
|
||||
stream_type, 'type%u' % stream_type)
|
||||
|
||||
stream_formats = []
|
||||
for unum, video_url in enumerate(video_urls):
|
||||
video_url = url_or_none(video_url.get('Location'))
|
||||
for unum, video in enumerate(video_urls):
|
||||
video_url = url_or_none(video.get('Location'))
|
||||
if not video_url:
|
||||
continue
|
||||
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
|
||||
|
||||
media_type = video_url.get('MediaType')
|
||||
media_type = video.get('MediaType')
|
||||
ext = mimetype2ext(video.get('MimeType'))
|
||||
if media_type == 'SS':
|
||||
stream_formats.extend(self._extract_ism_formats(
|
||||
video_url, resource_id,
|
||||
@@ -229,15 +231,20 @@ class MediasiteIE(InfoExtractor):
|
||||
video_url, resource_id,
|
||||
mpd_id=f'{stream_id}-{snum}.{unum}',
|
||||
fatal=False))
|
||||
elif ext in ('m3u', 'm3u8'):
|
||||
stream_formats.extend(self._extract_m3u8_formats(
|
||||
video_url, resource_id,
|
||||
m3u8_id=f'{stream_id}-{snum}.{unum}',
|
||||
fatal=False))
|
||||
else:
|
||||
stream_formats.append({
|
||||
'format_id': f'{stream_id}-{snum}.{unum}',
|
||||
'url': video_url,
|
||||
'ext': mimetype2ext(video_url.get('MimeType')),
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
if stream.get('HasSlideContent', False):
|
||||
images = player_options['PlayerLayoutOptions']['Images']
|
||||
images = traverse_obj(player_options, ('PlayerLayoutOptions', 'Images', {dict}))
|
||||
if stream.get('HasSlideContent') and images:
|
||||
stream_formats.append(self.__extract_slides(
|
||||
stream_id=stream_id,
|
||||
snum=snum,
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, traverse_obj, unified_timestamp
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MicrosoftEmbedIE(InfoExtractor):
|
||||
@@ -63,3 +72,250 @@ class MicrosoftEmbedIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
|
||||
class MicrosoftMediusBaseIE(InfoExtractor):
    """Helpers shared by the Microsoft Medius based extractors."""

    @staticmethod
    def _sub_to_dict(subtitle_list):
        """Group subtitle entries by their ``tag`` key.

        Returns a dict mapping each tag to the list of entries carrying it,
        with the ``tag`` key removed from every entry. Entries without a
        ``tag`` are grouped under ``'und'``. The dicts passed in are left
        untouched; the result holds shallow copies.
        """
        subtitles = {}
        for sub in subtitle_list:
            # Copy first so that popping 'tag' does not modify the caller's dict
            entry = dict(sub)
            subtitles.setdefault(entry.pop('tag', 'und'), []).append(entry)
        return subtitles

    def _extract_ism(self, ism_url, video_id):
        """Extract ISM formats, marking non-English ones as less preferred.

        A format gets ``language_preference`` -10 when its language is not
        ``'eng'`` and its format id does not contain ``'English'``.
        """
        formats = self._extract_ism_formats(ism_url, video_id)
        for fmt in formats:
            if fmt['language'] != 'eng' and 'English' not in fmt['format_id']:
                fmt['language_preference'] = -10
        return formats
|
||||
|
||||
|
||||
class MicrosoftMediusIE(MicrosoftMediusBaseIE):
    """Extractor for videos embedded from medius.microsoft.com."""

    _VALID_URL = r'https?://medius\.microsoft\.com/Embed/(?:Video\?id=|video-nc/|VideoDetails/)(?P<id>[\da-f-]+)'

    _TESTS = [{
        'url': 'https://medius.microsoft.com/Embed/video-nc/9640d86c-f513-4889-959e-5dace86e7d2b',
        'info_dict': {
            'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
            'ext': 'ismv',
            'title': 'Rapidly code, test and ship from secure cloud developer environments',
            'description': 'md5:33c8e4facadc438613476eea24165f71',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
            'subtitles': 'count:30',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/video-nc/81215af5-c813-4dcd-aede-94f4e1a7daa3',
        'info_dict': {
            'id': '81215af5-c813-4dcd-aede-94f4e1a7daa3',
            'ext': 'ismv',
            'title': 'Microsoft Build opening',
            'description': 'md5:43455096141077a1f23144cab8cec1cb',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
            'subtitles': 'count:31',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/VideoDetails/78493569-9b3b-4a85-a409-ee76e789e25c',
        'info_dict': {
            'id': '78493569-9b3b-4a85-a409-ee76e789e25c',
            'ext': 'ismv',
            'title': ' Anomaly Detection & Root cause at Edge',
            'description': 'md5:f8f1ad93d7918649bfb97fa081b03b83',
            'thumbnail': r're:https://mediusdownload.event.microsoft.com/asset.*\.jpg.*',
            'subtitles': 'count:17',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/Video?id=0dc69bda-079b-4070-a7db-a8da1a06a9c7',
        'only_matching': True,
    }, {
        'url': 'https://medius.microsoft.com/Embed/video-nc/fe823a91-959c-465b-96d4-8f4db624f72c',
        'only_matching': True,
    }]

    def _extract_subtitle(self, webpage, video_id):
        """Collect subtitles from the embed page, grouped by language tag.

        The ``captionsConfiguration`` JSON is tried first; when it yields no
        captions, ``.vtt`` URLs found in ``var file = {...}`` script
        assignments are used, with the tag taken from the file name.
        """
        captions_config = self._search_json(
            r'const\s+captionsConfiguration\s*=', webpage, 'captions', video_id, default=None)
        captions = traverse_obj(captions_config, (
            'languageList', lambda _, v: url_or_none(v['src']), {
                'url': 'src',
                'tag': ('srclang', {str}),
                'name': ('kind', {str}),
            }))

        if not captions:
            vtt_urls = re.findall(r'var\s+file\s+=\s+\{[^}]+\'(https://[^\']+\.vtt\?[^\']+)', webpage)
            captions = [
                {'url': vtt_url, 'tag': url_basename(vtt_url).split('.vtt')[0].split('_')[-1]}
                for vtt_url in vtt_urls]

        return self._sub_to_dict(captions)

    def _real_extract(self, url):
        """Extract metadata, ISM formats and subtitles from the embed page."""
        video_id = self._match_id(url)
        # Whatever URL form was given, the 'video-nc' embed page is fetched
        webpage = self._download_webpage(f'https://medius.microsoft.com/Embed/video-nc/{video_id}', video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        ism_url = self._search_regex(r'StreamUrl\s*=\s*"([^"]+manifest)"', webpage, 'ism url')
        formats = self._extract_ism(ism_url, video_id)
        thumbnail = self._og_search_thumbnail(webpage)
        subtitles = self._extract_subtitle(webpage, video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class MicrosoftLearnPlaylistIE(InfoExtractor):
    """Extractor for Microsoft Learn shows and events, returned as playlists."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?(?P<type>shows|events)/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners',
        'info_dict': {
            'id': 'bash-for-beginners',
            'title': 'Bash for Beginners',
            'description': 'md5:16a91c07222117d1e00912f0dbc02c2c',
        },
        'playlist_count': 20,
    }, {
        'url': 'https://learn.microsoft.com/en-us/events/build-2022',
        'info_dict': {
            'id': 'build-2022',
            'title': 'Microsoft Build 2022 - Events',
            'description': 'md5:c16b43848027df837b22c6fbac7648d3',
        },
        'playlist_count': 201,
    }]

    def _entries(self, url_base, video_id):
        """Yield one URL result per item, paging through the search API.

        Paging advances ``$skip`` by the number of URLs received and stops
        once the reported ``count`` is reached or a page yields no URLs.
        """
        offset = 0
        while True:
            page = self._download_json(
                url_base, video_id, f'Downloading entries {offset}',
                query={'locale': 'en-us', '$skip': offset})
            paths = traverse_obj(page, ('results', ..., 'url', {str}))
            for path in paths:
                yield self.url_result(f'https://learn.microsoft.com/en-us{path}')
            offset += len(paths)
            if offset >= page.get('count', 0) or not paths:
                break

    def _real_extract(self, url):
        """Return a playlist whose entries are fetched from the content browser API."""
        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        # Shows are listed by episode, everything else by session
        if playlist_type == 'shows':
            sub_type = 'episodes'
        else:
            sub_type = 'sessions'

        url_base = f'https://learn.microsoft.com/api/contentbrowser/search/{playlist_type}/{playlist_id}/{sub_type}'
        return self.playlist_result(
            self._entries(url_base, playlist_id), playlist_id,
            title=title, description=description)
|
||||
|
||||
|
||||
class MicrosoftLearnEpisodeIE(MicrosoftMediusBaseIE):
    """Extractor for a single episode of a Microsoft Learn show."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?shows/[\w-]+/(?P<id>[^?#/]+)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners/what-is-the-difference-between-a-terminal-and-a-shell-2-of-20-bash-for-beginners/',
        'info_dict': {
            'id': 'd44e1a03-a0e5-45c2-9496-5c9fa08dc94c',
            'ext': 'ismv',
            'title': 'What is the Difference Between a Terminal and a Shell? (Part 2 of 20)',
            'description': 'md5:7bbbfb593d21c2cf2babc3715ade6b88',
            'timestamp': 1676339547,
            'upload_date': '20230214',
            'thumbnail': r're:https://learn\.microsoft\.com/video/media/.*\.png',
            'subtitles': 'count:14',
        },
    }]

    def _real_extract(self, url):
        """Resolve the page's ``entryId`` and extract the video via the entries API."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        entry_id = self._html_search_meta('entryId', webpage, 'entryId', fatal=True)
        video_info = self._download_json(
            f'https://learn.microsoft.com/api/video/public/v1/entries/{entry_id}', video_id)

        formats = self._extract_ism(video_info['publicVideo']['adaptiveVideoUrl'], video_id)
        caption_entries = traverse_obj(video_info, (
            'publicVideo', 'captions', lambda _, v: url_or_none(v['url']), {
                'tag': ('language', {str}),
                'url': 'url',
            }))
        subtitles = self._sub_to_dict(caption_entries)

        return {
            'id': entry_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            **traverse_obj(video_info, {
                'timestamp': ('createTime', {parse_iso8601}),
                'thumbnails': ('publicVideo', 'thumbnailOtherSizes', ..., {'url': {url_or_none}}),
            }),
        }
|
||||
|
||||
|
||||
class MicrosoftLearnSessionIE(InfoExtractor):
    """Extractor for a single session of a Microsoft Learn event."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?events/[\w-]+/(?P<id>[^?#/]+)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/events/build-2022/ts01-rapidly-code-test-ship-from-secure-cloud-developer-environments',
        'info_dict': {
            'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
            'ext': 'ismv',
            'title': 'Rapidly code, test and ship from secure cloud developer environments - Events',
            'description': 'md5:f26c1a85d41c1cffd27a0279254a25c3',
            'timestamp': 1653408600,
            'upload_date': '20220524',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
        },
    }]

    def _real_extract(self, url):
        """Hand the page's ``externalVideoUrl`` to ``MicrosoftMediusIE``.

        Title, description and timestamp are taken from this page and passed
        along with the ``url_transparent`` result.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        timestamp = parse_iso8601(self._html_search_meta('startDate', webpage, 'startDate'))

        external_url = self._html_search_meta('externalVideoUrl', webpage, 'videoUrl', fatal=True)
        return self.url_result(
            external_url, url_transparent=True, ie=MicrosoftMediusIE,
            title=title, description=description, timestamp=timestamp)
|
||||
|
||||
|
||||
class MicrosoftBuildIE(InfoExtractor):
    """Extractor for Microsoft Build sessions and the full session list."""

    _VALID_URL = [
        r'https?://build\.microsoft\.com/[\w-]+/sessions/(?P<id>[\da-f-]+)',
        r'https?://build\.microsoft\.com/[\w-]+/(?P<id>sessions)/?(?:[?#]|$)',
    ]

    _TESTS = [{
        'url': 'https://build.microsoft.com/en-US/sessions/b49feb31-afcd-4217-a538-d3ca1d171198?source=sessions',
        'info_dict': {
            'id': 'aee55fb5-fcf9-4b38-b764-a3527cb57554',
            'ext': 'ismv',
            'title': 'Microsoft Build opening keynote',
            'description': 'md5:d38338f336ef4b6ef9ad2a7466a76655',
            'timestamp': 1716307200,
            'upload_date': '20240521',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
        },
    }, {
        'url': 'https://build.microsoft.com/en-US/sessions',
        'info_dict': {
            'id': 'sessions',
        },
        'playlist_mincount': 418,
    }]

    def _real_extract(self, url):
        """Return either the whole session playlist or the one matching session.

        The complete session list is downloaded in both cases; a single
        session is then picked out of it by its ``sessionId``.
        """
        video_id = self._match_id(url)
        sessions = self._download_json(
            'https://api-v2.build.microsoft.com/api/session/all/en-US', video_id, 'Downloading video info')

        entries = []
        for session in sessions:
            entries.append(self.url_result(
                session['onDemand'], ie=MicrosoftMediusIE, url_transparent=True,
                **traverse_obj(session, {
                    'id': ('sessionId', {str}),
                    'title': ('title', {str}),
                    'description': ('description', {str}),
                    'timestamp': ('startDateTime', {parse_iso8601}),
                })))

        if video_id == 'sessions':
            return self.playlist_result(entries, video_id)
        return traverse_obj(entries, (lambda _, v: v['id'] == video_id), get_all=False)
|
||||
|
||||
@@ -1,188 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
    """Helpers shared by the Microsoft Virtual Academy extractors."""

    def _extract_base_url(self, course_id, display_id):
        """Download and return the base URL data for ``course_id``."""
        api_url = f'https://api-mlxprod.microsoft.com/services/products/anonymous/{course_id}'
        return self._download_json(api_url, display_id, 'Downloading course base URL')

    def _extract_chapter_and_title(self, title):
        """Split a ``"<number> | <title>"`` string into ``(chapter, title)``.

        Returns ``(None, None)`` for an empty title and ``(None, title)``
        when the title carries no chapter prefix.
        """
        if not title:
            return None, None
        match = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
        if match is None:
            return None, title
        return int(match.group('chapter')), match.group('title')
|
||||
|
||||
|
||||
class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
    """Extractor for a single Microsoft Virtual Academy video."""

    IE_NAME = 'mva'
    IE_DESC = 'Microsoft Virtual Academy videos'
    _VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)'

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
        'md5': '7826c44fc31678b12ad8db11f6b5abb9',
        'info_dict': {
            'id': 'gfVXISmEB_6804984382',
            'ext': 'mp4',
            'title': 'Course Introduction',
            'formats': 'mincount:3',
            'subtitles': {
                'en': [{
                    'ext': 'ttml',
                }],
            },
        },
    }, {
        'url': 'mva:11788:gfVXISmEB_6804984382',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract formats and subtitles from the video settings XML.

        A ``base_url`` smuggled into ``url`` is used when present; otherwise
        it is looked up through ``_extract_base_url``.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        course_id, video_id = self._match_valid_url(url).group('course_id', 'id')

        base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)

        settings = self._download_xml(
            f'{base_url}/content/content_{video_id}/videosettings.xml?v=1',
            video_id, 'Downloading video settings XML')

        # Only the title half of the (chapter, title) pair is needed here
        title = self._extract_chapter_and_title(xpath_text(
            settings, './/Title', 'title', fatal=True))[1]

        formats = []
        for source_group in settings.findall('.//MediaSources'):
            group_type = source_group.get('videoType')
            for source in source_group.findall('./MediaSource'):
                video_url = source.text
                if not video_url or not video_url.startswith('http'):
                    continue
                if group_type == 'smoothstreaming':
                    formats.extend(self._extract_ism_formats(
                        video_url, video_id, 'mss', fatal=False))
                    continue
                video_mode = source.get('videoMode')
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
                codec = source.get('codec')
                # 'audio,video' yields both codecs; a lone value is the video codec
                codec_parts = codec.split(',') if codec else []
                acodec = vcodec = None
                if len(codec_parts) == 2:
                    acodec, vcodec = codec_parts
                elif len(codec_parts) == 1:
                    vcodec = codec_parts[0]
                formats.append({
                    'url': video_url,
                    'format_id': video_mode,
                    'height': height,
                    'acodec': acodec,
                    'vcodec': vcodec,
                })

        subtitles = {}
        for marker_source in settings.findall('.//MarkerResourceSource'):
            subtitle_path = marker_source.text
            if subtitle_path:
                subtitles.setdefault('en', []).append({
                    'url': f'{base_url}/{subtitle_path}',
                    'ext': marker_source.get('type'),
                })

        return {
            'id': video_id,
            'title': title,
            'subtitles': subtitles,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
    """Extractor for a whole Microsoft Virtual Academy course."""

    IE_NAME = 'mva:course'
    IE_DESC = 'Microsoft Virtual Academy courses'
    _VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'info_dict': {
            'id': '11788',
            'title': 'Microsoft Azure Fundamentals: Virtual Machines',
        },
        'playlist_count': 36,
    }, {
        # with emphasized chapters
        'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
        'info_dict': {
            'id': '16335',
            'title': 'Developing Windows 10 Games with Construct 2',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'only_matching': True,
    }, {
        'url': 'mva:course:11788',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the single-video extractor already accepts."""
        if MicrosoftVirtualAcademyIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Build a playlist of the course's video items from its manifest.

        Each entry is a ``url_transparent`` result for an ``mva:`` URL with
        the course base URL smuggled in.
        """
        course_id, display_id = self._match_valid_url(url).group('id', 'display_id')

        base_url = self._extract_base_url(course_id, display_id)

        manifest = self._download_json(
            f'{base_url}/imsmanifestlite.json',
            display_id, 'Downloading course manifest JSON')['manifest']

        organization = manifest['organizations']['organization'][0]

        entries = []
        for chapter in organization['item']:
            chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
            chapter_id = chapter.get('@identifier')
            for item in chapter.get('item', []):
                item_id = item.get('@identifier')
                if not item_id:
                    continue
                metadata = item.get('resource', {}).get('metadata') or {}
                # Items that are not videos are left out of the playlist
                if metadata.get('learningresourcetype') != 'Video':
                    continue
                item_title = self._extract_chapter_and_title(item.get('title'))[1]
                entries.append({
                    '_type': 'url_transparent',
                    'url': smuggle_url(
                        f'mva:{course_id}:{item_id}', {'base_url': base_url}),
                    'title': item_title,
                    'description': metadata.get('description'),
                    'duration': parse_duration(metadata.get('duration')),
                    'chapter': chapter_title,
                    'chapter_number': chapter_number,
                    'chapter_id': chapter_id,
                })

        course_title = organization.get('title') or manifest.get('metadata', {}).get('title')

        return self.playlist_result(entries, course_id, course_title)
|
||||
@@ -9,9 +9,10 @@ from ..utils import (
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MLBBaseIE(InfoExtractor):
|
||||
@@ -326,15 +327,20 @@ class MLBTVIE(InfoExtractor):
|
||||
video_id)['data']['Airings']
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for airing in airings:
|
||||
m3u8_url = self._download_json(
|
||||
for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']):
|
||||
format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing)
|
||||
m3u8_url = traverse_obj(self._download_json(
|
||||
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
|
||||
headers={
|
||||
note=f'Downloading {format_id} stream info JSON',
|
||||
errnote=f'Failed to download {format_id} stream info, skipping',
|
||||
fatal=False, headers={
|
||||
'Authorization': self._access_token,
|
||||
'Accept': 'application/vnd.media-service+json; version=2',
|
||||
})['stream']['complete']
|
||||
}), ('stream', 'complete', {url_or_none}))
|
||||
if not m3u8_url:
|
||||
continue
|
||||
f, s = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id=join_nonempty(airing.get('feedType'), airing.get('feedLanguage')))
|
||||
m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
|
||||
formats.extend(f)
|
||||
self._merge_subtitles(s, target=subtitles)
|
||||
|
||||
|
||||
@@ -5,39 +5,103 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
try_get,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
parse_count,
|
||||
remove_end,
|
||||
update_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class MurrtubeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
murrtube:|
|
||||
https?://murrtube\.net/videos/(?P<slug>[a-z0-9\-]+)\-
|
||||
https?://murrtube\.net/(?:v/|videos/(?P<slug>[a-z0-9-]+?)-)
|
||||
)
|
||||
(?P<id>[a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12})
|
||||
(?P<id>[A-Z0-9]{4}|[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})
|
||||
'''
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
|
||||
'md5': '169f494812d9a90914b42978e73aa690',
|
||||
'md5': '70380878a77e8565d4aea7f68b8bbb35',
|
||||
'info_dict': {
|
||||
'id': '148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
|
||||
'id': 'ca885d8456b95de529b6723b158032e11115d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Inferno X Skyler',
|
||||
'description': 'Humping a very good slutty sheppy (roomate)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 284,
|
||||
'uploader': 'Inferno Wolf',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://storage.murrtube.net/murrtube-production/ekbs3zcfvuynnqfx72nn2tkokvsd',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['hump', 'breed', 'Fursuit', 'murrsuit', 'bareback'],
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://murrtube.net/v/0J2Q',
|
||||
'md5': '31262f6ac56f0ca75e5a54a0f3fefcb6',
|
||||
'info_dict': {
|
||||
'id': '8442998c52134968d9caa36e473e1a6bac6ca',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Hayel',
|
||||
'title': 'Who\'s in charge now?',
|
||||
'description': 'md5:795791e97e5b0f1805ea84573f02a997',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://storage.murrtube.net/murrtube-production/fb1ojjwiucufp34ya6hxu5vfqi5s',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_count(self, name, html):
|
||||
return parse_count(self._search_regex(
|
||||
rf'([\d,]+)\s+<span[^>]*>{name}</span>', html, name, default=None))
|
||||
|
||||
def _real_initialize(self):
|
||||
homepage = self._download_webpage(
|
||||
'https://murrtube.net', None, note='Getting session token')
|
||||
self._request_webpage(
|
||||
'https://murrtube.net/accept_age_check', None, 'Setting age cookie',
|
||||
data=urlencode_postdata(self._hidden_inputs(homepage)))
|
||||
|
||||
def _real_extract(self, url):
    """Extract one Murrtube video from its HTML watch page.

    Downloads the page at ``url``, reads the HLS playlist URL from the
    ``data-url`` attribute of the element with id ``video`` and derives the
    final video ID from the playlist path. Metadata comes from OpenGraph
    tags and from counters/uploader markup on the same page.

    Raises ExtractorError for ``murrtube:`` prefixed URLs, which are not
    supported by this page-based extraction.
    """
    video_id = self._match_id(url)
    # The 'id' group of _VALID_URL only matches a 4-character [A-Z0-9] code or
    # a hex UUID, so the matched ID can never start with 'murrtube:'. The
    # prefix therefore has to be detected on the URL itself.
    if url.startswith('murrtube:'):
        raise ExtractorError('Support for murrtube: prefix URLs is broken')
    video_page = self._download_webpage(url, video_id)
    video_attrs = extract_attributes(get_element_html_by_id('video', video_page))
    # NOTE(review): query=None presumably strips the query string - confirm against update_url
    playlist = update_url(video_attrs['data-url'], query=None)
    # From here on the ID is the hex directory name that holds index.m3u8
    video_id = self._search_regex(r'/([\da-f]+)/index\.m3u8', playlist, 'video id')

    return {
        'id': video_id,
        'title': remove_end(self._og_search_title(video_page), ' - Murrtube'),
        'age_limit': 18,
        'formats': self._extract_m3u8_formats(playlist, video_id, 'mp4'),
        'description': self._og_search_description(video_page),
        # default='' keeps update_url from receiving None; an empty result becomes None
        'thumbnail': update_url(self._og_search_thumbnail(video_page, default=''), query=None) or None,
        'uploader': clean_html(get_element_by_class('pl-1 is-size-6 has-text-lighter', video_page)),
        'view_count': self._extract_count('Views', video_page),
        'like_count': self._extract_count('Likes', video_page),
        'comment_count': self._extract_count('Comments', video_page),
    }
|
||||
|
||||
|
||||
class MurrtubeUserIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_DESC = 'Murrtube user profile'
|
||||
_VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
|
||||
_TESTS = [{
|
||||
'url': 'https://murrtube.net/stormy',
|
||||
'info_dict': {
|
||||
'id': 'stormy',
|
||||
},
|
||||
'playlist_mincount': 27,
|
||||
}]
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
def _download_gql(self, video_id, op, note=None, fatal=True):
|
||||
result = self._download_json(
|
||||
@@ -46,73 +110,6 @@ class MurrtubeIE(InfoExtractor):
|
||||
headers={'Content-Type': 'application/json'})
|
||||
return result['data']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_gql(video_id, {
|
||||
'operationName': 'Medium',
|
||||
'variables': {
|
||||
'id': video_id,
|
||||
},
|
||||
'query': '''\
|
||||
query Medium($id: ID!) {
|
||||
medium(id: $id) {
|
||||
title
|
||||
description
|
||||
key
|
||||
duration
|
||||
commentsCount
|
||||
likesCount
|
||||
viewsCount
|
||||
thumbnailKey
|
||||
tagList
|
||||
user {
|
||||
name
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
}'''})
|
||||
meta = data['medium']
|
||||
|
||||
storage_url = 'https://storage.murrtube.net/murrtube/'
|
||||
format_url = storage_url + meta.get('key', '')
|
||||
thumbnail = storage_url + meta.get('thumbnailKey', '')
|
||||
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native', fatal=False)
|
||||
else:
|
||||
formats = [{'url': format_url}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': meta.get('title'),
|
||||
'description': meta.get('description'),
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(meta.get('duration')),
|
||||
'uploader': try_get(meta, lambda x: x['user']['name']),
|
||||
'view_count': meta.get('viewsCount'),
|
||||
'like_count': meta.get('likesCount'),
|
||||
'comment_count': meta.get('commentsCount'),
|
||||
'tags': meta.get('tagList'),
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
|
||||
class MurrtubeUserIE(MurrtubeIE): # XXX: Do not subclass from concrete IE
|
||||
_WORKING = False
|
||||
IE_DESC = 'Murrtube user profile'
|
||||
_VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
|
||||
_TEST = {
|
||||
'url': 'https://murrtube.net/stormy',
|
||||
'info_dict': {
|
||||
'id': 'stormy',
|
||||
},
|
||||
'playlist_mincount': 27,
|
||||
}
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
def _fetch_page(self, username, user_id, page):
|
||||
data = self._download_gql(username, {
|
||||
'operationName': 'Media',
|
||||
|
||||
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
mimetype2ext,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
@@ -498,10 +499,8 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
m3u8_id=format_id, fatal=False))
|
||||
continue
|
||||
tbr = int_or_none(va.get('bitrate'), 1000)
|
||||
if tbr:
|
||||
format_id += f'-{tbr}'
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'format_id': join_nonempty(format_id, tbr),
|
||||
'url': public_url,
|
||||
'width': int_or_none(va.get('width')),
|
||||
'height': int_or_none(va.get('height')),
|
||||
|
||||
@@ -22,12 +22,22 @@ from ..utils import (
|
||||
|
||||
|
||||
class NetEaseMusicBaseIE(InfoExtractor):
|
||||
_FORMATS = ['bMusic', 'mMusic', 'hMusic']
|
||||
# XXX: _extract_formats logic depends on the order of the levels in each tier
|
||||
_LEVELS = (
|
||||
'standard', # free tier; 标准; 128kbps mp3 or aac
|
||||
'higher', # free tier; 192kbps mp3 or aac
|
||||
'exhigh', # free tier; 极高 (HQ); 320kbps mp3 or aac
|
||||
'lossless', # VIP tier; 无损 (SQ); 48kHz/16bit flac
|
||||
'hires', # VIP tier; 高解析度无损 (Hi-Res); 192kHz/24bit flac
|
||||
'jyeffect', # VIP tier; 高清臻音 (Spatial Audio); 96kHz/24bit flac
|
||||
'jymaster', # SVIP tier; 超清母带 (Master); 192kHz/24bit flac
|
||||
'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac
|
||||
)
|
||||
_API_BASE = 'http://music.163.com/api/'
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@staticmethod
|
||||
def kilo_or_none(value):
|
||||
def _kilo_or_none(value):
|
||||
return int_or_none(value, scale=1000)
|
||||
|
||||
def _create_eapi_cipher(self, api_path, query_body, cookies):
|
||||
@@ -66,45 +76,43 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
**headers,
|
||||
}, **kwargs)
|
||||
|
||||
def _call_player_api(self, song_id, bitrate):
|
||||
def _call_player_api(self, song_id, level):
|
||||
return self._download_eapi_json(
|
||||
'/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate},
|
||||
note=f'Downloading song URL info: bitrate {bitrate}')
|
||||
'/song/enhance/player/url/v1', song_id,
|
||||
{'ids': f'[{song_id}]', 'level': level, 'encodeType': 'flac'},
|
||||
note=f'Downloading song URL info: level {level}')
|
||||
|
||||
def extract_formats(self, info):
|
||||
err = 0
|
||||
def _extract_formats(self, info):
|
||||
formats = []
|
||||
song_id = info['id']
|
||||
for song_format in self._FORMATS:
|
||||
details = info.get(song_format)
|
||||
if not details:
|
||||
for level in self._LEVELS:
|
||||
song = traverse_obj(
|
||||
self._call_player_api(song_id, level), ('data', lambda _, v: url_or_none(v['url']), any))
|
||||
if not song:
|
||||
break # Media is not available due to removal or geo-restriction
|
||||
actual_level = song.get('level')
|
||||
if actual_level and actual_level != level:
|
||||
if level in ('lossless', 'jymaster'):
|
||||
break # We've already extracted the highest level of the user's account tier
|
||||
continue
|
||||
bitrate = int_or_none(details.get('bitrate')) or 999000
|
||||
for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))):
|
||||
song_url = song['url']
|
||||
if self._is_valid_url(song_url, info['id'], 'song'):
|
||||
formats.append({
|
||||
'url': song_url,
|
||||
'format_id': song_format,
|
||||
'asr': traverse_obj(details, ('sr', {int_or_none})),
|
||||
**traverse_obj(song, {
|
||||
'ext': ('type', {str}),
|
||||
'abr': ('br', {self.kilo_or_none}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
elif err == 0:
|
||||
err = traverse_obj(song, ('code', {int})) or 0
|
||||
|
||||
formats.append({
|
||||
'url': song['url'],
|
||||
'format_id': level,
|
||||
'vcodec': 'none',
|
||||
**traverse_obj(song, {
|
||||
'ext': ('type', {str}),
|
||||
'abr': ('br', {self._kilo_or_none}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
if not actual_level:
|
||||
break # Only 1 level is available if API does not return a value (netease:program)
|
||||
if not formats:
|
||||
if err != 0 and (err < 200 or err >= 400):
|
||||
raise ExtractorError(f'No media links found (site code {err})', expected=True)
|
||||
else:
|
||||
self.raise_geo_restricted(
|
||||
'No media links found: probably due to geo restriction.', countries=['CN'])
|
||||
self.raise_geo_restricted(
|
||||
'No media links found; possibly due to geo restriction', countries=['CN'])
|
||||
return formats
|
||||
|
||||
def query_api(self, endpoint, video_id, note):
|
||||
def _query_api(self, endpoint, video_id, note):
|
||||
result = self._download_json(
|
||||
f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE})
|
||||
code = traverse_obj(result, ('code', {int}))
|
||||
@@ -128,32 +136,29 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:song'
|
||||
IE_DESC = '网易云音乐'
|
||||
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://music.163.com/#/song?id=548648087',
|
||||
'url': 'https://music.163.com/#/song?id=550136151',
|
||||
'info_dict': {
|
||||
'id': '548648087',
|
||||
'id': '550136151',
|
||||
'ext': 'mp3',
|
||||
'title': '戒烟 (Live)',
|
||||
'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊',
|
||||
'title': 'It\'s Ok (Live)',
|
||||
'creators': 'count:10',
|
||||
'timestamp': 1522944000,
|
||||
'upload_date': '20180405',
|
||||
'description': 'md5:3650af9ee22c87e8637cb2dde22a765c',
|
||||
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
|
||||
'duration': 256,
|
||||
'description': 'md5:9fd07059c2ccee3950dc8363429a3135',
|
||||
'duration': 197,
|
||||
'thumbnail': r're:^http.*\.jpg',
|
||||
'album': '偶像练习生 表演曲目合集',
|
||||
'average_rating': int,
|
||||
'album_artist': '偶像练习生',
|
||||
'album_artists': ['偶像练习生'],
|
||||
},
|
||||
}, {
|
||||
'note': 'No lyrics.',
|
||||
'url': 'http://music.163.com/song?id=17241424',
|
||||
'info_dict': {
|
||||
'id': '17241424',
|
||||
'ext': 'mp3',
|
||||
'title': 'Opus 28',
|
||||
'creator': 'Dustin O\'Halloran',
|
||||
'upload_date': '20080211',
|
||||
'timestamp': 1202745600,
|
||||
'duration': 263,
|
||||
@@ -161,15 +166,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'album': 'Piano Solos Vol. 2',
|
||||
'album_artist': 'Dustin O\'Halloran',
|
||||
'average_rating': int,
|
||||
'description': '[00:05.00]纯音乐,请欣赏\n',
|
||||
'album_artists': ['Dustin O\'Halloran'],
|
||||
'creators': ['Dustin O\'Halloran'],
|
||||
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
|
||||
'md5': '95826c73ea50b1c288b22180ec9e754d',
|
||||
'md5': 'b896be78d8d34bd7bb665b26710913ff',
|
||||
'info_dict': {
|
||||
'id': '95670',
|
||||
'ext': 'mp3',
|
||||
'title': '国际歌',
|
||||
'creator': '马备',
|
||||
'upload_date': '19911130',
|
||||
'timestamp': 691516800,
|
||||
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
|
||||
@@ -180,6 +188,8 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'average_rating': int,
|
||||
'album': '红色摇滚',
|
||||
'album_artist': '侯牧人',
|
||||
'creators': ['马备'],
|
||||
'album_artists': ['侯牧人'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://music.163.com/#/song?id=32102397',
|
||||
@@ -188,7 +198,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'id': '32102397',
|
||||
'ext': 'mp3',
|
||||
'title': 'Bad Blood',
|
||||
'creator': 'Taylor Swift / Kendrick Lamar',
|
||||
'creators': ['Taylor Swift', 'Kendrick Lamar'],
|
||||
'upload_date': '20150516',
|
||||
'timestamp': 1431792000,
|
||||
'description': 'md5:21535156efb73d6d1c355f95616e285a',
|
||||
@@ -207,7 +217,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'id': '22735043',
|
||||
'ext': 'mp3',
|
||||
'title': '소원을 말해봐 (Genie)',
|
||||
'creator': '少女时代',
|
||||
'creators': ['少女时代'],
|
||||
'upload_date': '20100127',
|
||||
'timestamp': 1264608000,
|
||||
'description': 'md5:03d1ffebec3139aa4bafe302369269c5',
|
||||
@@ -251,12 +261,12 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
def _real_extract(self, url):
|
||||
song_id = self._match_id(url)
|
||||
|
||||
info = self.query_api(
|
||||
info = self._query_api(
|
||||
f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0]
|
||||
|
||||
formats = self.extract_formats(info)
|
||||
formats = self._extract_formats(info)
|
||||
|
||||
lyrics = self._process_lyrics(self.query_api(
|
||||
lyrics = self._process_lyrics(self._query_api(
|
||||
f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data'))
|
||||
lyric_data = {
|
||||
'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False),
|
||||
@@ -267,14 +277,14 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'id': song_id,
|
||||
'formats': formats,
|
||||
'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
|
||||
'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None,
|
||||
'album_artist': ' / '.join(traverse_obj(info, ('album', 'artists', ..., 'name'))) or None,
|
||||
'creators': traverse_obj(info, ('artists', ..., 'name')) or None,
|
||||
'album_artists': traverse_obj(info, ('album', 'artists', ..., 'name')) or None,
|
||||
**lyric_data,
|
||||
**traverse_obj(info, {
|
||||
'title': ('name', {str}),
|
||||
'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
|
||||
'timestamp': ('album', 'publishTime', {self._kilo_or_none}),
|
||||
'thumbnail': ('album', 'picUrl', {url_or_none}),
|
||||
'duration': ('duration', {self.kilo_or_none}),
|
||||
'duration': ('duration', {self._kilo_or_none}),
|
||||
'album': ('album', 'name', {str}),
|
||||
'average_rating': ('score', {int_or_none}),
|
||||
}),
|
||||
@@ -284,7 +294,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:album'
|
||||
IE_DESC = '网易云音乐 - 专辑'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?album\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://music.163.com/#/album?id=133153666',
|
||||
'info_dict': {
|
||||
@@ -294,7 +304,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
||||
'description': '桃几2021年翻唱合集',
|
||||
'thumbnail': r're:^http.*\.jpg',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'http://music.163.com/#/album?id=220780',
|
||||
'info_dict': {
|
||||
@@ -328,7 +338,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:singer'
|
||||
IE_DESC = '网易云音乐 - 歌手'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?artist\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'note': 'Singer has aliases.',
|
||||
'url': 'http://music.163.com/#/artist?id=10559',
|
||||
@@ -358,7 +368,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||
def _real_extract(self, url):
|
||||
singer_id = self._match_id(url)
|
||||
|
||||
info = self.query_api(
|
||||
info = self._query_api(
|
||||
f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data')
|
||||
|
||||
name = join_nonempty(
|
||||
@@ -372,7 +382,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:playlist'
|
||||
IE_DESC = '网易云音乐 - 歌单'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?(?:playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://music.163.com/#/playlist?id=79177352',
|
||||
'info_dict': {
|
||||
@@ -405,11 +415,15 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
'url': 'http://music.163.com/#/discover/toplist?id=3733003',
|
||||
'info_dict': {
|
||||
'id': '3733003',
|
||||
'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
|
||||
'title': 're:韩国Melon排行榜周榜(?: [0-9]{4}-[0-9]{2}-[0-9]{2})?',
|
||||
'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
|
||||
'upload_date': '20200109',
|
||||
'uploader_id': '2937386',
|
||||
'tags': ['韩语', '榜单'],
|
||||
'uploader': 'Melon榜单',
|
||||
'timestamp': 1578569373,
|
||||
},
|
||||
'playlist_count': 50,
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -426,7 +440,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
'tags': ('tags', ..., {str}),
|
||||
'uploader': ('creator', 'nickname', {str}),
|
||||
'uploader_id': ('creator', 'userId', {str_or_none}),
|
||||
'timestamp': ('updateTime', {self.kilo_or_none}),
|
||||
'timestamp': ('updateTime', {self._kilo_or_none}),
|
||||
}))
|
||||
if traverse_obj(info, ('playlist', 'specialType')) == 10:
|
||||
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
|
||||
@@ -437,7 +451,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:mv'
|
||||
IE_DESC = '网易云音乐 - MV'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?mv\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://music.163.com/#/mv?id=10958064',
|
||||
'info_dict': {
|
||||
@@ -445,7 +459,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': '交换余生',
|
||||
'description': 'md5:e845872cff28820642a2b02eda428fea',
|
||||
'creator': '林俊杰',
|
||||
'creators': ['林俊杰'],
|
||||
'upload_date': '20200916',
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
'duration': 364,
|
||||
@@ -460,7 +474,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': '이럴거면 그러지말지',
|
||||
'description': '白雅言自作曲唱甜蜜爱情',
|
||||
'creator': '白娥娟',
|
||||
'creators': ['白娥娟'],
|
||||
'upload_date': '20150520',
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
'duration': 216,
|
||||
@@ -468,12 +482,28 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'This MV has multiple creators.',
|
||||
'url': 'https://music.163.com/#/mv?id=22593543',
|
||||
'info_dict': {
|
||||
'id': '22593543',
|
||||
'ext': 'mp4',
|
||||
'title': '老北京杀器',
|
||||
'creators': ['秃子2z', '辉子', 'Saber梁维嘉'],
|
||||
'duration': 206,
|
||||
'upload_date': '20240618',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mv_id = self._match_id(url)
|
||||
|
||||
info = self.query_api(
|
||||
info = self._query_api(
|
||||
f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data']
|
||||
|
||||
formats = [
|
||||
@@ -484,13 +514,13 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
return {
|
||||
'id': mv_id,
|
||||
'formats': formats,
|
||||
'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')],
|
||||
**traverse_obj(info, {
|
||||
'title': ('name', {str}),
|
||||
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
|
||||
'creator': ('artistName', {str}),
|
||||
'upload_date': ('publishTime', {unified_strdate}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
'duration': ('duration', {self.kilo_or_none}),
|
||||
'duration': ('duration', {self._kilo_or_none}),
|
||||
'view_count': ('playCount', {int_or_none}),
|
||||
'like_count': ('likeCount', {int_or_none}),
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
@@ -501,7 +531,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:program'
|
||||
IE_DESC = '网易云音乐 - 电台节目'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?program\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://music.163.com/#/program?id=10109055',
|
||||
'info_dict': {
|
||||
@@ -509,7 +539,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'ext': 'mp3',
|
||||
'title': '不丹足球背后的故事',
|
||||
'description': '喜马拉雅人的足球梦 ...',
|
||||
'creator': '大话西藏',
|
||||
'creators': ['大话西藏'],
|
||||
'timestamp': 1434179287,
|
||||
'upload_date': '20150613',
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
@@ -522,7 +552,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'id': '10141022',
|
||||
'title': '滚滚电台的有声节目',
|
||||
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
||||
'creator': '滚滚电台ORZ',
|
||||
'creators': ['滚滚电台ORZ'],
|
||||
'timestamp': 1434450733,
|
||||
'upload_date': '20150616',
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
@@ -536,7 +566,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'ext': 'mp3',
|
||||
'title': '滚滚电台的有声节目',
|
||||
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
||||
'creator': '滚滚电台ORZ',
|
||||
'creators': ['滚滚电台ORZ'],
|
||||
'timestamp': 1434450733,
|
||||
'upload_date': '20150616',
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
@@ -550,7 +580,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
def _real_extract(self, url):
|
||||
program_id = self._match_id(url)
|
||||
|
||||
info = self.query_api(
|
||||
info = self._query_api(
|
||||
f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program']
|
||||
|
||||
metainfo = traverse_obj(info, {
|
||||
@@ -558,17 +588,17 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'description': ('description', {str}),
|
||||
'creator': ('dj', 'brand', {str}),
|
||||
'thumbnail': ('coverUrl', {url_or_none}),
|
||||
'timestamp': ('createTime', {self.kilo_or_none}),
|
||||
'timestamp': ('createTime', {self._kilo_or_none}),
|
||||
})
|
||||
|
||||
if not self._yes_playlist(
|
||||
info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
|
||||
formats = self.extract_formats(info['mainSong'])
|
||||
formats = self._extract_formats(info['mainSong'])
|
||||
|
||||
return {
|
||||
'id': str(info['mainSong']['id']),
|
||||
'formats': formats,
|
||||
'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})),
|
||||
'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})),
|
||||
**metainfo,
|
||||
}
|
||||
|
||||
@@ -579,7 +609,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:djradio'
|
||||
IE_DESC = '网易云音乐 - 电台'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?djradio\?id=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://music.163.com/#/djradio?id=42',
|
||||
'info_dict': {
|
||||
@@ -597,7 +627,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
|
||||
metainfo = {}
|
||||
entries = []
|
||||
for offset in itertools.count(start=0, step=self._PAGE_SIZE):
|
||||
info = self.query_api(
|
||||
info = self._query_api(
|
||||
f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}',
|
||||
dj_id, note=f'Downloading dj programs - {offset}')
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
@@ -41,7 +42,7 @@ class NHLBaseIE(InfoExtractor):
|
||||
else:
|
||||
height = int_or_none(playback.get('height'))
|
||||
formats.append({
|
||||
'format_id': playback.get('name', 'http' + (f'-{height}p' if height else '')),
|
||||
'format_id': playback.get('name') or join_nonempty('http', height and f'{height}p'),
|
||||
'url': playback_url,
|
||||
'width': int_or_none(playback.get('width')),
|
||||
'height': height,
|
||||
|
||||
@@ -43,15 +43,17 @@ class NuumBaseIE(InfoExtractor):
|
||||
is_live = media.get('media_status') == 'RUNNING'
|
||||
|
||||
formats, subtitles = None, None
|
||||
headers = {'Referer': 'https://nuum.ru/'}
|
||||
if extract_formats:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
media_url, video_id, 'mp4', live=is_live)
|
||||
media_url, video_id, 'mp4', live=is_live, headers=headers)
|
||||
|
||||
return filter_dict({
|
||||
'id': video_id,
|
||||
'is_live': is_live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'http_headers': headers,
|
||||
**traverse_obj(container, {
|
||||
'title': ('media_container_name', {str}),
|
||||
'description': ('media_container_description', {str}),
|
||||
@@ -78,7 +80,7 @@ class NuumMediaIE(NuumBaseIE):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
|
||||
'md5': 'f1d9118a30403e32b702a204eb03aca3',
|
||||
'md5': 'ce28837a5bbffe6952d7bfd3d39811b0',
|
||||
'info_dict': {
|
||||
'id': '1567547',
|
||||
'ext': 'mp4',
|
||||
|
||||
@@ -550,7 +550,8 @@ class ORFONIE(InfoExtractor):
|
||||
return self._extract_video_info(segment_id, selected_segment)
|
||||
|
||||
# Even some segmented videos have an unsegmented version available in API response root
|
||||
if not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none})):
|
||||
if (self._configuration_arg('prefer_segments_playlist')
|
||||
or not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none}))):
|
||||
return self.playlist_result(
|
||||
(self._extract_video_info(str(segment['id']), segment) for segment in segments),
|
||||
video_id, **self._parse_metadata(api_json), multi_video=True)
|
||||
|
||||
@@ -2,6 +2,7 @@ import itertools
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .sproutvideo import VidsIoIE
|
||||
from .vimeo import VimeoIE
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
@@ -12,6 +13,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
@@ -305,22 +307,28 @@ class PatreonIE(PatreonBaseIE):
|
||||
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
|
||||
}))
|
||||
|
||||
# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
|
||||
headers = {'referer': 'https://patreon.com/'}
|
||||
|
||||
# handle Vimeo embeds
|
||||
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
||||
v_url = urllib.parse.unquote(self._html_search_regex(
|
||||
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
|
||||
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
|
||||
if url_or_none(v_url) and self._request_webpage(
|
||||
v_url, video_id, 'Checking Vimeo embed URL',
|
||||
headers={'Referer': 'https://patreon.com/'},
|
||||
fatal=False, errnote=False):
|
||||
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
|
||||
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
|
||||
entries.append(self.url_result(
|
||||
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
||||
VimeoIE, url_transparent=True))
|
||||
|
||||
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
||||
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
|
||||
entries.append(self.url_result(embed_url))
|
||||
if embed_url and (urlh := self._request_webpage(
|
||||
embed_url, video_id, 'Checking embed URL', headers=headers,
|
||||
fatal=False, errnote=False, expected_status=403)):
|
||||
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
|
||||
if urlh.status != 403 or VidsIoIE.suitable(embed_url):
|
||||
entries.append(self.url_result(smuggle_url(embed_url, headers)))
|
||||
|
||||
post_file = traverse_obj(attributes, ('post_file', {dict}))
|
||||
if post_file:
|
||||
|
||||
@@ -41,7 +41,7 @@ class PelotonIE(InfoExtractor):
|
||||
}, 'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'_skip': 'Account needed',
|
||||
'skip': 'Account needed',
|
||||
}, {
|
||||
'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8',
|
||||
'info_dict': {
|
||||
@@ -61,7 +61,7 @@ class PelotonIE(InfoExtractor):
|
||||
}, 'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'_skip': 'Account needed',
|
||||
'skip': 'Account needed',
|
||||
}]
|
||||
|
||||
_MANIFEST_URL_TEMPLATE = '%s?hdnea=%s'
|
||||
@@ -199,7 +199,7 @@ class PelotonLiveIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'_skip': 'Account needed',
|
||||
'skip': 'Account needed',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import int_or_none, join_nonempty
|
||||
|
||||
|
||||
class PerformGroupIE(InfoExtractor):
|
||||
@@ -50,11 +50,8 @@ class PerformGroupIE(InfoExtractor):
|
||||
if not c_url:
|
||||
continue
|
||||
tbr = int_or_none(c.get('bitrate'), 1000)
|
||||
format_id = 'http'
|
||||
if tbr:
|
||||
format_id += f'-{tbr}'
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'format_id': join_nonempty('http', tbr),
|
||||
'url': c_url,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(c.get('width')),
|
||||
|
||||
@@ -1,28 +1,40 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
jwt_decode_hs256,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def result_from_props(props, episode_id=None):
|
||||
def result_from_props(props):
|
||||
return {
|
||||
'id': props.get('podcast_id') or episode_id,
|
||||
'title': props.get('title'),
|
||||
'url': props['mediaURL'],
|
||||
**traverse_obj(props, {
|
||||
'id': ('_id', {str}),
|
||||
'title': ('title', {str}),
|
||||
'url': ('mediaURL', {url_or_none}),
|
||||
'description': ('description', {clean_html}),
|
||||
'thumbnail': ('image', {jwt_decode_hs256}, 'url', {url_or_none}),
|
||||
'timestamp': ('timestamp', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
'ext': 'mp3',
|
||||
'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']),
|
||||
'timestamp': props.get('timestamp'),
|
||||
'duration': int_or_none(props.get('duration')),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
||||
|
||||
class PodbayFMIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$'
|
||||
_VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
|
||||
'md5': '98b41285dcf7989d105a4ed0404054cf',
|
||||
'md5': '895ac8505de349515f5ee8a4a3195c93',
|
||||
'info_dict': {
|
||||
'id': '1647338400',
|
||||
'id': '62306451f4a48e58d0c4d6a8',
|
||||
'title': 'Part One: Kissinger',
|
||||
'ext': 'mp3',
|
||||
'description': r're:^We begin our epic six part series on Henry Kissinger.+',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1647338400,
|
||||
'duration': 5001,
|
||||
@@ -34,24 +46,25 @@ class PodbayFMIE(InfoExtractor):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
data = self._search_nextjs_data(webpage, episode_id)
|
||||
return result_from_props(data['props']['pageProps']['episode'], episode_id)
|
||||
return result_from_props(data['props']['pageProps']['episode'])
|
||||
|
||||
|
||||
class PodbayFMChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$'
|
||||
_VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://podbay.fm/p/behind-the-bastards',
|
||||
'info_dict': {
|
||||
'id': 'behind-the-bastards',
|
||||
'title': 'Behind the Bastards',
|
||||
},
|
||||
'playlist_mincount': 21,
|
||||
}]
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
def _fetch_page(self, channel_id, pagenum):
|
||||
return self._download_json(
|
||||
f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
|
||||
channel_id)['podcast']
|
||||
f'Downloading channel JSON page {pagenum + 1}', channel_id)['podcast']
|
||||
|
||||
@staticmethod
|
||||
def _results_from_page(channel_id, page):
|
||||
|
||||
@@ -5,6 +5,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PokerGoBaseIE(InfoExtractor):
|
||||
@@ -65,7 +66,7 @@ class PokerGoIE(PokerGoBaseIE):
|
||||
'width': image.get('width'),
|
||||
'height': image.get('height'),
|
||||
} for image in data_json.get('images') or [] if image.get('url')]
|
||||
series_json = next(dct for dct in data_json.get('show_tags') or [] if dct.get('video_id') == video_id) or {}
|
||||
series_json = traverse_obj(data_json, ('show_tags', lambda _, v: v['video_id'] == video_id, any)) or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import datetime as dt
|
||||
import functools
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import functools
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
|
||||
@@ -7,6 +7,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -147,13 +148,13 @@ class ProSiebenSat1BaseIE(InfoExtractor):
|
||||
'page_url': 'http://www.prosieben.de',
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp{}'.format(f'-{tbr}' if tbr else ''),
|
||||
'format_id': join_nonempty('rtmp', tbr),
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'tbr': tbr,
|
||||
'format_id': 'http{}'.format(f'-{tbr}' if tbr else ''),
|
||||
'format_id': join_nonempty('http', tbr),
|
||||
})
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,48 +1,125 @@
|
||||
import base64
|
||||
import functools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
str_or_none,
|
||||
strip_jsonp,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class QQMusicIE(InfoExtractor):
|
||||
class QQMusicBaseIE(InfoExtractor):
    # Shared helpers for the QQ Music extractors: values derived from the
    # y.qq.com cookies and a wrapper around the musicu.fcg JSON endpoint.

    def _get_cookie(self, key, default=None):
        """Return the value of the y.qq.com cookie `key`, or `default` if it is not set."""
        return getattr(self._get_cookies('https://y.qq.com').get(key), 'value', default)

    def _get_g_tk(self):
        """Hash the 'qqmusic_key' cookie into a 31-bit integer.

        Starts from 5381 and folds in every character of the cookie value;
        an absent cookie is treated as the empty string, which yields 5381.
        """
        n = 5381
        for c in self._get_cookie('qqmusic_key', ''):
            n += (n << 5) + ord(c)
        return n & 2147483647

    def _get_uin(self):
        """Return the 'uin' cookie converted with int_or_none, falling back to 0."""
        return int_or_none(self._get_cookie('uin')) or 0

    @property
    def is_logged_in(self):
        """True when both a non-zero uin and the 'fqm_pvqid' cookie are present."""
        return bool(self._get_uin() and self._get_cookie('fqm_pvqid'))

    # Reference: m_r_GetRUin() in top_player.js
    # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
    @staticmethod
    def _m_r_get_ruin():
        """Return a pseudo-random integer below 1e10 derived from the current millisecond."""
        cur_ms = int(time.time() * 1000) % 1000
        return int(round(random.random() * 2147483647) * cur_ms % 1E10)

    def _download_init_data(self, url, mid, fatal=True):
        """Download `url` and return the JSON assigned to window.__INITIAL_DATA__.

        Returns None when `fatal` is false and the page could not be downloaded.
        """
        webpage = self._download_webpage(url, mid, fatal=fatal)
        # NOTE(review): assumes a non-fatal download failure is reported as False
        # (as with the other InfoExtractor download helpers) - confirm. Without
        # this guard that value would be handed to the regex search as if it
        # were the page text.
        if webpage is False:
            return None
        return self._search_json(
            r'window\.__INITIAL_DATA__\s*=', webpage,
            'init data', mid, transform_source=js_to_json, fatal=fatal)

    def _make_fcu_req(self, req_dict, mid, headers=None, **kwargs):
        """POST `req_dict` (merged with the common 'comm' section) to musicu.fcg.

        @param req_dict  mapping of request name -> module/method/param dict;
                         its keys are merged after (and may override) 'comm'
        @param mid       ID passed to _download_json as the video ID
        @param headers   optional extra HTTP headers (defaults to none)
        @param kwargs    forwarded unchanged to _download_json
        @returns         the decoded JSON response
        """
        payload = {
            'comm': {
                'cv': 0,
                'ct': 24,
                'format': 'json',
                'uin': self._get_uin(),
            },
            **req_dict,
        }
        # A fresh dict per call replaces the former shared mutable default argument
        return self._download_json(
            'https://u.y.qq.com/cgi-bin/musicu.fcg', mid,
            data=json.dumps(payload, separators=(',', ':')).encode(),
            headers={} if headers is None else headers, **kwargs)
|
||||
|
||||
|
||||
class QQMusicIE(QQMusicBaseIE):
|
||||
IE_NAME = 'qqmusic'
|
||||
IE_DESC = 'QQ音乐'
|
||||
_VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html'
|
||||
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/songDetail/(?P<id>[0-9A-Za-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html',
|
||||
'url': 'https://y.qq.com/n/ryqq/songDetail/004Ti8rT003TaZ',
|
||||
'md5': 'd7adc5c438d12e2cb648cca81593fd47',
|
||||
'info_dict': {
|
||||
'id': '004Ti8rT003TaZ',
|
||||
'ext': 'mp3',
|
||||
'title': '永夜のパレード (永夜的游行)',
|
||||
'album': '幻想遊園郷 -Fantastic Park-',
|
||||
'release_date': '20111230',
|
||||
'duration': 281,
|
||||
'creators': ['ケーキ姫', 'JUMA'],
|
||||
'genres': ['Pop'],
|
||||
'description': 'md5:b5261f3d595657ae561e9e6aee7eb7d9',
|
||||
'size': 4501244,
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||
'subtitles': 'count:1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://y.qq.com/n/ryqq/songDetail/004295Et37taLD',
|
||||
'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
|
||||
'info_dict': {
|
||||
'id': '004295Et37taLD',
|
||||
'ext': 'mp3',
|
||||
'title': '可惜没如果',
|
||||
'release_date': '20141227',
|
||||
'creator': '林俊杰',
|
||||
'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'album': '新地球 - 人 (Special Edition)',
|
||||
'release_date': '20150129',
|
||||
'duration': 298,
|
||||
'creators': ['林俊杰'],
|
||||
'genres': ['Pop'],
|
||||
'description': 'md5:f568421ff618d2066e74b65a04149c4e',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||
},
|
||||
'skip': 'premium member only',
|
||||
}, {
|
||||
'note': 'There is no mp3-320 version of this song.',
|
||||
'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html',
|
||||
'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
|
||||
'url': 'https://y.qq.com/n/ryqq/songDetail/004MsGEo3DdNxV',
|
||||
'md5': '028aaef1ae13d8a9f4861a92614887f9',
|
||||
'info_dict': {
|
||||
'id': '004MsGEo3DdNxV',
|
||||
'ext': 'mp3',
|
||||
'title': '如果',
|
||||
'album': '新传媒电视连续剧金曲系列II',
|
||||
'release_date': '20050626',
|
||||
'creator': '李季美',
|
||||
'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 220,
|
||||
'creators': ['李季美'],
|
||||
'genres': [],
|
||||
'description': 'md5:fc711212aa623b28534954dc4bd67385',
|
||||
'size': 3535730,
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||
},
|
||||
}, {
|
||||
'note': 'lyrics not in .lrc format',
|
||||
'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
|
||||
'url': 'https://y.qq.com/n/ryqq/songDetail/001JyApY11tIp6',
|
||||
'info_dict': {
|
||||
'id': '001JyApY11tIp6',
|
||||
'ext': 'mp3',
|
||||
@@ -50,185 +127,193 @@ class QQMusicIE(InfoExtractor):
|
||||
'release_date': '19970225',
|
||||
'creator': 'Dark Funeral',
|
||||
'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'no longer available',
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
|
||||
'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
|
||||
'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10},
|
||||
'F000': {'name': 'flac', 'prefix': 'F000', 'ext': 'flac', 'preference': 60},
|
||||
'A000': {'name': 'ape', 'prefix': 'A000', 'ext': 'ape', 'preference': 50},
|
||||
'M800': {'name': '320mp3', 'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
|
||||
'M500': {'name': '128mp3', 'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
|
||||
'C400': {'name': '96aac', 'prefix': 'C400', 'ext': 'm4a', 'preference': 20, 'abr': 96},
|
||||
'C200': {'name': '48aac', 'prefix': 'C200', 'ext': 'm4a', 'preference': 20, 'abr': 48},
|
||||
}
|
||||
|
||||
# Reference: m_r_GetRUin() in top_player.js
|
||||
# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
|
||||
@staticmethod
|
||||
def m_r_get_ruin():
|
||||
cur_ms = int(time.time() * 1000) % 1000
|
||||
return int(round(random.random() * 2147483647) * cur_ms % 1E10)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid = self._match_id(url)
|
||||
|
||||
detail_info_page = self._download_webpage(
|
||||
f'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid={mid}&play=0',
|
||||
mid, note='Download song detail info',
|
||||
errnote='Unable to get song detail info', encoding='gbk')
|
||||
init_data = self._download_init_data(url, mid, fatal=False)
|
||||
info_data = self._make_fcu_req({'info': {
|
||||
'module': 'music.pf_song_detail_svr',
|
||||
'method': 'get_song_detail_yqq',
|
||||
'param': {
|
||||
'song_mid': mid,
|
||||
'song_type': 0,
|
||||
},
|
||||
}}, mid, note='Downloading song info')['info']['data']['track_info']
|
||||
|
||||
song_name = self._html_search_regex(
|
||||
r"songname:\s*'([^']+)'", detail_info_page, 'song name')
|
||||
media_mid = info_data['file']['media_mid']
|
||||
|
||||
publish_time = self._html_search_regex(
|
||||
r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
|
||||
'publish time', default=None)
|
||||
if publish_time:
|
||||
publish_time = publish_time.replace('-', '')
|
||||
|
||||
singer = self._html_search_regex(
|
||||
r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
|
||||
|
||||
lrc_content = self._html_search_regex(
|
||||
r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
|
||||
detail_info_page, 'LRC lyrics', default=None)
|
||||
if lrc_content:
|
||||
lrc_content = lrc_content.replace('\\n', '\n')
|
||||
|
||||
thumbnail_url = None
|
||||
albummid = self._search_regex(
|
||||
[r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
|
||||
detail_info_page, 'album mid', default=None)
|
||||
if albummid:
|
||||
thumbnail_url = f'http://i.gtimg.cn/music/photo/mid_album_500/{albummid[-2:-1]}/{albummid[-1]}/{albummid}.jpg'
|
||||
|
||||
guid = self.m_r_get_ruin()
|
||||
|
||||
vkey = self._download_json(
|
||||
f'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid={guid}',
|
||||
mid, note='Retrieve vkey', errnote='Unable to get vkey',
|
||||
transform_source=strip_jsonp)['key']
|
||||
data = self._make_fcu_req({
|
||||
'req_1': {
|
||||
'module': 'vkey.GetVkeyServer',
|
||||
'method': 'CgiGetVkey',
|
||||
'param': {
|
||||
'guid': str(self._m_r_get_ruin()),
|
||||
'songmid': [mid] * len(self._FORMATS),
|
||||
'songtype': [0] * len(self._FORMATS),
|
||||
'uin': str(self._get_uin()),
|
||||
'loginflag': 1,
|
||||
'platform': '20',
|
||||
'filename': [f'{f["prefix"]}{media_mid}.{f["ext"]}' for f in self._FORMATS.values()],
|
||||
},
|
||||
},
|
||||
'req_2': {
|
||||
'module': 'music.musichallSong.PlayLyricInfo',
|
||||
'method': 'GetPlayLyricInfo',
|
||||
'param': {'songMID': mid},
|
||||
},
|
||||
}, mid, note='Downloading formats and lyric', headers=self.geo_verification_headers())
|
||||
|
||||
code = traverse_obj(data, ('req_1', 'code', {int}))
|
||||
if code != 0:
|
||||
raise ExtractorError(f'Failed to download format info, error code {code or "unknown"}')
|
||||
formats = []
|
||||
for format_id, details in self._FORMATS.items():
|
||||
for media_info in traverse_obj(data, (
|
||||
'req_1', 'data', 'midurlinfo', lambda _, v: v['songmid'] == mid and v['purl']),
|
||||
):
|
||||
format_key = traverse_obj(media_info, ('filename', {str}, {lambda x: x[:4]}))
|
||||
format_info = self._FORMATS.get(format_key) or {}
|
||||
format_id = format_info.get('name')
|
||||
formats.append({
|
||||
'url': 'http://cc.stream.qqmusic.qq.com/{}{}.{}?vkey={}&guid={}&fromtag=0'.format(
|
||||
details['prefix'], mid, details['ext'], vkey, guid),
|
||||
'url': urljoin('https://dl.stream.qqmusic.qq.com', media_info['purl']),
|
||||
'format': format_id,
|
||||
'format_id': format_id,
|
||||
'quality': details['preference'],
|
||||
'abr': details.get('abr'),
|
||||
'size': traverse_obj(info_data, ('file', f'size_{format_id}', {int_or_none})),
|
||||
'quality': format_info.get('preference'),
|
||||
'abr': format_info.get('abr'),
|
||||
'ext': format_info.get('ext'),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._check_formats(formats, mid)
|
||||
|
||||
actual_lrc_lyrics = ''.join(
|
||||
line + '\n' for line in re.findall(
|
||||
r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content))
|
||||
if not formats and not self.is_logged_in:
|
||||
self.raise_login_required()
|
||||
|
||||
if traverse_obj(data, ('req_2', 'code')):
|
||||
self.report_warning(f'Failed to download lyric, error {data["req_2"]["code"]!r}')
|
||||
lrc_content = traverse_obj(data, ('req_2', 'data', 'lyric', {lambda x: base64.b64decode(x).decode('utf-8')}))
|
||||
|
||||
info_dict = {
|
||||
'id': mid,
|
||||
'formats': formats,
|
||||
'title': song_name,
|
||||
'release_date': publish_time,
|
||||
'creator': singer,
|
||||
'description': lrc_content,
|
||||
'thumbnail': thumbnail_url,
|
||||
**traverse_obj(info_data, {
|
||||
'title': ('title', {str}),
|
||||
'album': ('album', 'title', {str}, {lambda x: x or None}),
|
||||
'release_date': ('time_public', {lambda x: x.replace('-', '') or None}),
|
||||
'creators': ('singer', ..., 'name', {str}),
|
||||
'alt_title': ('subtitle', {str}, {lambda x: x or None}),
|
||||
'duration': ('interval', {int_or_none}),
|
||||
}),
|
||||
**traverse_obj(init_data, ('detail', {
|
||||
'thumbnail': ('picurl', {url_or_none}),
|
||||
'description': ('info', 'intro', 'content', ..., 'value', {str}),
|
||||
'genres': ('info', 'genre', 'content', ..., 'value', {str}, all),
|
||||
}), get_all=False),
|
||||
}
|
||||
if actual_lrc_lyrics:
|
||||
info_dict['subtitles'] = {
|
||||
'origin': [{
|
||||
'ext': 'lrc',
|
||||
'data': actual_lrc_lyrics,
|
||||
}],
|
||||
}
|
||||
if lrc_content:
|
||||
info_dict['subtitles'] = {'origin': [{'ext': 'lrc', 'data': lrc_content}]}
|
||||
info_dict['description'] = join_nonempty(info_dict.get('description'), lrc_content, delim='\n')
|
||||
return info_dict
|
||||
|
||||
|
||||
class QQPlaylistBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def qq_static_url(category, mid):
|
||||
return f'http://y.qq.com/y/static/{category}/{mid[-2]}/{mid[-1]}/{mid}.html'
|
||||
|
||||
def get_singer_all_songs(self, singmid, num):
|
||||
return self._download_webpage(
|
||||
r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
|
||||
query={
|
||||
'format': 'json',
|
||||
'inCharset': 'utf8',
|
||||
'outCharset': 'utf-8',
|
||||
'platform': 'yqq',
|
||||
'needNewCode': 0,
|
||||
'singermid': singmid,
|
||||
'order': 'listen',
|
||||
'begin': 0,
|
||||
'num': num,
|
||||
'songstatus': 1,
|
||||
})
|
||||
|
||||
def get_entries_from_page(self, singmid):
|
||||
entries = []
|
||||
|
||||
default_num = 1
|
||||
json_text = self.get_singer_all_songs(singmid, default_num)
|
||||
json_obj_all_songs = self._parse_json(json_text, singmid)
|
||||
|
||||
if json_obj_all_songs['code'] == 0:
|
||||
total = json_obj_all_songs['data']['total']
|
||||
json_text = self.get_singer_all_songs(singmid, total)
|
||||
json_obj_all_songs = self._parse_json(json_text, singmid)
|
||||
|
||||
for item in json_obj_all_songs['data']['list']:
|
||||
if item['musicData'].get('songmid') is not None:
|
||||
songmid = item['musicData']['songmid']
|
||||
entries.append(self.url_result(
|
||||
rf'https://y.qq.com/n/yqq/song/{songmid}.html', 'QQMusic', songmid))
|
||||
|
||||
return entries
|
||||
|
||||
|
||||
class QQMusicSingerIE(QQPlaylistBaseIE):
|
||||
class QQMusicSingerIE(QQMusicBaseIE):
|
||||
IE_NAME = 'qqmusic:singer'
|
||||
IE_DESC = 'QQ音乐 - 歌手'
|
||||
_VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html',
|
||||
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/singer/(?P<id>[0-9A-Za-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://y.qq.com/n/ryqq/singer/001BLpXF2DyJe2',
|
||||
'info_dict': {
|
||||
'id': '001BLpXF2DyJe2',
|
||||
'title': '林俊杰',
|
||||
'description': 'md5:870ec08f7d8547c29c93010899103751',
|
||||
'description': 'md5:10624ce73b06fa400bc846f59b0305fa',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'https://y.qq.com/n/ryqq/singer/000Q00f213YzNV',
|
||||
'info_dict': {
|
||||
'id': '000Q00f213YzNV',
|
||||
'title': '桃几OvO',
|
||||
'description': '小破站小唱见~希望大家喜欢听我唱歌~!',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||
},
|
||||
'playlist_count': 12,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '0016cvsy02mmCl',
|
||||
'ext': 'mp3',
|
||||
'title': '群青',
|
||||
'album': '桃几2021年翻唱集',
|
||||
'release_date': '20210913',
|
||||
'duration': 248,
|
||||
'creators': ['桃几OvO'],
|
||||
'genres': ['Pop'],
|
||||
'description': 'md5:4296005a04edcb5cdbe0889d5055a7ae',
|
||||
'size': 3970822,
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||
},
|
||||
}],
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _fetch_page(self, mid, page_size, page_num):
|
||||
data = self._make_fcu_req({'req_1': {
|
||||
'module': 'music.web_singer_info_svr',
|
||||
'method': 'get_singer_detail_info',
|
||||
'param': {
|
||||
'sort': 5,
|
||||
'singermid': mid,
|
||||
'sin': page_num * page_size,
|
||||
'num': page_size,
|
||||
}}}, mid, note=f'Downloading page {page_num}')
|
||||
yield from traverse_obj(data, ('req_1', 'data', 'songlist', ..., {lambda x: self.url_result(
|
||||
f'https://y.qq.com/n/ryqq/songDetail/{x["mid"]}', QQMusicIE, x['mid'], x.get('title'))}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid = self._match_id(url)
|
||||
init_data = self._download_init_data(url, mid, fatal=False)
|
||||
|
||||
entries = self.get_entries_from_page(mid)
|
||||
singer_page = self._download_webpage(url, mid, 'Download singer page')
|
||||
singer_name = self._html_search_regex(
|
||||
r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None)
|
||||
singer_desc = None
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(functools.partial(self._fetch_page, mid, self._PAGE_SIZE), self._PAGE_SIZE),
|
||||
mid, **traverse_obj(init_data, ('singerDetail', {
|
||||
'title': ('basic_info', 'name', {str}),
|
||||
'description': ('ex_info', 'desc', {str}),
|
||||
'thumbnail': ('pic', 'pic', {url_or_none}),
|
||||
})))
|
||||
|
||||
if mid:
|
||||
singer_desc_page = self._download_xml(
|
||||
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
|
||||
'Donwload singer description XML',
|
||||
query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
|
||||
headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
|
||||
|
||||
singer_desc = singer_desc_page.find('./data/info/desc').text
|
||||
|
||||
return self.playlist_result(entries, mid, singer_name, singer_desc)
|
||||
class QQPlaylistBaseIE(InfoExtractor):
    # Base for the QQ Music list-type extractors (album, toplist, playlist)

    def _extract_entries(self, info_json, path):
        """Yield a QQMusicIE url_result for each song found at `path` in `info_json`.

        Songs without a truthy 'songmid' are skipped; 'songname' is passed
        along as the entry title when present.
        """
        for track in traverse_obj(info_json, path):
            if track_mid := track.get('songmid'):
                yield self.url_result(
                    f'https://y.qq.com/n/ryqq/songDetail/{track_mid}',
                    QQMusicIE, track_mid, track.get('songname'))
|
||||
|
||||
|
||||
class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:album'
|
||||
IE_DESC = 'QQ音乐 - 专辑'
|
||||
_VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'
|
||||
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/albumDetail/(?P<id>[0-9A-Za-z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
|
||||
'url': 'https://y.qq.com/n/ryqq/albumDetail/000gXCTb2AhRR1',
|
||||
'info_dict': {
|
||||
'id': '000gXCTb2AhRR1',
|
||||
'title': '我们都是这样长大的',
|
||||
@@ -236,10 +321,10 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
|
||||
'url': 'https://y.qq.com/n/ryqq/albumDetail/002Y5a3b3AlCu3',
|
||||
'info_dict': {
|
||||
'id': '002Y5a3b3AlCu3',
|
||||
'title': '그리고...',
|
||||
'title': '그리고…',
|
||||
'description': 'md5:a48823755615508a95080e81b51ba729',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
@@ -248,49 +333,45 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
def _real_extract(self, url):
|
||||
mid = self._match_id(url)
|
||||
|
||||
album = self._download_json(
|
||||
f'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid={mid}&format=json',
|
||||
mid, 'Download album page')['data']
|
||||
album_json = self._download_json(
|
||||
'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg',
|
||||
mid, 'Download album page',
|
||||
query={'albummid': mid, 'format': 'json'})['data']
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'],
|
||||
) for song in album['list']
|
||||
]
|
||||
album_name = album.get('name')
|
||||
album_detail = album.get('desc')
|
||||
if album_detail is not None:
|
||||
album_detail = album_detail.strip()
|
||||
entries = self._extract_entries(album_json, ('list', ...))
|
||||
|
||||
return self.playlist_result(entries, mid, album_name, album_detail)
|
||||
return self.playlist_result(entries, mid, **traverse_obj(album_json, {
|
||||
'title': ('name', {str}),
|
||||
'description': ('desc', {str.strip}),
|
||||
}))
|
||||
|
||||
|
||||
class QQMusicToplistIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:toplist'
|
||||
IE_DESC = 'QQ音乐 - 排行榜'
|
||||
_VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/toplist/(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://y.qq.com/n/yqq/toplist/123.html',
|
||||
'url': 'https://y.qq.com/n/ryqq/toplist/123',
|
||||
'info_dict': {
|
||||
'id': '123',
|
||||
'title': '美国iTunes榜',
|
||||
'description': 'md5:89db2335fdbb10678dee2d43fe9aba08',
|
||||
'title': r're:美国热门音乐榜 \d{4}-\d{2}-\d{2}',
|
||||
'description': '美国热门音乐榜,每周一更新。',
|
||||
},
|
||||
'playlist_count': 100,
|
||||
'playlist_count': 95,
|
||||
}, {
|
||||
'url': 'https://y.qq.com/n/yqq/toplist/3.html',
|
||||
'url': 'https://y.qq.com/n/ryqq/toplist/3',
|
||||
'info_dict': {
|
||||
'id': '3',
|
||||
'title': '巅峰榜·欧美',
|
||||
'description': 'md5:5a600d42c01696b26b71f8c4d43407da',
|
||||
'title': r're:巅峰榜·欧美 \d{4}-\d{2}-\d{2}',
|
||||
'description': 'md5:4def03b60d3644be4c9a36f21fd33857',
|
||||
},
|
||||
'playlist_count': 100,
|
||||
}, {
|
||||
'url': 'https://y.qq.com/n/yqq/toplist/106.html',
|
||||
'url': 'https://y.qq.com/n/ryqq/toplist/106',
|
||||
'info_dict': {
|
||||
'id': '106',
|
||||
'title': '韩国Mnet榜',
|
||||
'title': r're:韩国Mnet榜 \d{4}-\d{2}-\d{2}',
|
||||
'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
|
||||
},
|
||||
'playlist_count': 50,
|
||||
@@ -304,33 +385,20 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
|
||||
note='Download toplist page',
|
||||
query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
|
||||
|
||||
entries = [self.url_result(
|
||||
'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic',
|
||||
song['data']['songmid'])
|
||||
for song in toplist_json['songlist']]
|
||||
|
||||
topinfo = toplist_json.get('topinfo', {})
|
||||
list_name = topinfo.get('ListName')
|
||||
list_description = topinfo.get('info')
|
||||
return self.playlist_result(entries, list_id, list_name, list_description)
|
||||
return self.playlist_result(
|
||||
self._extract_entries(toplist_json, ('songlist', ..., 'data')), list_id,
|
||||
playlist_title=join_nonempty(*traverse_obj(
|
||||
toplist_json, ((('topinfo', 'ListName'), 'update_time'), None)), delim=' '),
|
||||
playlist_description=traverse_obj(toplist_json, ('topinfo', 'info')))
|
||||
|
||||
|
||||
class QQMusicPlaylistIE(QQPlaylistBaseIE):
|
||||
IE_NAME = 'qqmusic:playlist'
|
||||
IE_DESC = 'QQ音乐 - 歌单'
|
||||
_VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/playlist/(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html',
|
||||
'info_dict': {
|
||||
'id': '3462654915',
|
||||
'title': '韩国5月新歌精选下旬',
|
||||
'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
|
||||
},
|
||||
'playlist_count': 40,
|
||||
'skip': 'playlist gone',
|
||||
}, {
|
||||
'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
|
||||
'url': 'https://y.qq.com/n/ryqq/playlist/1374105607',
|
||||
'info_dict': {
|
||||
'id': '1374105607',
|
||||
'title': '易入人心的华语民谣',
|
||||
@@ -346,19 +414,83 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
|
||||
'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
|
||||
list_id, 'Download list page',
|
||||
query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
|
||||
transform_source=strip_jsonp)
|
||||
transform_source=strip_jsonp, headers={'Referer': url})
|
||||
if not len(list_json.get('cdlist', [])):
|
||||
if list_json.get('code'):
|
||||
raise ExtractorError(
|
||||
'QQ Music said: error %d in fetching playlist info' % list_json['code'],
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to get playlist info')
|
||||
raise ExtractorError(join_nonempty(
|
||||
'Unable to get playlist info',
|
||||
join_nonempty('code', 'subcode', from_dict=list_json),
|
||||
list_json.get('msg'), delim=': '))
|
||||
|
||||
cdlist = list_json['cdlist'][0]
|
||||
entries = [self.url_result(
|
||||
'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'])
|
||||
for song in cdlist['songlist']]
|
||||
entries = self._extract_entries(list_json, ('cdlist', 0, 'songlist', ...))
|
||||
|
||||
list_name = cdlist.get('dissname')
|
||||
list_description = clean_html(unescapeHTML(cdlist.get('desc')))
|
||||
return self.playlist_result(entries, list_id, list_name, list_description)
|
||||
return self.playlist_result(entries, list_id, **traverse_obj(list_json, ('cdlist', 0, {
|
||||
'title': ('dissname', {str}),
|
||||
'description': ('desc', {unescapeHTML}, {clean_html}),
|
||||
})))
|
||||
|
||||
|
||||
class QQMusicVideoIE(QQMusicBaseIE):
    IE_NAME = 'qqmusic:mv'
    IE_DESC = 'QQ音乐 - MV'
    _VALID_URL = r'https?://y\.qq\.com/n/ryqq/mv/(?P<id>[0-9A-Za-z]+)'

    _TESTS = [{
        'url': 'https://y.qq.com/n/ryqq/mv/002Vsarh3SVU8K',
        'info_dict': {
            'id': '002Vsarh3SVU8K',
            'ext': 'mp4',
            'title': 'The Chant (Extended Mix / Audio)',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
            'release_timestamp': 1688918400,
            'release_date': '20230709',
            'duration': 313,
            'creators': ['Duke Dumont'],
            'view_count': int,
        },
    }]

    def _parse_url_formats(self, url_data):
        """Build format dicts from the 'mp4' entries of `url_data` that have a truthy 'freeflow_url'."""
        usable_mp4 = ('mp4', lambda _, v: v['freeflow_url'])
        format_fields = {
            # Only the first URL of each entry is used
            'url': ('freeflow_url', 0, {url_or_none}),
            'filesize': ('fileSize', {int_or_none}),
            'format_id': ('newFileType', {str_or_none}),
        }
        return traverse_obj(url_data, (*usable_mp4, format_fields))

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Metadata and stream URLs are requested in a single musicu.fcg call
        request = {
            'mvInfo': {
                'module': 'music.video.VideoData',
                'method': 'get_video_info_batch',
                'param': {
                    'vidlist': [video_id],
                    'required': [
                        'vid', 'type', 'sid', 'cover_pic', 'duration', 'singers',
                        'video_pay', 'hint', 'code', 'msg', 'name', 'desc',
                        'playcnt', 'pubdate', 'play_forbid_reason'],
                },
            },
            'mvUrl': {
                'module': 'music.stream.MvUrlProxy',
                'method': 'GetMvUrls',
                'param': {'vids': [video_id]},
            },
        }
        video_info = self._make_fcu_req(
            request, video_id, headers=self.geo_verification_headers())

        info_path = ('mvInfo', 'data', video_id)
        # A 'play_forbid_reason' of 3 is reported to the user as a geo restriction
        if traverse_obj(video_info, (*info_path, 'play_forbid_reason')) == 3:
            self.raise_geo_restricted()

        url_data = traverse_obj(video_info, ('mvUrl', 'data', video_id))
        return {
            'id': video_id,
            'formats': self._parse_url_formats(url_data),
            **traverse_obj(video_info, (*info_path, {
                'title': ('name', {str}),
                'description': ('desc', {str}),
                'thumbnail': ('cover_pic', {url_or_none}),
                'release_timestamp': ('pubdate', {int_or_none}),
                'duration': ('duration', {int_or_none}),
                'creators': ('singers', ..., 'name', {str}),
                'view_count': ('playcnt', {int_or_none}),
            })),
        }
|
||||
|
||||
198
yt_dlp/extractor/sproutvideo.py
Normal file
198
yt_dlp/extractor/sproutvideo.py
Normal file
@@ -0,0 +1,198 @@
|
||||
import base64
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SproutVideoIE(InfoExtractor):
|
||||
_NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P<id>[\da-f]+)/[\da-f]+'
|
||||
_VALID_URL = rf'https?:{_NO_SCHEME_RE}'
|
||||
_EMBED_REGEX = [rf'<iframe [^>]*\bsrc=["\'](?P<url>(?:https?:)?{_NO_SCHEME_RE}[^"\']*)["\']']
|
||||
_TESTS = [{
|
||||
'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
|
||||
'md5': '1343ce1a6cb39d67889bfa07c7b02b0e',
|
||||
'info_dict': {
|
||||
'id': '4c9dddb01910e3c9c4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
|
||||
'duration': 576,
|
||||
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://videos.sproutvideo.com/embed/a79fdcb21f1be2c62e/93bf31e41e39ca27',
|
||||
'md5': 'cebae5cf558cca83271917cf4ec03f26',
|
||||
'info_dict': {
|
||||
'id': 'a79fdcb21f1be2c62e',
|
||||
'ext': 'mp4',
|
||||
'title': 'HS_01_Live Stream 2023-01-14 10:00',
|
||||
'duration': 703,
|
||||
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
# http formats 'sd' and 'hd' are available
|
||||
'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90',
|
||||
'md5': 'f368c78df07e78a749508b221528672c',
|
||||
'info_dict': {
|
||||
'id': '119cd6bc1a18e6cd98',
|
||||
'ext': 'mp4',
|
||||
'title': '3. Updating your Partner details',
|
||||
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||
'duration': 60,
|
||||
},
|
||||
'params': {'format': 'hd'},
|
||||
}, {
|
||||
# subtitles
|
||||
'url': 'https://videos.sproutvideo.com/embed/119dd8ba121ee0cc98/4ee50c88a343215d?type=hd',
|
||||
'md5': '7f6798f037d7a3e3e07e67959de68fc6',
|
||||
'info_dict': {
|
||||
'id': '119dd8ba121ee0cc98',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recipients Setup - Domestic Wire Only',
|
||||
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||
'duration': 77,
|
||||
'subtitles': {'en': 'count:1'},
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs',
|
||||
'info_dict': {
|
||||
'id': '4c9dddb01910e3c9c4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
|
||||
'duration': 576,
|
||||
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||
},
|
||||
}]
|
||||
_M3U8_URL_TMPL = 'https://{base}.videos.sproutvideo.com/{s3_user_hash}/{s3_video_hash}/video/index.m3u8'
|
||||
_QUALITIES = ('hd', 'uhd', 'source') # Exclude 'sd' to prioritize hls formats above it
|
||||
|
||||
@staticmethod
def _policy_to_qs(policy, signature_key, as_string=False):
    """Build the signed query parameters for one signature set of ``policy``.

    Every entry of ``policy['signatures'][signature_key]`` is copied with a
    leading ``CloudFront-`` stripped from its name, and the policy's
    ``sessionID`` is added last.

    Returns the parameters as a dict, or as a URL-encoded string when
    ``as_string`` is true.
    """
    signed_params = policy['signatures'][signature_key]
    params = {
        remove_start(name, 'CloudFront-'): signed_value
        for name, signed_value in signed_params.items()
    }
    params['sessionID'] = policy['sessionID']

    if not as_string:
        return params
    return urllib.parse.urlencode(params, doseq=True)
|
||||
|
||||
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield the embed URLs found in ``webpage``, each smuggled with the embedding page as referer.

    Protocol-relative URLs (starting with ``//``) are given an explicit
    ``https:`` scheme before being smuggled.
    """
    for candidate in super()._extract_embed_urls(url, webpage):
        absolute_url = f'https:{candidate}' if candidate.startswith('//') else candidate
        # A fresh dict is built for every URL, exactly as before
        yield smuggle_url(absolute_url, {'referer': url})
|
||||
|
||||
def _real_extract(self, url):
    """Extract formats, subtitles and metadata from a SproutVideo embed page.

    The page embeds a base64-encoded JSON blob (``var dat = '...'``) which
    carries the signing policy, optional direct downloads, subtitle tracks
    and the basic metadata returned here.
    """
    url, smuggled = unsmuggle_url(url, {})
    video_id = self._match_id(url)
    page_headers = traverse_obj(smuggled, {'Referer': 'referer'})
    webpage = self._download_webpage(url, video_id, headers=page_headers)

    def decode_payload(encoded):
        return base64.b64decode(encoded).decode()

    data = self._search_json(
        r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id,
        contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
        transform_source=decode_payload)

    headers = {
        'Accept': '*/*',
        'Origin': 'https://videos.sproutvideo.com',
        'Referer': url,
    }
    formats = []

    # HLS extraction is fatal; only attempt it if the JSON data says it's available
    if traverse_obj(data, 'hls'):
        manifest_query = self._policy_to_qs(data, 'm')
        segment_query = self._policy_to_qs(data, 't', as_string=True)
        key_query = self._policy_to_qs(data, 'k', as_string=True)

        hls_formats = self._extract_m3u8_formats(
            self._M3U8_URL_TMPL.format(**data), video_id, 'mp4',
            m3u8_id='hls', headers=headers, query=manifest_query)
        for hls_format in hls_formats:
            # Variant playlists, segments and keys each need their own signed query
            hls_format['url'] = update_url_query(hls_format['url'], manifest_query)
            hls_format['extra_param_to_segment_url'] = segment_query
            hls_format['extra_param_to_key_url'] = key_query
        formats.extend(hls_formats)

    downloads = traverse_obj(data, ('downloads', {dict.items}, lambda _, v: url_or_none(v[1])))
    if downloads:
        rank = qualities(self._QUALITIES)
        acodec = 'none' if data.get('has_audio') is False else None
        for format_id, format_url in downloads:
            formats.append({
                'format_id': str(format_id),
                'url': format_url,
                'ext': 'mp4',
                'quality': rank(format_id),
                'acodec': acodec,
            })

    subtitles = {}
    for track in traverse_obj(data, ('subtitleData', lambda _, v: url_or_none(v['src']))):
        language = track.get('srclang', 'en')
        subtitles.setdefault(language, []).append({'url': track['src']})

    metadata = traverse_obj(data, {
        'title': ('title', {str}),
        'duration': ('duration', {int_or_none}),
        'thumbnail': ('posterframe_url', {url_or_none}),
    })
    return {
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        'http_headers': headers,
        **metadata,
    }
|
||||
|
||||
|
||||
class VidsIoIE(InfoExtractor):
    """Extractor for vids.io video pages, which wrap a SproutVideo embed."""

    IE_NAME = 'vids.io'
    _VALID_URL = r'https?://[\w-]+\.vids\.io/videos/(?P<id>[\da-f]+)/(?P<display_id>[\w-]+)'
    _TESTS = [{
        'url': 'https://how-to-video.vids.io/videos/799cd8b11c10efc1f0/how-to-video-live-streaming',
        'md5': '9bbbb2c0c0739eb163b80f87b8d77c9e',
        'info_dict': {
            'id': '799cd8b11c10efc1f0',
            'ext': 'mp4',
            'title': 'How to Video: Live Streaming',
            'duration': 2787,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]

    def _submit_password(self, url, display_id, locked_page):
        """Post the user's video password and return the unlocked webpage.

        Raises an expected ExtractorError when no password was supplied or
        when the server answers the submission with HTTP 403.
        """
        password = self.get_param('videopassword')
        if not password:
            raise ExtractorError(
                'This video is password-protected; use the --video-password option', expected=True)

        try:
            # Requests with user's session cookie `_sproutvideo_session` are authorized afterwards
            return self._download_webpage(
                url, display_id, 'Submitting video password',
                data=urlencode_postdata({
                    'password': password,
                    **self._hidden_inputs(locked_page),
                }))
        except ExtractorError as error:
            rejected = isinstance(error.cause, HTTPError) and error.cause.status == 403
            if not rejected:
                raise
            raise ExtractorError('Incorrect password', expected=True)

    def _real_extract(self, url):
        """Resolve the page to its first SproutVideo embed and delegate to SproutVideoIE."""
        mobj = self._match_valid_url(url)
        video_id, display_id = mobj.group('id'), mobj.group('display_id')
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=403)

        # A 403 response carries the password form instead of the player
        if urlh.status == 403:
            webpage = self._submit_password(url, display_id, webpage)

        embed_url = next(SproutVideoIE._extract_embed_urls(url, webpage), None)
        if not embed_url:
            raise ExtractorError('Unable to extract any SproutVideo embed url')
        return self.url_result(embed_url, SproutVideoIE, video_id)
|
||||
@@ -1,55 +1,31 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, int_or_none, traverse_obj
|
||||
from .vidyard import VidyardBaseIE
|
||||
from ..utils import ExtractorError, int_or_none, make_archive_id
|
||||
|
||||
|
||||
class SwearnetEpisodeIE(InfoExtractor):
|
||||
class SwearnetEpisodeIE(VidyardBaseIE):
|
||||
_VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1',
|
||||
'info_dict': {
|
||||
'id': '232819',
|
||||
'id': 'wicK2EOzjOdxkUXGDIgcPw',
|
||||
'display_id': '232819',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'duration': 719,
|
||||
'description': 'md5:c48ef71440ce466284c07085cd7bd761',
|
||||
'description': r're:Are you drunk and high and craving a grilled cheese sandwich.+',
|
||||
'season': 'Season 1',
|
||||
'title': 'Episode 1 - Grilled Cheese Sammich',
|
||||
'season_number': 1,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/232819/_RX04IKIq60a2V6rIRqq_Q_small.jpg',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/custom/0dd74f9b-388a-452e-b570-b407fb64435b_small.jpg',
|
||||
'tags': ['Getting Learnt with Ricky', 'drunk', 'grilled cheese', 'high'],
|
||||
'_old_archive_ids': ['swearnetepisode 232819'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_formats_and_subtitle(self, video_source, video_id):
|
||||
video_source = video_source or {}
|
||||
formats, subtitles = [], {}
|
||||
for key, value in video_source.items():
|
||||
if key == 'hls':
|
||||
for video_hls in value:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(video_hls.get('url'), video_id)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
formats.extend({
|
||||
'url': video_mp4.get('url'),
|
||||
'ext': 'mp4',
|
||||
} for video_mp4 in value)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _get_direct_subtitle(self, caption_json):
|
||||
subs = {}
|
||||
for caption in caption_json:
|
||||
subs.setdefault(caption.get('language') or 'und', []).append({
|
||||
'url': caption.get('vttUrl'),
|
||||
'name': caption.get('name'),
|
||||
})
|
||||
|
||||
return subs
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
slug, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
|
||||
webpage = self._download_webpage(url, slug)
|
||||
|
||||
try:
|
||||
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
||||
@@ -58,22 +34,12 @@ class SwearnetEpisodeIE(InfoExtractor):
|
||||
self.raise_login_required()
|
||||
raise
|
||||
|
||||
json_data = self._download_json(
|
||||
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
|
||||
|
||||
formats, subtitles = self._get_formats_and_subtitle(json_data['sources'], display_id)
|
||||
self._merge_subtitles(self._get_direct_subtitle(json_data.get('captions')), target=subtitles)
|
||||
info = self._process_video_json(self._fetch_video_json(external_id)['chapters'][0], external_id)
|
||||
if info.get('display_id'):
|
||||
info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])]
|
||||
|
||||
return {
|
||||
'id': str(json_data['videoId']),
|
||||
'title': json_data.get('name') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||
'description': (json_data.get('description')
|
||||
or self._html_search_meta(['og:description', 'twitter:description'], webpage)),
|
||||
'duration': int_or_none(json_data.get('seconds')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**info,
|
||||
'season_number': int_or_none(season_number),
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'thumbnails': [{'url': thumbnail_url}
|
||||
for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))],
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ from ..utils import (
|
||||
try_call,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -43,8 +44,8 @@ class TikTokBaseIE(InfoExtractor):
|
||||
'iid': None,
|
||||
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
|
||||
'app_name': 'musical_ly',
|
||||
'app_version': '34.1.2',
|
||||
'manifest_app_version': '2023401020',
|
||||
'app_version': '35.1.3',
|
||||
'manifest_app_version': '2023501030',
|
||||
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
|
||||
'aid': '0',
|
||||
}
|
||||
@@ -114,7 +115,7 @@ class TikTokBaseIE(InfoExtractor):
|
||||
'universal data', display_id, end_pattern=r'</script>', default={}),
|
||||
('__DEFAULT_SCOPE__', {dict})) or {}
|
||||
|
||||
def _call_api_impl(self, ep, query, video_id, fatal=True,
|
||||
def _call_api_impl(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
|
||||
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
|
||||
@@ -125,7 +126,8 @@ class TikTokBaseIE(InfoExtractor):
|
||||
fatal=fatal, note=note, errnote=errnote, headers={
|
||||
'User-Agent': self._APP_USER_AGENT,
|
||||
'Accept': 'application/json',
|
||||
}, query=query)
|
||||
**(headers or {}),
|
||||
}, query=query, data=data)
|
||||
|
||||
def _build_api_query(self, query):
|
||||
return filter_dict({
|
||||
@@ -174,7 +176,7 @@ class TikTokBaseIE(InfoExtractor):
|
||||
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
||||
})
|
||||
|
||||
def _call_api(self, ep, query, video_id, fatal=True,
|
||||
def _call_api(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
if not self._APP_INFO and not self._get_next_app_info():
|
||||
message = 'No working app info is available'
|
||||
@@ -187,9 +189,11 @@ class TikTokBaseIE(InfoExtractor):
|
||||
max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
|
||||
for count in itertools.count(1):
|
||||
self.write_debug(str(self._APP_INFO))
|
||||
real_query = self._build_api_query(query)
|
||||
real_query = self._build_api_query(query or {})
|
||||
try:
|
||||
return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote)
|
||||
return self._call_api_impl(
|
||||
ep, video_id, query=real_query, data=data, headers=headers,
|
||||
fatal=fatal, note=note, errnote=errnote)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
||||
message = str(e.cause or e.msg)
|
||||
@@ -204,12 +208,13 @@ class TikTokBaseIE(InfoExtractor):
|
||||
raise
|
||||
|
||||
def _extract_aweme_app(self, aweme_id):
|
||||
feed_list = self._call_api(
|
||||
'feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed',
|
||||
errnote='Unable to download video feed').get('aweme_list') or []
|
||||
aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
|
||||
aweme_detail = traverse_obj(
|
||||
self._call_api('multi/aweme/detail', aweme_id, data=urlencode_postdata({
|
||||
'aweme_ids': f'[{aweme_id}]',
|
||||
'request_source': '0',
|
||||
}), headers={'X-Argus': ''}), ('aweme_details', 0, {dict}))
|
||||
if not aweme_detail:
|
||||
raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
|
||||
raise ExtractorError('Unable to extract aweme detail info', video_id=aweme_id)
|
||||
return self._parse_aweme_video_app(aweme_detail)
|
||||
|
||||
def _extract_web_data_and_status(self, url, video_id, fatal=True):
|
||||
@@ -1037,7 +1042,8 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul
|
||||
for retry in self.RetryManager():
|
||||
try:
|
||||
post_list = self._call_api(
|
||||
self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}',
|
||||
self._API_ENDPOINT, display_id, query=query,
|
||||
note=f'Downloading video list page {page}',
|
||||
errnote='Unable to download video list')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
||||
|
||||
@@ -28,35 +28,11 @@ class ToggleIE(InfoExtractor):
|
||||
'skip_download': 'm3u8 download',
|
||||
},
|
||||
}, {
|
||||
'note': 'DRM-protected video',
|
||||
'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
|
||||
'info_dict': {
|
||||
'id': '341413',
|
||||
'ext': 'wvm',
|
||||
'title': 'Dug\'s Special Mission',
|
||||
'description': 'md5:e86c6f4458214905c1772398fabc93e0',
|
||||
'upload_date': '20150827',
|
||||
'timestamp': 1440644006,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'DRM-protected wvm download',
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# this also tests correct video id extraction
|
||||
'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
|
||||
'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
|
||||
'info_dict': {
|
||||
'id': '332861',
|
||||
'ext': 'mp4',
|
||||
'title': '28th SEA Games (5 Show) - Episode 11',
|
||||
'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
|
||||
'upload_date': '20150605',
|
||||
'timestamp': 1433480166,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'DRM-protected wvm download',
|
||||
},
|
||||
'skip': 'm3u8 links are geo-restricted',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -21,7 +21,7 @@ class TubeTuGrazBaseIE(InfoExtractor):
|
||||
if not urlh:
|
||||
return
|
||||
|
||||
content, urlh = self._download_webpage_handle(
|
||||
response = self._download_webpage_handle(
|
||||
urlh.url, None, fatal=False, headers={'referer': urlh.url},
|
||||
note='logging in', errnote='unable to log in',
|
||||
data=urlencode_postdata({
|
||||
@@ -30,7 +30,11 @@ class TubeTuGrazBaseIE(InfoExtractor):
|
||||
'j_username': username,
|
||||
'j_password': password,
|
||||
}))
|
||||
if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
|
||||
if not response:
|
||||
return
|
||||
|
||||
content, urlh = response
|
||||
if urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
|
||||
return
|
||||
|
||||
if not self._html_search_regex(
|
||||
@@ -39,7 +43,7 @@ class TubeTuGrazBaseIE(InfoExtractor):
|
||||
self.report_warning('unable to login: incorrect password')
|
||||
return
|
||||
|
||||
content, urlh = self._download_webpage_handle(
|
||||
urlh = self._request_webpage(
|
||||
urlh.url, None, fatal=False, headers={'referer': urlh.url},
|
||||
note='logging in with TFA', errnote='unable to log in with TFA',
|
||||
data=urlencode_postdata({
|
||||
|
||||
@@ -14,6 +14,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
make_archive_id,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
@@ -107,7 +108,7 @@ class TwitterBaseIE(InfoExtractor):
|
||||
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
|
||||
f = {
|
||||
'url': variant_url,
|
||||
'format_id': 'http' + (f'-{tbr}' if tbr else ''),
|
||||
'format_id': join_nonempty('http', tbr),
|
||||
'tbr': tbr,
|
||||
}
|
||||
self._search_dimensions_in_video_url(f, variant_url)
|
||||
|
||||
426
yt_dlp/extractor/vidyard.py
Normal file
426
yt_dlp/extractor/vidyard.py
Normal file
@@ -0,0 +1,426 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
mimetype2ext,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class VidyardBaseIE(InfoExtractor):
    """Shared helpers for extractors that read the Vidyard player JSON."""

    # Sent with the HLS manifest requests and returned as `http_headers`
    _HEADERS = {'Referer': 'https://play.vidyard.com/'}

    def _get_formats_and_subtitles(self, sources, video_id):
        """Build the format list and subtitle dict from a chapter's ``sources``.

        ``sources`` is expected to be a dict mapping a source type to a list
        of source dicts; any other value yields no formats. It is only read,
        never modified.

        Returns a ``(formats, subtitles)`` tuple.
        """
        formats, subtitles = [], {}

        def add_hls_fmts_and_subs(m3u8_url):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', headers=self._HEADERS, fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        # Look the HLS entries up without popping them, so the caller's JSON stays intact
        hls_list = sources.get('hls') if isinstance(sources, dict) else None
        if master_m3u8_url := traverse_obj(
                hls_list, (lambda _, v: v['profile'] == 'auto', 'url', {url_or_none}, any)):
            add_hls_fmts_and_subs(master_m3u8_url)
        if not formats:  # These are duplicate and unnecessary requests if we got 'auto' hls fmts
            for variant_m3u8_url in traverse_obj(hls_list, (..., 'url', {url_or_none})):
                add_hls_fmts_and_subs(variant_m3u8_url)

        for source_type, source_list in traverse_obj(sources, ({dict.items}, ...)):
            if source_type == 'hls':
                # Already handled above through the m3u8 manifests
                continue
            for source in traverse_obj(source_list, lambda _, v: url_or_none(v['url'])):
                profile = source.get('profile')
                formats.append({
                    'url': source['url'],
                    'ext': mimetype2ext(source.get('mimeType'), default=None),
                    'format_id': join_nonempty('http', source_type, profile),
                    **parse_resolution(profile),
                })

        self._remove_duplicate_formats(formats)
        return formats, subtitles

    def _get_direct_subtitles(self, caption_json):
        """Map caption entries that have a valid ``vttUrl`` to a subtitles dict.

        Entries are grouped by their ``language``, falling back to ``'und'``.
        """
        subs = {}
        for caption in traverse_obj(caption_json, lambda _, v: url_or_none(v['vttUrl'])):
            subs.setdefault(caption.get('language') or 'und', []).append({
                'url': caption['vttUrl'],
                'name': caption.get('name'),
            })

        return subs

    def _fetch_video_json(self, video_id):
        """Download the player JSON for ``video_id`` and return its ``payload``."""
        return self._download_json(
            f'https://play.vidyard.com/player/{video_id}.json', video_id)['payload']

    def _process_video_json(self, json_data, video_id):
        """Turn one chapter of the player JSON into an info dict.

        ``json_data['sources']`` is required; every metadata field is optional.
        """
        formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], video_id)
        self._merge_subtitles(self._get_direct_subtitles(json_data.get('captions')), target=subtitles)

        return {
            **traverse_obj(json_data, {
                'id': ('facadeUuid', {str}),
                'display_id': ('videoId', {int}, {str_or_none}),
                'title': ('name', {str}),
                'description': ('description', {str}, {unescapeHTML}, {lambda x: x or None}),
                # Prefer the millisecond value, falling back to whole seconds
                'duration': ((
                    ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
                    ('seconds', {int_or_none})), any),
                'thumbnails': ('thumbnailUrls', ('small', 'normal'), {'url': {url_or_none}}),
                'tags': ('tags', ..., 'name', {str}),
            }),
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': self._HEADERS,
        }
|
||||
|
||||
|
||||
class VidyardIE(VidyardBaseIE):
|
||||
_VALID_URL = [
|
||||
r'https?://[\w-]+(?:\.hubs)?\.vidyard\.com/watch/(?P<id>[\w-]+)',
|
||||
r'https?://(?:embed|share)\.vidyard\.com/share/(?P<id>[\w-]+)',
|
||||
r'https?://play\.vidyard\.com/(?:player/)?(?P<id>[\w-]+)',
|
||||
]
|
||||
_EMBED_REGEX = [r'<iframe[^>]* src=["\'](?P<url>(?:https?:)?//play\.vidyard\.com/[\w-]+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://vyexample03.hubs.vidyard.com/watch/oTDMPlUv--51Th455G5u7Q',
|
||||
'info_dict': {
|
||||
'id': 'oTDMPlUv--51Th455G5u7Q',
|
||||
'display_id': '50347',
|
||||
'ext': 'mp4',
|
||||
'title': 'Homepage Video',
|
||||
'description': 'Look I changed the description.',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg',
|
||||
'duration': 99,
|
||||
'tags': ['these', 'are', 'all', 'tags'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://share.vidyard.com/watch/PaQzDAT1h8JqB8ivEu2j6Y?',
|
||||
'info_dict': {
|
||||
'id': 'PaQzDAT1h8JqB8ivEu2j6Y',
|
||||
'display_id': '9281024',
|
||||
'ext': 'mp4',
|
||||
'title': 'Inline Embed',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif',
|
||||
'duration': 41.186,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://embed.vidyard.com/share/oTDMPlUv--51Th455G5u7Q',
|
||||
'info_dict': {
|
||||
'id': 'oTDMPlUv--51Th455G5u7Q',
|
||||
'display_id': '50347',
|
||||
'ext': 'mp4',
|
||||
'title': 'Homepage Video',
|
||||
'description': 'Look I changed the description.',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg',
|
||||
'duration': 99,
|
||||
'tags': ['these', 'are', 'all', 'tags'],
|
||||
},
|
||||
}, {
|
||||
# First video from playlist below
|
||||
'url': 'https://embed.vidyard.com/share/SyStyHtYujcBHe5PkZc5DL',
|
||||
'info_dict': {
|
||||
'id': 'SyStyHtYujcBHe5PkZc5DL',
|
||||
'display_id': '41974005',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'description': r're:In this video, you will learn how to prepare the frame.+',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png',
|
||||
'duration': 258.666,
|
||||
},
|
||||
}, {
|
||||
# Playlist
|
||||
'url': 'https://thelink.hubs.vidyard.com/watch/pwu7pCYWSwAnPxs8nDoFrE',
|
||||
'info_dict': {
|
||||
'id': 'pwu7pCYWSwAnPxs8nDoFrE',
|
||||
'title': 'PLAYLIST - Palm Beach Shutters- Bi-Fold Track System Installation',
|
||||
'entries': [{
|
||||
'id': 'SyStyHtYujcBHe5PkZc5DL',
|
||||
'display_id': '41974005',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png',
|
||||
'duration': 258.666,
|
||||
}, {
|
||||
'id': '1Fw4B84jZTXLXWqkE71RiM',
|
||||
'display_id': '5861113',
|
||||
'ext': 'mp4',
|
||||
'title': 'Palm Beach - Bi-Fold Track System "Frame Installation"',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861113/29CJ54s5g1_aP38zkKLHew_small.jpg',
|
||||
'duration': 167.858,
|
||||
}, {
|
||||
'id': 'DqP3wBvLXSpxrcqpT5kEeo',
|
||||
'display_id': '41976334',
|
||||
'ext': 'mp4',
|
||||
'title': 'Install the Track for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861090/RwG2VaTylUa6KhSTED1r1Q_small.png',
|
||||
'duration': 94.229,
|
||||
}, {
|
||||
'id': 'opfybfxpzQArxqtQYB6oBU',
|
||||
'display_id': '41976364',
|
||||
'ext': 'mp4',
|
||||
'title': 'Install the Panel for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860926/JIOaJR08dM4QgXi_iQ2zGA_small.png',
|
||||
'duration': 191.467,
|
||||
}, {
|
||||
'id': 'rWrXvkbTNNaNqD6189HJya',
|
||||
'display_id': '41976382',
|
||||
'ext': 'mp4',
|
||||
'title': 'Adjust the Panels for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860687/CwHxBv4UudAhOh43FVB4tw_small.png',
|
||||
'duration': 138.155,
|
||||
}, {
|
||||
'id': 'eYPTB521MZ9TPEArSethQ5',
|
||||
'display_id': '41976409',
|
||||
'ext': 'mp4',
|
||||
'title': 'Assemble and Install the Valance for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861425/0y68qlMU4O5VKU7bJ8i_AA_small.png',
|
||||
'duration': 148.224,
|
||||
}],
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
# Non hubs.vidyard.com playlist
|
||||
'url': 'https://salesforce.vidyard.com/watch/d4vqPjs7Q5EzVEis5QT3jd',
|
||||
'info_dict': {
|
||||
'id': 'd4vqPjs7Q5EzVEis5QT3jd',
|
||||
'title': 'How To: Service Cloud: Import External Content in Lightning Knowledge',
|
||||
'entries': [{
|
||||
'id': 'mcjDpSZir2iSttbvFkx6Rv',
|
||||
'display_id': '29479036',
|
||||
'ext': 'mp4',
|
||||
'title': 'Welcome to this Expert Coaching Series',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/ouyQi9WuwyiOupChUWNmjQ/7170d3485ba602e012df05_small.jpg',
|
||||
'duration': 38.205,
|
||||
}, {
|
||||
'id': '84bPYwpg243G6xYEfJdYw9',
|
||||
'display_id': '21820704',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 1 - Title + Agenda',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/HFPN0ZgQq4Ow8BghGcQSow/bfaa30123c8f6601e7d7f2_small.jpg',
|
||||
'duration': 98.016,
|
||||
}, {
|
||||
'id': 'nP17fMuvA66buVHUrzqjTi',
|
||||
'display_id': '21820707',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 2 - Import Options',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/rGRIF5nFjPI9OOA2qJ_Dbg/86a8d02bfec9a566845dd4_small.jpg',
|
||||
'duration': 199.136,
|
||||
}, {
|
||||
'id': 'm54EcwXdpA5gDBH5rgCYoV',
|
||||
'display_id': '21820710',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 3 - Importing Article Translations',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/IVX4XR8zpSsiNIHx45kz-A/1ccbf8a29a33856d06b3ed_small.jpg',
|
||||
'duration': 184.352,
|
||||
}, {
|
||||
'id': 'j4nzS42oq4hE9oRV73w3eQ',
|
||||
'display_id': '21820716',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 4 - Best Practices',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/BtrRrQpRDLbA4AT95YQyog/1f1e6b8e7fdc3fa95ec8d3_small.jpg',
|
||||
'duration': 296.960,
|
||||
}, {
|
||||
'id': 'y28PYfW5pftvers9PXzisC',
|
||||
'display_id': '21820727',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 5 - Migration Steps',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/K2CdQOXDfLcrVTF60r0bdw/a09239ada28b6ffce12b1f_small.jpg',
|
||||
'duration': 620.640,
|
||||
}, {
|
||||
'id': 'YWU1eQxYvhj29SjYoPw5jH',
|
||||
'display_id': '21820733',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 6 - Demo',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/rsmhP-cO8dAa8ilvFGCX0g/7911ef415167cd14032068_small.jpg',
|
||||
'duration': 631.456,
|
||||
}, {
|
||||
'id': 'nmEvVqpwdJUgb74zKsLGxn',
|
||||
'display_id': '29479037',
|
||||
'ext': 'mp4',
|
||||
'title': 'Schedule Your Follow-Up',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/Rtwc7X4PEkF4Ae5kHi-Jvw/174ebed3f34227b1ffa1d0_small.jpg',
|
||||
'duration': 33.608,
|
||||
}],
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}, {
|
||||
# URL of iframe embed src
|
||||
'url': 'https://play.vidyard.com/iDqTwWGrd36vaLuaCY3nTs.html',
|
||||
'info_dict': {
|
||||
'id': 'iDqTwWGrd36vaLuaCY3nTs',
|
||||
'display_id': '9281009',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lightbox Embed',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif',
|
||||
'duration': 39.035,
|
||||
},
|
||||
}, {
|
||||
# Player JSON URL
|
||||
'url': 'https://play.vidyard.com/player/7GAApnNNbcZZ46k6JqJQSh.json?disable_analytics=0',
|
||||
'info_dict': {
|
||||
'id': '7GAApnNNbcZZ46k6JqJQSh',
|
||||
'display_id': '820026',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Art of Storytelling: How to Deliver Your Brand Story with Content & Social',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/MhbE-5sEFQu4x3fI6FkNlA/41eb5717c557cd19456910_small.jpg',
|
||||
'duration': 2153.013,
|
||||
'tags': ['Summit2017'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://share.vidyard.com/share/diYeo6YR2yiGgL8odvS8Ri',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.vidyard.com/FFlz3ZpxhIfKQ1fd9DAryA',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.vidyard.com/qhMAu5A76GZVrFzOPgSf9A/type/standalone',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# URL containing inline/lightbox embedded video
|
||||
'url': 'https://resources.altium.com/p/2-the-extreme-importance-of-pc-board-stack-up',
|
||||
'info_dict': {
|
||||
'id': 'GDx1oXrFWj4XHbipfoXaMn',
|
||||
'display_id': '3225198',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Extreme Importance of PC Board Stack Up',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/73_Q3_hBexWX7Og1sae6cg/9998fa4faec921439e2c04_small.jpg',
|
||||
'duration': 3422.742,
|
||||
},
|
||||
}, {
|
||||
# <script ... id="vidyard_embed_code_DXx2sW4WaLA6hTdGFz7ja8" src="//play.vidyard.com/DXx2sW4WaLA6hTdGFz7ja8.js?
|
||||
'url': 'http://videos.vivint.com/watch/DXx2sW4WaLA6hTdGFz7ja8',
|
||||
'info_dict': {
|
||||
'id': 'DXx2sW4WaLA6hTdGFz7ja8',
|
||||
'display_id': '2746529',
|
||||
'ext': 'mp4',
|
||||
'title': 'How To Powercycle the Smart Hub Panel',
|
||||
'duration': 30.613,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/_-6cw8xQUJ3qiCs_JENc_A/b21d7a5e47967f49399d30_small.jpg',
|
||||
},
|
||||
}, {
|
||||
# <script id="vidyard_embed_code_MIBHhiLVTxga7wqLsuoDjQ" src="//embed.vidyard.com/embed/MIBHhiLVTxga7wqLsuoDjQ/inline?v=2.1">
|
||||
'url': 'https://www.babypips.com/learn/forex/introduction-to-metatrader4',
|
||||
'info_dict': {
|
||||
'id': 'MIBHhiLVTxga7wqLsuoDjQ',
|
||||
'display_id': '20291',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lesson 1 - Opening an MT4 Account',
|
||||
'description': 'Never heard of MetaTrader4? Here\'s the 411 on the popular trading platform!',
|
||||
'duration': 168,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/20291/IM-G2WXQR9VBLl2Cmzvftg_small.jpg',
|
||||
},
|
||||
}, {
|
||||
# <iframe ... src="//play.vidyard.com/d61w8EQoZv1LDuPxDkQP2Q/type/background?preview=1"
|
||||
'url': 'https://www.avaya.com/en/',
|
||||
'info_dict': {
|
||||
# These values come from the generic extractor and don't matter
|
||||
'id': str,
|
||||
'title': str,
|
||||
'age_limit': 0,
|
||||
'upload_date': str,
|
||||
'description': str,
|
||||
'thumbnail': str,
|
||||
'timestamp': float,
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'd61w8EQoZv1LDuPxDkQP2Q',
|
||||
'display_id': '42456529',
|
||||
'ext': 'mp4',
|
||||
'title': 'GettyImages-1027',
|
||||
'duration': 6.0,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/42061563/p6bY08d2N4e4IDz-7J4_wkgsPq3-qgcx_small.jpg',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'VAsYDi7eiqZRbHodUA2meC',
|
||||
'display_id': '42456569',
|
||||
'ext': 'mp4',
|
||||
'title': 'GettyImages-1325598833',
|
||||
'duration': 6.083,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/42052358/y3qrbDpn_2quWr_5XBi7yzS3UvEI__ZM_small.jpg',
|
||||
},
|
||||
}],
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# <div class="vidyard-player-embed" data-uuid="vpCWTVHw3qrciLtVY94YkS"
|
||||
'url': 'https://www.gogoair.com/',
|
||||
'info_dict': {
|
||||
# These values come from the generic extractor and don't matter
|
||||
'id': str,
|
||||
'title': str,
|
||||
'description': str,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'vpCWTVHw3qrciLtVY94YkS',
|
||||
'display_id': '40780699',
|
||||
'ext': 'mp4',
|
||||
'title': 'Upgrade to AVANCE 100% worth it - Jason Talley, Owner and Pilot, Testimonial',
|
||||
'description': 'md5:f609824839439a51990cef55ffc472aa',
|
||||
'duration': 70.737,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/40780699/KzjfYZz5MZl2gHF_e-4i2c6ib1cLDweQ_small.jpg',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'xAmV9AsLbnitCw35paLBD8',
|
||||
'display_id': '31130867',
|
||||
'ext': 'mp4',
|
||||
'title': 'Brad Keselowski goes faster with Gogo AVANCE inflight Wi-Fi',
|
||||
'duration': 132.565,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/31130867/HknyDtLdm2Eih9JZ4A5XLjhfBX_6HRw5_small.jpg',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'RkkrFRNxfP79nwCQavecpF',
|
||||
'display_id': '39009815',
|
||||
'ext': 'mp4',
|
||||
'title': 'Live Demo of Gogo Galileo',
|
||||
'description': 'md5:e2df497236f4e12c3fef8b392b5f23e0',
|
||||
'duration': 112.128,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/38144873/CWLlxfUbJ4Gh0ThbUum89IsEM4yupzMb_small.jpg',
|
||||
},
|
||||
}],
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield Vidyard player URLs discovered in *webpage*.

    Three sources are consulted, in this order:
      1. URLs found by the parent implementation; any that start with
         ``//`` are given an explicit ``https:`` scheme.
      2. ``<img>``/``<div>`` tags carrying the ``vidyard-player-embed``
         class, identified by their ``data-uuid`` attribute.
      3. ``<script>`` tags whose id is ``vidyard_embed_code_<id>``.
    """
    # Parent results may be protocol-relative; make the scheme explicit
    for candidate in super()._extract_embed_urls(url, webpage):
        yield f'https:{candidate}' if candidate.startswith('//') else candidate

    # Inline/lightbox embeds: the whole opening tag is captured as group 1
    inline_embed_re = r'(<(?:img|div)[^>]* class=(["\'])(?:[^>"\']* )?vidyard-player-embed(?: [^>"\']*)?\2[^>]+>)'
    for tag_match in re.finditer(inline_embed_re, webpage):
        uuid = extract_attributes(tag_match.group(1)).get('data-uuid')
        if uuid:
            yield f'https://play.vidyard.com/{uuid}'

    # Script-based embeds expose the id as a suffix of the element id
    script_embed_re = r'<script[^>]* id=["\']vidyard_embed_code_([\w-]+)["\']'
    for script_match in re.finditer(script_embed_re, webpage):
        yield f'https://play.vidyard.com/{script_match.group(1)}'
|
||||
|
||||
def _real_extract(self, url):
    """Extract a Vidyard player: one video, or a playlist of its chapters.

    When the fetched player JSON holds exactly one chapter, that chapter is
    processed and returned directly. Otherwise every chapter is processed
    and the results are wrapped in a playlist keyed by ``playerUuid`` and
    titled with the optional ``name`` field.
    """
    video_id = self._match_id(url)
    video_json = self._fetch_video_json(video_id)
    chapters = video_json['chapters']

    if len(chapters) != 1:
        entries = [self._process_video_json(chapter, video_id) for chapter in chapters]
        return self.playlist_result(
            entries, str(video_json['playerUuid']), video_json.get('name'))

    return self._process_video_json(chapters[0], video_id)
|
||||
@@ -5,6 +5,7 @@ from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_age_limit,
|
||||
traverse_obj,
|
||||
)
|
||||
@@ -120,7 +121,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
|
||||
'height', default=None))
|
||||
formats.append({
|
||||
'url': video_asset_url,
|
||||
'format_id': 'http{}'.format(f'-{bitrate}' if bitrate else ''),
|
||||
'format_id': join_nonempty('http', bitrate),
|
||||
'tbr': bitrate,
|
||||
'height': height,
|
||||
'vcodec': video_asset.get('codec'),
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import base64
|
||||
import functools
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
@@ -14,6 +15,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
parse_filesize,
|
||||
@@ -84,29 +86,23 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
expected=True)
|
||||
return password
|
||||
|
||||
def _verify_video_password(self, url, video_id, password, token, vuid):
    """Submit *password* as a form post to ``<url>/password`` and return the page.

    The ``vuid`` cookie is set before the request is made. The request
    carries the password and token as urlencoded form data and uses the
    (https-normalised) URL as the Referer.
    """
    # vimeo only supports https now, but the user can give an http url
    if url.startswith('http://'):
        url = url.replace('http://', 'https://')

    self._set_vimeo_cookie('vuid', vuid)

    form_data = urlencode_postdata({
        'password': password,
        'token': token,
    })
    request_headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': url,
    }
    return self._download_webpage(
        url + '/password', video_id, 'Verifying the password', 'Wrong password',
        data=form_data, headers=request_headers)
|
||||
|
||||
def _extract_xsrft_and_vuid(self, webpage):
    """Return the ``(xsrft, vuid)`` pair scraped from *webpage*.

    The xsrft token is searched for first, then the vuid; both lookups go
    through ``_search_regex`` using the named groups in the patterns below.
    """
    xsrft_re = r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)'
    vuid_re = r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1'

    login_token = self._search_regex(xsrft_re, webpage, 'login token', group='xsrft')
    visitor_id = self._search_regex(vuid_re, webpage, 'vuid', group='vuid')
    return login_token, visitor_id
|
||||
def _verify_video_password(self, video_id, password, token):
    """Submit *password* for *video_id* as JSON and return the response page.

    The password and token are posted as a compact JSON body to
    ``https://vimeo.com/<video_id>/password`` with impersonation enabled.

    Raises:
        ExtractorError: with ``expected=True`` and the message
            'Wrong password' when the request fails with HTTP status 418;
            any other ExtractorError is re-raised unchanged.
    """
    url = f'https://vimeo.com/{video_id}'
    payload = json.dumps({
        'password': password,
        'token': token,
    }, separators=(',', ':')).encode()
    request_headers = {
        'Accept': '*/*',
        'Content-Type': 'application/json',
        'Referer': url,
    }

    try:
        return self._download_webpage(
            f'{url}/password', video_id, 'Submitting video password',
            data=payload, headers=request_headers, impersonate=True)
    except ExtractorError as error:
        cause = error.cause
        # HTTP 418 is the status this code treats as a rejected password
        rejected = isinstance(cause, HTTPError) and cause.status == 418
        if not rejected:
            raise
        raise ExtractorError('Wrong password', expected=True)
|
||||
|
||||
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
|
||||
vimeo_config = self._search_regex(
|
||||
@@ -745,21 +741,34 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
raise ExtractorError('Wrong video password', expected=True)
|
||||
return checked
|
||||
|
||||
def _extract_from_api(self, video_id, unlisted_hash=None):
|
||||
token = self._download_json(
|
||||
'https://vimeo.com/_rv/jwt', video_id, headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})['token']
|
||||
api_url = 'https://api.vimeo.com/videos/' + video_id
|
||||
if unlisted_hash:
|
||||
api_url += ':' + unlisted_hash
|
||||
video = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Authorization': 'jwt ' + token,
|
||||
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None):
    """Download the api.vimeo.com JSON record for *video_id*.

    The endpoint is ``/videos/<video_id>``, with ``:<unlisted_hash>``
    appended when a hash is given. The request is authorised with
    *jwt_token* and restricted to the fields listed in the ``fields``
    query parameter.
    """
    api_url = join_nonempty(
        f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':')
    request_headers = {
        'Authorization': f'jwt {jwt_token}',
        'Accept': 'application/json',
    }
    request_query = {
        'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
    }
    return self._download_json(
        api_url, video_id, 'Downloading API JSON',
        headers=request_headers, query=request_query)
|
||||
|
||||
def _extract_from_api(self, video_id, unlisted_hash=None):
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
|
||||
|
||||
for retry in (False, True):
|
||||
try:
|
||||
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
|
||||
except ExtractorError as e:
|
||||
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
|
||||
and 'password' in traverse_obj(
|
||||
e.cause.response.read(),
|
||||
({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'),
|
||||
)):
|
||||
self._verify_video_password(
|
||||
video_id, self._get_video_password(), viewer['xsrft'])
|
||||
continue
|
||||
raise
|
||||
|
||||
info = self._parse_config(self._download_json(
|
||||
video['config_url'], video_id), video_id)
|
||||
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
|
||||
@@ -829,21 +838,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
url = 'https://vimeo.com/' + video_id
|
||||
|
||||
self._try_album_password(url)
|
||||
is_secure = urllib.parse.urlparse(url).scheme == 'https'
|
||||
try:
|
||||
# Retrieve video webpage to extract further information
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
url, video_id, headers=headers)
|
||||
url, video_id, headers=headers, impersonate=is_secure)
|
||||
redirect_url = urlh.url
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, HTTPError) and ee.cause.status == 403:
|
||||
errmsg = ee.cause.response.read()
|
||||
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
|
||||
raise ExtractorError(
|
||||
'Cannot download embed-only video without embedding '
|
||||
'URL. Please call yt-dlp with the URL of the page '
|
||||
'that embeds this video.',
|
||||
expected=True)
|
||||
raise
|
||||
except ExtractorError as error:
|
||||
if not isinstance(error.cause, HTTPError) or error.cause.status not in (403, 429):
|
||||
raise
|
||||
errmsg = error.cause.response.read()
|
||||
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
|
||||
raise ExtractorError(
|
||||
'Cannot download embed-only video without embedding URL. Please call yt-dlp '
|
||||
'with the URL of the page that embeds this video.', expected=True)
|
||||
# 403 == vimeo.com TLS fingerprint or DC IP block; 429 == player.vimeo.com TLS FP block
|
||||
status = error.cause.status
|
||||
dcip_msg = 'If you are using a data center IP or VPN/proxy, your IP may be blocked'
|
||||
if target := error.cause.response.extensions.get('impersonate'):
|
||||
raise ExtractorError(
|
||||
f'Got HTTP Error {status} when using impersonate target "{target}". {dcip_msg}')
|
||||
elif not is_secure:
|
||||
raise ExtractorError(f'Got HTTP Error {status}. {dcip_msg}', expected=True)
|
||||
raise ExtractorError(
|
||||
'This request has been blocked due to its TLS fingerprint. Install a '
|
||||
'required impersonation dependency if possible, or else if you are okay with '
|
||||
f'{self._downloader._format_err("compromising your security/cookies", "light red")}, '
|
||||
f'try replacing "https:" with "http:" in the input URL. {dcip_msg}.', expected=True)
|
||||
|
||||
if '://player.vimeo.com/video/' in url:
|
||||
config = self._search_json(
|
||||
@@ -853,12 +874,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
redirect_url, video_id, headers)
|
||||
return self._parse_config(config, video_id)
|
||||
|
||||
if re.search(r'<form[^>]+?id="pw_form"', webpage):
|
||||
video_password = self._get_video_password()
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
webpage = self._verify_video_password(
|
||||
redirect_url, video_id, video_password, token, vuid)
|
||||
|
||||
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
|
||||
if vimeo_config:
|
||||
seed_status = vimeo_config.get('seed_status') or {}
|
||||
@@ -1278,9 +1293,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
video_password = self._get_video_password()
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id)
|
||||
webpage = self._verify_video_password(
|
||||
'https://vimeo.com/' + video_id, video_id,
|
||||
video_password, viewer['xsrft'], viewer['vuid'])
|
||||
webpage = self._verify_video_password(video_id, video_password, viewer['xsrft'])
|
||||
clip_page_config = self._parse_json(self._search_regex(
|
||||
r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
|
||||
webpage, 'clip page config'), video_id)
|
||||
|
||||
108
yt_dlp/extractor/vtv.py
Normal file
108
yt_dlp/extractor/vtv.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes, get_element_html_by_class, remove_start
|
||||
|
||||
|
||||
class VTVGoIE(InfoExtractor):
    # Two URL shapes are accepted: slug pages under /kho-video or /tin-tuc
    # that end in a numeric id, and /digital/detail.php?content_id=<id>
    _VALID_URL = [
        r'https?://(?:www\.)?vtvgo\.vn/(kho-video|tin-tuc)/[\w.-]*?(?P<id>\d+)(?:\.[a-z]+|/)?(?:$|[?#])',
        r'https?://(?:www\.)?vtvgo\.vn/digital/detail\.php\?(?:[^#]+&)?content_id=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'https://vtvgo.vn/kho-video/bep-vtv-vit-chao-rieng-so-24-888456.html',
        'info_dict': {
            'id': '888456',
            'ext': 'mp4',
            'title': 'Bếp VTV | Vịt chao riềng | Số 24',
            'description': 'md5:2b4e93ec2b954304170d32be288ce2c8',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20230201/VIT-CHAO-RIENG_VTV_638108894672812459.jpg',
        },
    }, {
        'url': 'https://vtvgo.vn/tin-tuc/hot-search-1-zlife-khong-ngo-toi-phai-khong-862074',
        'info_dict': {
            'id': '862074',
            'ext': 'mp4',
            'title': 'Hot Search #1 | Zlife | Không ngờ tới phải không? ',
            'description': 'md5:e967d0e2efbbebbee8814a55799b4d0f',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20220504/6b9a8552-e71c-46ce-bc9d-50c9bb506f9c.jpeg',
        },
    }, {
        'url': 'https://vtvgo.vn/kho-video/918311.html',
        'info_dict': {
            'id': '918311',
            'title': 'Cà phê sáng | 05/02/2024 | Tái hiện hình ảnh Hà Nội xưa tại ngôi nhà di sản',
            'ext': 'mp4',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240205/0506_ca_phe_sang_638427226021318322.jpg',
            'description': 'md5:b121c67948f1ce58e6a036042fc14c1b',
        },
    }, {
        'url': 'https://vtvgo.vn/digital/detail.php?digital_id=168&content_id=918634',
        'info_dict': {
            'id': '918634',
            'ext': 'mp4',
            'title': 'Gặp nhau cuối năm | Táo quân 2024',
            'description': 'md5:a1c221e78e5954d29d49b2a11c20513c',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240210/d0f73369-8f03-4108-9edd-83d4bc3997b2.png',
        },
    }, {
        'url': 'https://vtvgo.vn/digital/detail.php?content_id=919358',
        'info_dict': {
            'id': '919358',
            'ext': 'mp4',
            'title': 'Chúng ta của 8 năm sau | Tập 45 | Dương có bằng chứng, nhân chứng vạch mặt ông Khiêm',
            'description': 'md5:16ff5208cac6585137f554472a4677f3',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240221/550deff9-7736-4a0e-8b5d-33274d97cd7d.jpg',
        },
    }, {
        'url': 'https://vtvgo.vn/kho-video/888456',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's HLS link and OpenGraph metadata."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The playlist URL appears either as `var link = "..."` or as the
        # first argument of an `addPlayer(...)` call
        playlist_re = r'(?:var\s+link\s*=\s*|addPlayer\()["\'](https://[^"\']+/index\.m3u8)["\']'
        m3u8_url = self._search_regex(playlist_re, webpage, 'm3u8 url')

        # Filled in the same order as the fields are looked up: OpenGraph
        # metadata first, then the format list
        info = {'id': video_id}
        info['title'] = self._og_search_title(webpage, default=None)
        info['description'] = self._og_search_description(webpage, default=None)
        info['thumbnail'] = self._og_search_thumbnail(webpage, default=None)
        info['formats'] = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
        return info
|
||||
|
||||
|
||||
class VTVIE(InfoExtractor):
    # Matches vtv.vn video pages whose slug ends in a numeric id and `.htm`
    _VALID_URL = r'https?://(?:www\.)?vtv\.vn/video/[\w-]*?(?P<id>\d+)\.htm'
    _TESTS = [{
        'url': 'https://vtv.vn/video/thoi-su-20h-vtv1-12-6-2024-680411.htm',
        'info_dict': {
            'id': '680411',
            'ext': 'mp4',
            'title': 'Thời sự 20h VTV1 - 12/6/2024 - Video đã phát trên VTV1 | VTV.VN',
            'thumbnail': 'https://cdn-images.vtv.vn/zoom/600_315/66349b6076cb4dee98746cf1/2024/06/12/thumb/1206-ts-20h-02929741475480320806760.mp4/thumb0.jpg',
        },
    }, {
        'url': 'https://vtv.vn/video/zlife-1-khong-ngo-toi-phai-khong-vtv24-560248.htm',
        'info_dict': {
            'id': '560248',
            'ext': 'mp4',
            'title': 'ZLife #1: Không ngờ tới phải không? | VTV24 - Video đã phát trên VTV-NEWS | VTV.VN',
            'description': 'Ai đứng sau vụ việc thay đổi ảnh đại diện trên các trang mạng xã hội của VTV Digital tối 2/5?',
            'thumbnail': 'https://video-thumbs.mediacdn.vn/zoom/600_315/vtv/2022/5/13/t67s6btf3ji-16524555726231894427334.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the highlighted player's ``data-vid`` attribute."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Narrow down to the highlight box, then to the player element in it
        highlight_box = get_element_html_by_class('video-highlight-box', webpage)
        player_html = get_element_html_by_class('VCSortableInPreviewMode', highlight_box)
        data_vid = extract_attributes(player_html)['data-vid']

        # data-vid may carry a "vtv.mediacdn.vn/" prefix that is dropped
        # before the path is placed on the cdn-videos host
        stream_path = remove_start(data_vid, "vtv.mediacdn.vn/")
        m3u8_url = f'https://cdn-videos.vtv.vn/{stream_path}/master.m3u8'

        # Filled in the same order as the fields are looked up: OpenGraph
        # metadata first, then the format list
        info = {'id': video_id}
        info['title'] = self._og_search_title(webpage, default=None)
        info['description'] = self._og_search_description(webpage, default=None)
        info['thumbnail'] = self._og_search_thumbnail(webpage, default=None)
        info['formats'] = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
        return info
|
||||
@@ -52,6 +52,7 @@ class WeiboBaseIE(InfoExtractor):
|
||||
})
|
||||
|
||||
def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
|
||||
# XXX: Always fatal; _download_webpage_handle only returns False (not a tuple) on error
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
|
||||
if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
|
||||
self._update_visitor_cookies(urlh.url, video_id)
|
||||
|
||||
@@ -2,6 +2,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -76,7 +77,7 @@ class WSJIE(InfoExtractor):
|
||||
tbr = int_or_none(v.get('bitrate'))
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
'format_id': 'http' + (f'-{tbr}' if tbr else ''),
|
||||
'format_id': join_nonempty('http', tbr),
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(v.get('width')),
|
||||
'height': int_or_none(v.get('height')),
|
||||
|
||||
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
@@ -213,7 +214,7 @@ class YahooIE(InfoExtractor):
|
||||
tbr = int_or_none(s.get('bitrate'))
|
||||
formats.append({
|
||||
'url': s_url,
|
||||
'format_id': fmt + (f'-{tbr}' if tbr else ''),
|
||||
'format_id': join_nonempty(fmt, tbr),
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
'tbr': tbr,
|
||||
@@ -371,12 +372,13 @@ class YahooJapanNewsIE(InfoExtractor):
|
||||
url, content_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
bitrate = int_or_none(vid.get('bitrate'))
|
||||
formats.append({
|
||||
'url': url,
|
||||
'format_id': f'http-{vid.get("bitrate")}',
|
||||
'format_id': join_nonempty('http', bitrate),
|
||||
'height': int_or_none(vid.get('height')),
|
||||
'width': int_or_none(vid.get('width')),
|
||||
'tbr': int_or_none(vid.get('bitrate')),
|
||||
'tbr': bitrate,
|
||||
})
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
mimetype2ext,
|
||||
try_get,
|
||||
urljoin,
|
||||
@@ -116,12 +117,9 @@ class YandexDiskIE(InfoExtractor):
|
||||
else:
|
||||
size = video.get('size') or {}
|
||||
height = int_or_none(size.get('height'))
|
||||
format_id = 'hls'
|
||||
if height:
|
||||
format_id += f'-{height}p'
|
||||
formats.append({
|
||||
'ext': 'mp4',
|
||||
'format_id': format_id,
|
||||
'format_id': join_nonempty('hls', height and f'{height}p'),
|
||||
'height': height,
|
||||
'protocol': 'm3u8_native',
|
||||
'url': format_url,
|
||||
|
||||
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
|
||||
class YleAreenaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://areena\.yle\.fi/(?P<id>[\d-]+)'
|
||||
_GEO_COUNTRIES = ['FI']
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://areena.yle.fi/1-4371942',
|
||||
@@ -19,7 +20,7 @@ class YleAreenaIE(InfoExtractor):
|
||||
'id': '0_a3tjk92c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pouchit',
|
||||
'description': 'md5:d487309c3abbe5650265bbd1742d2f82',
|
||||
'description': 'md5:01071d7056ceec375f63960f90c35366',
|
||||
'series': 'Modernit miehet',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
@@ -87,8 +88,8 @@ class YleAreenaIE(InfoExtractor):
|
||||
})
|
||||
|
||||
# Example title: 'K1, J2: Pouchit | Modernit miehet'
|
||||
series, season_number, episode_number, episode = self._search_regex(
|
||||
r'K(?P<season_no>[\d]+),\s*J(?P<episode_no>[\d]+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
|
||||
season_number, episode_number, episode, series = self._search_regex(
|
||||
r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
|
||||
info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
|
||||
default=(None, None, None, None))
|
||||
description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
|
||||
@@ -110,10 +111,12 @@ class YleAreenaIE(InfoExtractor):
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
}
|
||||
else:
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(
|
||||
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -129,6 +132,6 @@ class YleAreenaIE(InfoExtractor):
|
||||
or int_or_none(episode_number)),
|
||||
'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
|
||||
'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
|
||||
'subtitles': subtitles,
|
||||
'subtitles': subtitles or None,
|
||||
'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)),
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import collections
|
||||
import copy
|
||||
import datetime as dt
|
||||
import enum
|
||||
import functools
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
@@ -20,7 +21,6 @@ import urllib.parse
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..compat import functools
|
||||
from ..jsinterp import JSInterpreter
|
||||
from ..networking.exceptions import HTTPError, network_exceptions
|
||||
from ..utils import (
|
||||
@@ -468,7 +468,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
|
||||
]
|
||||
|
||||
_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
|
||||
_IGNORED_WARNINGS = {
|
||||
'Unavailable videos will be hidden during playback',
|
||||
'Unavailable videos are hidden',
|
||||
}
|
||||
|
||||
_YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
|
||||
_YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
|
||||
@@ -3797,6 +3800,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
|
||||
CHUNK_SIZE = 10 << 20
|
||||
PREFERRED_LANG_VALUE = 10
|
||||
original_language = None
|
||||
itags, stream_ids = collections.defaultdict(set), []
|
||||
itag_qualities, res_qualities = {}, {0: None}
|
||||
q = qualities([
|
||||
@@ -3845,6 +3850,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
itag_qualities[itag] = quality
|
||||
if height:
|
||||
res_qualities[height] = quality
|
||||
|
||||
is_default = audio_track.get('audioIsDefault')
|
||||
is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
|
||||
language_code = audio_track.get('id', '').split('.')[0]
|
||||
if language_code and is_default:
|
||||
original_language = language_code
|
||||
|
||||
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
|
||||
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
|
||||
# number of fragments that would subsequently be requested with (`&sq=N`)
|
||||
@@ -3870,7 +3882,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
continue
|
||||
|
||||
query = parse_qs(fmt_url)
|
||||
throttled = False
|
||||
if query.get('n'):
|
||||
try:
|
||||
decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
|
||||
@@ -3884,20 +3895,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
|
||||
if player_url:
|
||||
self.report_warning(
|
||||
f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
|
||||
f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
|
||||
f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
|
||||
self.write_debug(e, only_once=True)
|
||||
else:
|
||||
self.report_warning(
|
||||
'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
|
||||
'Cannot decrypt nsig without player_url: Some formats may be missing',
|
||||
video_id=video_id, only_once=True)
|
||||
throttled = True
|
||||
continue
|
||||
|
||||
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
|
||||
language_preference = (
|
||||
10 if audio_track.get('audioIsDefault') and 10
|
||||
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
|
||||
else -1)
|
||||
format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
|
||||
# Some formats may have much smaller duration than others (possibly damaged during encoding)
|
||||
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
|
||||
@@ -3924,17 +3931,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'filesize': int_or_none(fmt.get('contentLength')),
|
||||
'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
|
||||
'format_note': join_nonempty(
|
||||
join_nonempty(audio_track.get('displayName'),
|
||||
language_preference > 0 and ' (default)', delim=''),
|
||||
join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
|
||||
name, fmt.get('isDrc') and 'DRC',
|
||||
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||
throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
|
||||
is_damaged and 'DAMAGED', is_broken and 'BROKEN',
|
||||
(self.get_param('verbose') or all_formats) and client_name,
|
||||
delim=', '),
|
||||
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
||||
'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
|
||||
+ (100 if 'Premium' in name else 0)),
|
||||
'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
|
||||
'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
|
||||
'audio_channels': fmt.get('audioChannels'),
|
||||
'height': height,
|
||||
@@ -3944,9 +3949,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'filesize_approx': filesize_from_tbr(tbr, format_duration),
|
||||
'url': fmt_url,
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
|
||||
'desc' if language_preference < -1 else '') or None,
|
||||
'language_preference': language_preference,
|
||||
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
|
||||
'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
|
||||
# Strictly de-prioritize broken, damaged and 3gp formats
|
||||
'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
|
||||
}
|
||||
@@ -4007,6 +4011,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
elif itag:
|
||||
f['format_id'] = itag
|
||||
|
||||
if original_language and f.get('language') == original_language:
|
||||
f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
|
||||
f['language_preference'] = PREFERRED_LANG_VALUE
|
||||
|
||||
if f.get('source_preference') is None:
|
||||
f['source_preference'] = -1
|
||||
|
||||
@@ -4351,7 +4359,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
|
||||
'live_status': live_status,
|
||||
'release_timestamp': live_start_time,
|
||||
'_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
|
||||
'_format_sort_fields': ( # source_preference is lower for potentially damaged formats
|
||||
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
|
||||
}
|
||||
|
||||
|
||||
@@ -66,7 +66,9 @@ class ZaikoIE(ZaikoBaseIE):
|
||||
stream_meta['stream-access']['video_source'], video_id,
|
||||
'Downloading player page', headers={'referer': 'https://zaiko.io/'})
|
||||
player_meta = self._parse_vue_element_attr('player', player_page, video_id)
|
||||
status = traverse_obj(player_meta, ('initial_event_info', 'status', {str}))
|
||||
initial_event_info = traverse_obj(player_meta, ('initial_event_info', {dict})) or {}
|
||||
|
||||
status = traverse_obj(initial_event_info, ('status', {str}))
|
||||
live_status, msg, expected = {
|
||||
'vod': ('was_live', 'No VOD stream URL was found', False),
|
||||
'archiving': ('post_live', 'Event VOD is still being processed', True),
|
||||
@@ -80,14 +82,20 @@ class ZaikoIE(ZaikoBaseIE):
|
||||
'cancelled': ('not_live', 'Event has been cancelled', True),
|
||||
}.get(status) or ('not_live', f'Unknown event status "{status}"', False)
|
||||
|
||||
stream_url = traverse_obj(player_meta, ('initial_event_info', 'endpoint', {url_or_none}))
|
||||
if traverse_obj(initial_event_info, ('is_jwt_protected', {bool})):
|
||||
stream_url = self._download_json(
|
||||
initial_event_info['jwt_token_url'], video_id, 'Downloading JWT-protected stream URL',
|
||||
'Failed to download JWT-protected stream URL')['playback_url']
|
||||
else:
|
||||
stream_url = traverse_obj(initial_event_info, ('endpoint', {url_or_none}))
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream_url, video_id, live=True, fatal=False) if stream_url else []
|
||||
if not formats:
|
||||
self.raise_no_formats(msg, expected=expected)
|
||||
|
||||
thumbnail_urls = [
|
||||
traverse_obj(player_meta, ('initial_event_info', 'poster_url')),
|
||||
traverse_obj(initial_event_info, ('poster_url', {url_or_none})),
|
||||
self._og_search_thumbnail(self._download_webpage(
|
||||
f'https://zaiko.io/event/{video_id}', video_id, 'Downloading event page', fatal=False) or ''),
|
||||
]
|
||||
@@ -103,9 +111,7 @@ class ZaikoIE(ZaikoBaseIE):
|
||||
'release_timestamp': ('stream', 'start', 'timestamp', {int_or_none}),
|
||||
'categories': ('event', 'genres', ..., {lambda x: x or None}),
|
||||
}),
|
||||
**traverse_obj(player_meta, ('initial_event_info', {
|
||||
'alt_title': ('title', {str}),
|
||||
})),
|
||||
'alt_title': traverse_obj(initial_event_info, ('title', {str})),
|
||||
'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)],
|
||||
}
|
||||
|
||||
|
||||
@@ -667,12 +667,12 @@ class JSInterpreter:
|
||||
self.interpret_expression(v, local_vars, allow_recursion)
|
||||
for v in self._separate(arg_str)]
|
||||
|
||||
if obj == str:
|
||||
if obj is str:
|
||||
if member == 'fromCharCode':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
return ''.join(map(chr, argvals))
|
||||
raise self.Exception(f'Unsupported String method {member}', expr)
|
||||
elif obj == float:
|
||||
elif obj is float:
|
||||
if member == 'pow':
|
||||
assertion(len(argvals) == 2, 'takes two arguments')
|
||||
return argvals[0] ** argvals[1]
|
||||
|
||||
@@ -230,9 +230,7 @@ class Urllib3LoggingFilter(logging.Filter):
|
||||
|
||||
def filter(self, record):
    """Decide whether *record* should be logged.

    Args:
        record: a log record; only its ``msg`` attribute (the unformatted
            message template) is inspected.

    Returns:
        False when ``record.msg`` is exactly the HTTP request template
        below, True for every other record.
    """
    # Ignore HTTP request messages since HTTPConnection prints those
    return record.msg != '%s://%s:%s "%s %s %s" %s %s'
|
||||
|
||||
|
||||
class Urllib3LoggingHandler(logging.Handler):
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import functools
|
||||
import io
|
||||
import logging
|
||||
import ssl
|
||||
@@ -22,7 +23,6 @@ from .exceptions import (
|
||||
TransportError,
|
||||
)
|
||||
from .websocket import WebSocketRequestHandler, WebSocketResponse
|
||||
from ..compat import functools
|
||||
from ..dependencies import websockets
|
||||
from ..socks import ProxyError as SocksProxyError
|
||||
from ..utils import int_or_none
|
||||
|
||||
@@ -474,10 +474,10 @@ def create_parser():
|
||||
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
|
||||
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
|
||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
|
||||
'prefer-legacy-http-handler', 'manifest-filesize-approx',
|
||||
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext',
|
||||
}, 'aliases': {
|
||||
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
|
||||
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
|
||||
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext'],
|
||||
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext'],
|
||||
'2021': ['2022', 'no-certifi', 'filename-sanitization'],
|
||||
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
|
||||
'2023': [],
|
||||
@@ -646,7 +646,7 @@ def create_parser():
|
||||
'You can also simply specify a field to match if the field is present, '
|
||||
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
|
||||
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
|
||||
'the filter matches if atleast one of the conditions are met. E.g. --match-filter '
|
||||
'the filter matches if at least one of the conditions is met. E.g. --match-filter '
|
||||
'!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
|
||||
'matches only videos that are not live OR those that have a like count more than 100 '
|
||||
'(or the like field is not available) and also has a description '
|
||||
@@ -1479,7 +1479,7 @@ def create_parser():
|
||||
'Optionally, the KEYRING used for decrypting Chromium cookies on Linux, '
|
||||
'the name/path of the PROFILE to load cookies from, '
|
||||
'and the CONTAINER name (if Firefox) ("none" for no container) '
|
||||
'can be given with their respective seperators. '
|
||||
'can be given with their respective separators. '
|
||||
'By default, all containers of the most recently accessed profile are used. '
|
||||
f'Currently supported keyrings are: {", ".join(map(str.lower, sorted(SUPPORTED_KEYRINGS)))}'))
|
||||
filesystem.add_option(
|
||||
@@ -1781,7 +1781,7 @@ def create_parser():
|
||||
'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), '
|
||||
'"video" (after --format; before --print/--output), "before_dl" (before each video download), '
|
||||
'"post_process" (after each video download; default), '
|
||||
'"after_move" (after moving video file to it\'s final locations), '
|
||||
'"after_move" (after moving video file to its final locations), '
|
||||
'"after_video" (after downloading and processing all formats of a video), '
|
||||
'or "playlist" (at end of playlist). '
|
||||
'This option can be used multiple times to add different postprocessors'))
|
||||
|
||||
@@ -119,15 +119,22 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
if not mutagen or prefer_atomicparsley:
|
||||
success = False
|
||||
else:
|
||||
self._report_run('mutagen', filename)
|
||||
f = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG}
|
||||
try:
|
||||
self._report_run('mutagen', filename)
|
||||
with open(thumbnail_filename, 'rb') as thumbfile:
|
||||
thumb_data = thumbfile.read()
|
||||
|
||||
type_ = imghdr.what(h=thumb_data)
|
||||
if not type_:
|
||||
raise ValueError('could not determine image type')
|
||||
elif type_ not in f:
|
||||
raise ValueError(f'incompatible image type: {type_}')
|
||||
|
||||
meta = MP4(filename)
|
||||
# NOTE: the 'covr' atom is a non-standard MPEG-4 atom,
|
||||
# Apple iTunes 'M4A' files include the 'moov.udta.meta.ilst' atom.
|
||||
f = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG}[imghdr.what(thumbnail_filename)]
|
||||
with open(thumbnail_filename, 'rb') as thumbfile:
|
||||
thumb_data = thumbfile.read()
|
||||
meta.tags['covr'] = [MP4Cover(data=thumb_data, imageformat=f)]
|
||||
meta.tags['covr'] = [MP4Cover(data=thumb_data, imageformat=f[type_])]
|
||||
meta.save()
|
||||
temp_filename = filename
|
||||
except Exception as err:
|
||||
@@ -160,9 +167,10 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if returncode:
|
||||
self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}')
|
||||
success = False
|
||||
# for formats that don't support thumbnails (like 3gp) AtomicParsley
|
||||
# won't create the temporary file
|
||||
if 'No changes' in stdout:
|
||||
elif 'No changes' in stdout:
|
||||
self.report_warning('The file format doesn\'t support embedding a thumbnail')
|
||||
success = False
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import collections
|
||||
import contextvars
|
||||
import functools
|
||||
import itertools
|
||||
import json
|
||||
import os
|
||||
@@ -8,7 +9,7 @@ import subprocess
|
||||
import time
|
||||
|
||||
from .common import PostProcessor
|
||||
from ..compat import functools, imghdr
|
||||
from ..compat import imghdr
|
||||
from ..utils import (
|
||||
MEDIA_EXTENSIONS,
|
||||
ISO639Utils,
|
||||
|
||||
@@ -2085,19 +2085,20 @@ def parse_duration(s):
|
||||
(days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
|
||||
|
||||
|
||||
def prepend_extension(filename, ext, expected_real_ext=None):
|
||||
def _change_extension(prepend, filename, ext, expected_real_ext=None):
|
||||
name, real_ext = os.path.splitext(filename)
|
||||
return (
|
||||
f'{name}.{ext}{real_ext}'
|
||||
if not expected_real_ext or real_ext[1:] == expected_real_ext
|
||||
else f'{filename}.{ext}')
|
||||
|
||||
if not expected_real_ext or real_ext[1:] == expected_real_ext:
|
||||
filename = name
|
||||
if prepend and real_ext:
|
||||
_UnsafeExtensionError.sanitize_extension(ext, prepend=True)
|
||||
return f'{filename}.{ext}{real_ext}'
|
||||
|
||||
return f'{filename}.{_UnsafeExtensionError.sanitize_extension(ext)}'
|
||||
|
||||
|
||||
def replace_extension(filename, ext, expected_real_ext=None):
|
||||
name, real_ext = os.path.splitext(filename)
|
||||
ext = ext if ext.startswith('.') else '.' + ext
|
||||
|
||||
return f'{name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename}{ext}'
|
||||
prepend_extension = functools.partial(_change_extension, True)
|
||||
replace_extension = functools.partial(_change_extension, False)
|
||||
|
||||
|
||||
def check_executable(exe, args=[]):
|
||||
@@ -5025,7 +5026,7 @@ MEDIA_EXTENSIONS = Namespace(
|
||||
common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
|
||||
video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
|
||||
common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
|
||||
audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
|
||||
audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
|
||||
thumbnails=('jpg', 'png', 'webp'),
|
||||
storyboards=('mhtml', ),
|
||||
subtitles=('srt', 'vtt', 'ass', 'lrc'),
|
||||
@@ -5037,6 +5038,135 @@ MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
|
||||
KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
|
||||
|
||||
|
||||
class _UnsafeExtensionError(Exception):
    """
    Raised when a file extension is rejected by the extension allowlist

    Acts as a mitigation against uncommon or malicious file extensions.
    It is meant to be caught in YoutubeDL.py, which should also warn the user.

    The offending extension is available as the `extension` attribute.

    Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j
    """
    # Extensions are stored in lowercase; lookups lowercase the candidate first
    ALLOWED_EXTENSIONS = frozenset({
        # internal
        'description', 'json', 'meta', 'orig', 'part',
        'temp', 'uncut', 'unknown_video', 'ytdl',

        # video
        *MEDIA_EXTENSIONS.video,
        'asx', 'ismv', 'm2t', 'm2ts', 'm2v', 'm4s', 'mng', 'mp2v',
        'mp4v', 'mpe', 'mpeg', 'mpeg1', 'mpeg2', 'mpeg4', 'mxf', 'ogm',
        'qt', 'rm', 'swf', 'ts', 'vob', 'vp9',

        # audio
        *MEDIA_EXTENSIONS.audio,
        '3ga', 'ac3', 'adts', 'aif', 'au', 'dts', 'isma', 'it', 'mid',
        'mod', 'mpga', 'mp1', 'mp2', 'mp4a', 'mpa', 'ra', 'shn', 'xm',

        # image
        *MEDIA_EXTENSIONS.thumbnails,
        'avif', 'bmp', 'gif', 'heic', 'ico', 'jng',
        'jpeg', 'jxl', 'svg', 'tif', 'tiff', 'wbmp',

        # subtitle
        *MEDIA_EXTENSIONS.subtitles,
        'dfxp', 'fs', 'ismt', 'json3', 'sami', 'scc', 'srv1',
        'srv2', 'srv3', 'ssa', 'tt', 'ttml', 'xml',

        # others
        *MEDIA_EXTENSIONS.manifests,
        *MEDIA_EXTENSIONS.storyboards,
        'desktop', 'ism', 'm3u', 'sbv', 'url', 'webloc',
    })

    def __init__(self, extension, /):
        self.extension = extension
        super().__init__(f'unsafe file extension: {extension!r}')

    @classmethod
    def sanitize_extension(cls, extension, /, *, prepend=False):
        """
        Validate `extension` and return the extension that should be used

        @param extension    The extension to check (without a leading dot).
                            `None` is passed through unchanged.
        @param prepend      When true, only the path separator check is done
                            and the allowlist is not consulted.
        @returns            `None` if `extension` is `None`, 'unknown_video' if
                            the part after the last dot is exactly 'bin',
                            otherwise `extension` itself.
        @raises             This exception type if `extension` contains a path
                            separator, or if the part after its last dot is not
                            in ALLOWED_EXTENSIONS (compared case-insensitively).
        """
        if extension is None:
            return None

        # A path separator would let the "extension" escape the target directory
        if any(separator in extension for separator in ('/', '\\')):
            raise cls(extension)

        if prepend:
            return extension

        # Only the component after the last dot decides whether it is allowed
        suffix = extension.rpartition('.')[2]
        if suffix == 'bin':
            extension = suffix = 'unknown_video'

        if suffix.lower() in cls.ALLOWED_EXTENSIONS:
            return extension
        raise cls(extension)
|
||||
|
||||
|
||||
class RetryManager:
|
||||
"""Usage:
|
||||
for retry in RetryManager(...):
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Autogenerated by devscripts/update-version.py
|
||||
|
||||
__version__ = '2024.05.27'
|
||||
__version__ = '2024.07.07'
|
||||
|
||||
RELEASE_GIT_HEAD = '12b248ce60be1aa1362edd839d915bba70dbee4b'
|
||||
RELEASE_GIT_HEAD = 'b337d2989ce0614651d363383f6f743d977248ef'
|
||||
|
||||
VARIANT = None
|
||||
|
||||
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
|
||||
|
||||
ORIGIN = 'yt-dlp/yt-dlp'
|
||||
|
||||
_pkg_version = '2024.05.27'
|
||||
_pkg_version = '2024.07.07'
|
||||
|
||||
Reference in New Issue
Block a user