1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-12-27 01:51:24 +00:00

Merge branch 'master' into ke/refactor-move-files-pp

This commit is contained in:
bashonly
2024-07-08 00:13:14 -05:00
committed by GitHub
102 changed files with 3221 additions and 1268 deletions

View File

@@ -4,6 +4,7 @@ import copy
import datetime as dt
import errno
import fileinput
import functools
import http.cookiejar
import io
import itertools
@@ -24,7 +25,7 @@ import traceback
import unicodedata
from .cache import Cache
from .compat import functools, urllib # isort: split
from .compat import urllib # isort: split
from .compat import compat_os_name, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
@@ -158,7 +159,7 @@ from .utils import (
write_json_file,
write_string,
)
from .utils._utils import _YDLLogger
from .utils._utils import _UnsafeExtensionError, _YDLLogger
from .utils.networking import (
HTTPHeaderDict,
clean_headers,
@@ -171,6 +172,20 @@ if compat_os_name == 'nt':
import ctypes
def _catch_unsafe_extension_error(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except _UnsafeExtensionError as error:
self.report_error(
f'The extracted extension ({error.extension!r}) is unusual '
'and will be skipped for safety reasons. '
f'If you believe this is an error{bug_reports_message(",")}')
return wrapper
class YoutubeDL:
"""YoutubeDL class.
@@ -453,8 +468,9 @@ class YoutubeDL:
Set the value to 'native' to use the native downloader
compat_opts: Compatibility options. See "Differences in default behavior".
The following options do not work when used through the API:
filename, abort-on-error, multistreams, no-live-chat, format-sort
no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
filename, abort-on-error, multistreams, no-live-chat,
format-sort, no-clean-infojson, no-playlist-metafiles,
no-keep-subs, no-attach-info-json, allow-unsafe-ext.
Refer __init__.py for their implementation
progress_template: Dictionary of templates for progress outputs.
Allowed keys are 'download', 'postprocess',
@@ -581,8 +597,9 @@ class YoutubeDL:
'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url',
'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
}
_deprecated_multivalue_fields = {
'album_artist': 'album_artists',
@@ -1398,6 +1415,7 @@ class YoutubeDL:
outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
return self.escape_outtmpl(outtmpl) % info_dict
@_catch_unsafe_extension_error
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
if outtmpl is None:
@@ -1925,6 +1943,8 @@ class YoutubeDL:
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_channel': ie_result.get('channel'),
'playlist_channel_id': ie_result.get('channel_id'),
**kwargs,
}
if strict:
@@ -3188,6 +3208,7 @@ class YoutubeDL:
os.remove(file)
return None
@_catch_unsafe_extension_error
def process_info(self, info_dict):
"""Process a single resolved IE result. (Modifies it in-place)"""

View File

@@ -64,6 +64,7 @@ from .utils import (
write_string,
)
from .utils.networking import std_headers
from .utils._utils import _UnsafeExtensionError
from .YoutubeDL import YoutubeDL
_IN_CLI = False
@@ -593,6 +594,13 @@ def validate_options(opts):
if opts.ap_username is not None and opts.ap_password is None:
opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
# compat option changes global state destructively; only allow from cli
if 'allow-unsafe-ext' in opts.compat_opts:
warnings.append(
'Using allow-unsafe-ext opens you up to potential attacks. '
'Use with great care!')
_UnsafeExtensionError.sanitize_extension = lambda x, prepend=False: x
return warnings, deprecation_warnings

View File

@@ -1,16 +1,22 @@
tests = {
'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
}
def what(file=None, h=None):
"""Detect format of image (Currently supports jpeg, png, webp, gif only)
Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py
Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py
Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf
"""
if h is None:
with open(file, 'rb') as f:
h = f.read(12)
return next((type_ for type_, test in tests.items() if test(h)), None)
if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8):
return 'webp'
if h.startswith(b'\x89PNG'):
return 'png'
if h.startswith(b'\xFF\xD8\xFF'):
return 'jpeg'
if h.startswith(b'GIF'):
return 'gif'
return None

View File

@@ -2,7 +2,9 @@ import base64
import collections
import contextlib
import datetime as dt
import functools
import glob
import hashlib
import http.cookiejar
import http.cookies
import io
@@ -17,14 +19,12 @@ import tempfile
import time
import urllib.request
from enum import Enum, auto
from hashlib import pbkdf2_hmac
from .aes import (
aes_cbc_decrypt_bytes,
aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7,
)
from .compat import functools # isort: split
from .compat import compat_os_name
from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON,
@@ -740,40 +740,38 @@ def _get_linux_desktop_environment(env, logger):
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
desktop_session = env.get('DESKTOP_SESSION', None)
if xdg_current_desktop is not None:
xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
if xdg_current_desktop == 'Unity':
if desktop_session is not None and 'gnome-fallback' in desktop_session:
for part in map(str.strip, xdg_current_desktop.split(':')):
if part == 'Unity':
if desktop_session is not None and 'gnome-fallback' in desktop_session:
return _LinuxDesktopEnvironment.GNOME
else:
return _LinuxDesktopEnvironment.UNITY
elif part == 'Deepin':
return _LinuxDesktopEnvironment.DEEPIN
elif part == 'GNOME':
return _LinuxDesktopEnvironment.GNOME
else:
return _LinuxDesktopEnvironment.UNITY
elif xdg_current_desktop == 'Deepin':
return _LinuxDesktopEnvironment.DEEPIN
elif xdg_current_desktop == 'GNOME':
return _LinuxDesktopEnvironment.GNOME
elif xdg_current_desktop == 'X-Cinnamon':
return _LinuxDesktopEnvironment.CINNAMON
elif xdg_current_desktop == 'KDE':
kde_version = env.get('KDE_SESSION_VERSION', None)
if kde_version == '5':
return _LinuxDesktopEnvironment.KDE5
elif kde_version == '6':
return _LinuxDesktopEnvironment.KDE6
elif kde_version == '4':
return _LinuxDesktopEnvironment.KDE4
else:
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
return _LinuxDesktopEnvironment.KDE4
elif xdg_current_desktop == 'Pantheon':
return _LinuxDesktopEnvironment.PANTHEON
elif xdg_current_desktop == 'XFCE':
return _LinuxDesktopEnvironment.XFCE
elif xdg_current_desktop == 'UKUI':
return _LinuxDesktopEnvironment.UKUI
elif xdg_current_desktop == 'LXQt':
return _LinuxDesktopEnvironment.LXQT
else:
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
elif part == 'X-Cinnamon':
return _LinuxDesktopEnvironment.CINNAMON
elif part == 'KDE':
kde_version = env.get('KDE_SESSION_VERSION', None)
if kde_version == '5':
return _LinuxDesktopEnvironment.KDE5
elif kde_version == '6':
return _LinuxDesktopEnvironment.KDE6
elif kde_version == '4':
return _LinuxDesktopEnvironment.KDE4
else:
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
return _LinuxDesktopEnvironment.KDE4
elif part == 'Pantheon':
return _LinuxDesktopEnvironment.PANTHEON
elif part == 'XFCE':
return _LinuxDesktopEnvironment.XFCE
elif part == 'UKUI':
return _LinuxDesktopEnvironment.UKUI
elif part == 'LXQt':
return _LinuxDesktopEnvironment.LXQT
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
elif desktop_session is not None:
if desktop_session == 'deepin':
@@ -1001,7 +999,7 @@ def _get_windows_v10_key(browser_root, logger):
def pbkdf2_sha1(password, salt, iterations, key_length):
return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):

View File

@@ -1,4 +1,5 @@
import enum
import functools
import json
import os
import re
@@ -9,7 +10,6 @@ import time
import uuid
from .fragment import FragmentFD
from ..compat import functools
from ..networking import Request
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
@@ -108,7 +108,7 @@ class ExternalFD(FragmentFD):
return all((
not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
'+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'),
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'),
all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
))

View File

@@ -160,10 +160,12 @@ class HlsFD(FragmentFD):
extra_state = ctx.setdefault('extra_state', {})
format_index = info_dict.get('format_index')
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
extra_segment_query = None
if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'):
extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url)
extra_key_query = None
if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'):
extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url)
i = 0
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
@@ -190,8 +192,8 @@ class HlsFD(FragmentFD):
if frag_index <= ctx['fragment_index']:
continue
frag_url = urljoin(man_url, line)
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
if extra_segment_query:
frag_url = update_url_query(frag_url, extra_segment_query)
fragments.append({
'frag_index': frag_index,
@@ -212,8 +214,8 @@ class HlsFD(FragmentFD):
frag_index += 1
map_info = parse_m3u8_attributes(line[11:])
frag_url = urljoin(man_url, map_info.get('URI'))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
if extra_segment_query:
frag_url = update_url_query(frag_url, extra_segment_query)
if map_info.get('BYTERANGE'):
splitted_byte_range = map_info.get('BYTERANGE').split('@')
@@ -244,8 +246,10 @@ class HlsFD(FragmentFD):
decrypt_info['KEY'] = external_aes_key
else:
decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
if extra_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
if extra_key_query or extra_segment_query:
# Fall back to extra_segment_query to key for backwards compat
decrypt_info['URI'] = update_url_query(
decrypt_info['URI'], extra_key_query or extra_segment_query)
if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None

View File

@@ -76,6 +76,7 @@ from .aenetworks import (
)
from .aeonco import AeonCoIE
from .afreecatv import (
AfreecaTVCatchStoryIE,
AfreecaTVIE,
AfreecaTVLiveIE,
AfreecaTVUserIE,
@@ -779,6 +780,7 @@ from .gopro import GoProIE
from .goshgay import GoshgayIE
from .gotostage import GoToStageIE
from .gputechconf import GPUTechConfIE
from .graspop import GraspopIE
from .gronkh import (
GronkhFeedIE,
GronkhIE,
@@ -969,6 +971,10 @@ from .la7 import (
LA7PodcastEpisodeIE,
LA7PodcastIE,
)
from .laracasts import (
LaracastsIE,
LaracastsPlaylistIE,
)
from .lastfm import (
LastFMIE,
LastFMPlaylistIE,
@@ -1113,12 +1119,15 @@ from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .metacritic import MetacriticIE
from .mgtv import MGTVIE
from .microsoftembed import MicrosoftEmbedIE
from .microsoftstream import MicrosoftStreamIE
from .microsoftvirtualacademy import (
MicrosoftVirtualAcademyCourseIE,
MicrosoftVirtualAcademyIE,
from .microsoftembed import (
MicrosoftBuildIE,
MicrosoftEmbedIE,
MicrosoftLearnEpisodeIE,
MicrosoftLearnPlaylistIE,
MicrosoftLearnSessionIE,
MicrosoftMediusIE,
)
from .microsoftstream import MicrosoftStreamIE
from .mildom import (
MildomClipIE,
MildomIE,
@@ -1603,6 +1612,7 @@ from .qqmusic import (
QQMusicPlaylistIE,
QQMusicSingerIE,
QQMusicToplistIE,
QQMusicVideoIE,
)
from .r7 import (
R7IE,
@@ -1928,6 +1938,10 @@ from .spreaker import (
)
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .sproutvideo import (
SproutVideoIE,
VidsIoIE,
)
from .srgssr import (
SRGSSRIE,
SRGSSRPlayIE,
@@ -2310,6 +2324,7 @@ from .vidio import (
)
from .vidlii import VidLiiIE
from .vidly import VidlyIE
from .vidyard import VidyardIE
from .viewlift import (
ViewLiftEmbedIE,
ViewLiftIE,
@@ -2375,6 +2390,10 @@ from .vrt import (
VrtNUIE,
)
from .vtm import VTMIE
from .vtv import (
VTVIE,
VTVGoIE,
)
from .vuclip import VuClipIE
from .vvvvid import (
VVVVIDIE,

View File

@@ -368,6 +368,7 @@ class AbemaTVIE(AbemaTVBaseIE):
info['episode_number'] = epis if epis < 2000 else None
is_live, m3u8_url = False, None
availability = 'public'
if video_type == 'now-on-air':
is_live = True
channel_url = 'https://api.abema.io/v1/channels'
@@ -389,6 +390,7 @@ class AbemaTVIE(AbemaTVBaseIE):
if 3 not in ondemand_types:
# cannot acquire decryption key for these streams
self.report_warning('This is a premium-only stream')
availability = 'premium_only'
info.update(traverse_obj(api_response, {
'series': ('series', 'title'),
'season': ('season', 'name'),
@@ -408,6 +410,7 @@ class AbemaTVIE(AbemaTVBaseIE):
headers=headers)
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
self.report_warning('This is a premium-only stream')
availability = 'premium_only'
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
else:
@@ -425,6 +428,7 @@ class AbemaTVIE(AbemaTVBaseIE):
'description': description,
'formats': formats,
'is_live': is_live,
'availability': availability,
})
return info

View File

@@ -72,7 +72,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
)\?.*?\bnTitleNo=|
vod\.afreecatv\.com/(PLAYER/STATION|player)/
)
(?P<id>\d+)
(?P<id>\d+)/?(?:$|[?#&])
'''
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
@@ -189,7 +189,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
headers={'Referer': url}, data=urlencode_postdata({
'nTitleNo': video_id,
'nApiLevel': 10,
}))['data']
}), impersonate=True)['data']
error_code = traverse_obj(data, ('code', {int}))
if error_code == -6221:
@@ -253,6 +253,43 @@ class AfreecaTVIE(AfreecaTVBaseIE):
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:catchstory'
IE_DESC = 'afreecatv.com catch story'
_VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
_TESTS = [{
'url': 'https://vod.afreecatv.com/player/103247/catchstory',
'info_dict': {
'id': '103247',
},
'playlist_count': 2,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'https://api.m.afreecatv.com/catchstory/a/view', video_id, headers={'Referer': url},
query={'aStoryListIdx': '', 'nStoryIdx': video_id}, impersonate=True)
return self.playlist_result(self._entries(data), video_id)
@staticmethod
def _entries(data):
# 'files' is always a list with 1 element
yield from traverse_obj(data, (
'data', lambda _, v: v['story_type'] == 'catch',
'catch_list', lambda _, v: v['files'][0]['file'], {
'id': ('files', 0, 'file_info_key', {str}),
'url': ('files', 0, 'file', {url_or_none}),
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
'title': ('title', {str}),
'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}),
'thumbnail': ('thumb', {url_or_none}),
'timestamp': ('write_timestamp', {int_or_none}),
}))
class AfreecaTVLiveIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:live'
IE_DESC = 'afreecatv.com livestreams'

View File

@@ -4,6 +4,7 @@ from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
join_nonempty,
parse_iso8601,
try_get,
)
@@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor):
else:
vbr = int_or_none(s.get('bitrate'))
formats.append({
'format_id': f'{stream_type}-{vbr}' if vbr else stream_type,
'format_id': join_nonempty(stream_type, vbr),
'vbr': vbr,
'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')),

View File

@@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
href, video_id, f4m_id='hds', fatal=False))
elif mime_type == 'application/dash+xml':
formats.extend(self._extract_f4m_formats(
href, video_id, f4m_id='hds', fatal=False))
formats.extend(self._extract_mpd_formats(
href, video_id, mpd_id='dash', fatal=False))
elif mime_type == 'application/vnd.ms-sstr+xml':
formats.extend(self._extract_ism_formats(
href, video_id, ism_id='mss', fatal=False))

View File

@@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor):
]
_API_BASE = 'https://api.atresplayer.com/'
def _handle_error(self, e, code):
if isinstance(e.cause, HTTPError) and e.cause.status == code:
error = self._parse_json(e.cause.response.read(), None)
if error.get('error') == 'required_registered':
self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True)
raise
def _perform_login(self, username, password):
self._request_webpage(
self._API_BASE + 'login', None, 'Downloading login page')
@@ -55,7 +47,9 @@ class AtresPlayerIE(InfoExtractor):
'password': password,
}))['targetUrl']
except ExtractorError as e:
self._handle_error(e, 400)
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError('Invalid username and/or password', expected=True)
raise
self._request_webpage(target_url, None, 'Following Target URL')
@@ -66,7 +60,12 @@ class AtresPlayerIE(InfoExtractor):
episode = self._download_json(
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
except ExtractorError as e:
self._handle_error(e, 403)
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read(), None)
if error.get('error') == 'required_registered':
self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True)
raise
title = episode['titulo']

View File

@@ -4,9 +4,13 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
InAdvancePagedList,
determine_ext,
format_field,
int_or_none,
join_nonempty,
traverse_obj,
unified_timestamp,
url_or_none,
)
@@ -30,6 +34,7 @@ class BanByeBaseIE(InfoExtractor):
class BanByeIE(BanByeBaseIE):
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
_TESTS = [{
# ['src']['mp4']['levels'] direct mp4 urls only
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
'info_dict': {
@@ -58,6 +63,7 @@ class BanByeIE(BanByeBaseIE):
},
'playlist_mincount': 9,
}, {
# ['src']['mp4']['levels'] direct mp4 urls only
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
'info_dict': {
'id': 'v_kb6_o1Kyq-CD',
@@ -77,6 +83,48 @@ class BanByeIE(BanByeBaseIE):
'view_count': int,
'comment_count': int,
},
}, {
# ['src']['hls']['levels'] variant m3u8 urls only; master m3u8 is 404
'url': 'https://banbye.com/watch/v_a_gPFuC9LoW5',
'info_dict': {
'id': 'v_a_gPFuC9LoW5',
'ext': 'mp4',
'title': 'md5:183524056bebdfa245fd6d214f63c0fe',
'description': 'md5:943ac87287ca98d28d8b8797719827c6',
'uploader': 'wRealu24',
'channel_id': 'ch_wrealu24',
'channel_url': 'https://banbye.com/channel/ch_wrealu24',
'upload_date': '20231113',
'timestamp': 1699874062,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'thumbnail': 'https://cdn.banbye.com/video/v_a_gPFuC9LoW5/96.webp',
'tags': ['jaszczur', 'sejm', 'lewica', 'polska', 'ukrainizacja', 'pierwszeposiedzeniesejmu'],
},
'expected_warnings': ['Failed to download m3u8'],
}, {
# ['src']['hls']['masterPlaylist'] m3u8 only
'url': 'https://banbye.com/watch/v_B0rsKWsr-aaa',
'info_dict': {
'id': 'v_B0rsKWsr-aaa',
'ext': 'mp4',
'title': 'md5:00b254164b82101b3f9e5326037447ed',
'description': 'md5:3fd8b48aa81954ba024bc60f5de6e167',
'uploader': 'PSTV Piotr Szlachtowicz ',
'channel_id': 'ch_KV9EVObkB9wB',
'channel_url': 'https://banbye.com/channel/ch_KV9EVObkB9wB',
'upload_date': '20240629',
'timestamp': 1719646816,
'duration': 2377,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'thumbnail': 'https://cdn.banbye.com/video/v_B0rsKWsr-aaa/96.webp',
'tags': ['Biden', 'Trump', 'Wybory', 'USA'],
},
}]
def _real_extract(self, url):
@@ -91,11 +139,24 @@ class BanByeIE(BanByeBaseIE):
'id': f'{quality}p',
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
} for quality in [48, 96, 144, 240, 512, 1080]]
formats = [{
'format_id': f'http-{quality}p',
'quality': quality,
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
} for quality in data['quality']]
formats = []
url_data = self._download_json(f'{self._API_BASE}/videos/{video_id}/url', video_id, data=b'')
if master_url := traverse_obj(url_data, ('src', 'hls', 'masterPlaylist', {url_or_none})):
formats = self._extract_m3u8_formats(master_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
for format_id, format_url in traverse_obj(url_data, (
'src', ('mp4', 'hls'), 'levels', {dict.items}, lambda _, v: url_or_none(v[1]))):
ext = determine_ext(format_url)
is_hls = ext == 'm3u8'
formats.append({
'url': format_url,
'ext': 'mp4' if is_hls else ext,
'format_id': join_nonempty(is_hls and 'hls', format_id),
'protocol': 'm3u8_native' if is_hls else 'https',
'height': int_or_none(format_id),
})
self._remove_duplicate_formats(formats)
return {
'id': video_id,

View File

@@ -41,7 +41,7 @@ class BandcampIE(InfoExtractor):
'uploader_id': 'youtube-dl',
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
},
'_skip': 'There is a limit of 200 free downloads / month for the test song',
'skip': 'There is a limit of 200 free downloads / month for the test song',
}, {
# free download
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',

View File

@@ -31,12 +31,12 @@ from ..utils import (
mimetype2ext,
parse_count,
parse_qs,
parse_resolution,
qualities,
smuggle_url,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
try_call,
unified_timestamp,
unsmuggle_url,
url_or_none,
@@ -47,6 +47,23 @@ from ..utils import (
class BilibiliBaseIE(InfoExtractor):
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
_wbi_key_cache = {}
@property
def is_logged_in(self):
return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
def _check_missing_formats(self, play_info, formats):
parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
missing_formats = join_nonempty(*[
traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
for fmt in traverse_obj(play_info, (
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
if missing_formats:
self.to_screen(
f'Format(s) {missing_formats} are missing; you have to login or '
f'become a premium member to download them. {self._login_hint()}')
def extract_formats(self, play_info):
format_names = {
@@ -86,18 +103,75 @@ class BilibiliBaseIE(InfoExtractor):
'format': format_names.get(video.get('id')),
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
if missing_formats:
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
f'you have to login or become premium member to download them. {self._login_hint()}')
if formats:
self._check_missing_formats(play_info, formats)
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}),
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
'filesize': ('size', {int_or_none}),
}))
if fragments:
formats.append({
'url': fragments[0]['url'],
'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
**({
'fragments': fragments,
'protocol': 'http_dash_segments',
} if len(fragments) > 1 else {}),
**traverse_obj(play_info, {
'quality': ('quality', {int_or_none}),
'format_id': ('quality', {str_or_none}),
'format_note': ('quality', {lambda x: format_names.get(x)}),
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
}),
**parse_resolution(format_names.get(play_info.get('quality'))),
})
return formats
def _download_playinfo(self, video_id, cid, headers=None):
def _get_wbi_key(self, video_id):
if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
return self._wbi_key_cache['key']
session_data = self._download_json(
'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
lookup = ''.join(traverse_obj(session_data, (
'data', 'wbi_img', ('img_url', 'sub_url'),
{lambda x: x.rpartition('/')[2].partition('.')[0]})))
# from getMixinKey() in the vendor js
mixin_key_enc_tab = [
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
36, 20, 34, 44, 52,
]
self._wbi_key_cache.update({
'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
'ts': time.time(),
})
return self._wbi_key_cache['key']
def _sign_wbi(self, params, video_id):
params['wts'] = round(time.time())
params = {
k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
for k, v in sorted(params.items())
}
query = urllib.parse.urlencode(params)
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
return params
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
if qn:
params['qn'] = qn
return self._download_json(
'https://api.bilibili.com/x/player/playurl', video_id,
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
query=self._sign_wbi(params, bvid), headers=headers,
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
def json2srt(self, json_data):
srt_data = ''
@@ -115,15 +189,15 @@ class BilibiliBaseIE(InfoExtractor):
}],
}
subtitle_info = traverse_obj(self._download_json(
video_info = self._download_json(
'https://api.bilibili.com/x/player/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
for s in subs_list:
note=f'Extracting subtitle info {cid}')
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
self.report_warning(
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
for s in traverse_obj(video_info, (
'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
subtitles.setdefault(s['lan'], []).append({
'ext': 'srt',
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
@@ -203,15 +277,15 @@ class BilibiliBaseIE(InfoExtractor):
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
return cid_edges
def _get_interactive_entries(self, video_id, cid, metainfo):
def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
graph_version = traverse_obj(
self._download_json(
'https://api.bilibili.com/x/player/wbi/v2', video_id,
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
('data', 'interaction', 'graph_version', {int_or_none}))
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items():
play_info = self._download_playinfo(video_id, cid)
play_info = self._download_playinfo(video_id, cid, headers=headers)
yield {
**metainfo,
'id': f'{video_id}_{cid}',
@@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE):
'timestamp': 1488353834,
'like_count': int,
'view_count': int,
'_old_archive_ids': ['bilibili 8903802_part1'],
},
}, {
'note': 'old av URL version',
'url': 'http://www.bilibili.com/video/av1074402/',
'info_dict': {
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
'id': 'BV11x411K7CN',
'ext': 'mp4',
'title': '【金坷垃】金泡沫',
'uploader': '菊子桑',
'uploader_id': '156160',
'id': 'BV11x411K7CN',
'title': '【金坷垃】金泡沫',
'duration': 308.36,
'upload_date': '20140420',
'timestamp': 1397983878,
@@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': int,
'view_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
'_old_archive_ids': ['bilibili 1074402_part1'],
},
'params': {'skip_download': True},
}, {
@@ -288,6 +364,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314,
'_old_archive_ids': ['bilibili 498159642_part1'],
},
}],
}, {
@@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314,
'_old_archive_ids': ['bilibili 498159642_part1'],
},
}, {
'note': 'video has subtitles',
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
'info_dict': {
'id': 'BV12N4y1M7rh',
'ext': 'mp4',
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
'tags': list,
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
'duration': 313.557,
'upload_date': '20220709',
'uploader': '小夫太渴',
'timestamp': 1657347907,
'uploader_id': '1326814124',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'subtitles': 'count:2',
},
'params': {'listsubtitles': True},
}, {
'url': 'https://www.bilibili.com/video/av8903802/',
'info_dict': {
@@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': int,
'view_count': int,
'like_count': int,
'_old_archive_ids': ['bilibili 8903802_part1'],
},
'params': {
'skip_download': True,
@@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 463665680_part1'],
},
'params': {'skip_download': True},
}, {
@@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 893839363_part1'],
},
'params': {'skip_download': True},
}, {
'note': 'newer festival video',
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
@@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 778246196_part1'],
},
}, {
'note': 'legacy flv/mp4 video',
'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
'info_dict': {
'id': 'BV1ms411Q7vw_p4',
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
'timestamp': 1458222815,
'upload_date': '20160317',
'description': '云南方言快乐生产线出品',
'duration': float,
'uploader': '一笑颠天',
'uploader_id': '3916081',
'view_count': int,
'comment_count': int,
'like_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 4120229_part4'],
},
'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
'playlist_count': 19,
'playlist': [{
'info_dict': {
'id': 'BV1ms411Q7vw_p4_0',
'ext': 'flv',
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
'duration': 399.102,
},
}],
}, {
'note': 'legacy mp4-only video',
'url': 'https://www.bilibili.com/video/BV1nx411u79K',
'info_dict': {
'id': 'BV1nx411u79K',
'ext': 'mp4',
'title': '【练习室】201603声乐练习《No Air》with VigoVan',
'timestamp': 1508893551,
'upload_date': '20171025',
'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
'duration': 80.384,
'uploader': '伯远',
'uploader_id': '10584494',
'comment_count': int,
'view_count': int,
'like_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 15700301_part1'],
},
'params': {'skip_download': True},
}, {
'note': 'interactive/split-path video',
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
@@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 292734508_part1'],
},
'playlist_count': 33,
'playlist': [{
@@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 292734508_part1'],
},
}],
}, {
@@ -465,6 +575,29 @@ class BiliBiliIE(BilibiliBaseIE):
'upload_date': '20191021',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}, {
'note': 'video has subtitles, which requires login',
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
'info_dict': {
'id': 'BV12N4y1M7rh',
'ext': 'mp4',
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
'tags': list,
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
'duration': 313.557,
'upload_date': '20220709',
'uploader': '小夫太渴',
'timestamp': 1657347907,
'uploader_id': '1326814124',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'subtitles': 'count:2', # login required for CC subtitle
'_old_archive_ids': ['bilibili 898179753_part1'],
},
'params': {'listsubtitles': True},
'skip': 'login required for subtitle',
}, {
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
'info_dict': {
@@ -498,8 +631,9 @@ class BiliBiliIE(BilibiliBaseIE):
if not self._match_valid_url(urlh.url):
return self.url_result(urlh.url)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
headers['Referer'] = url
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
is_festival = 'videoData' not in initial_state
if is_festival:
video_data = initial_state['videoInfo']
@@ -548,7 +682,6 @@ class BiliBiliIE(BilibiliBaseIE):
aid = video_data.get('aid')
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
festival_info = {}
@@ -586,18 +719,65 @@ class BiliBiliIE(BilibiliBaseIE):
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
if is_interactive:
return self.playlist_result(
self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
__post_extractor=self.extract_comments(aid))
else:
return {
**metainfo,
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, cid),
'formats': self.extract_formats(play_info),
'__post_extractor': self.extract_comments(aid),
}
formats = self.extract_formats(play_info)
if not traverse_obj(play_info, ('dash')):
# we only have legacy formats and need additional work
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
formats.extend(traverse_obj(
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
lambda _, v: not has_qn(v['quality'])))
self._check_missing_formats(play_info, formats)
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
if flv_formats and len(flv_formats) < len(formats):
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
if not self._configuration_arg('prefer_multi_flv'):
dropped_fmts = ', '.join(
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
if dropped_fmts:
self.to_screen(
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
else:
formats = traverse_obj(
# XXX: Filtering by extractor-arg is for testing purposes
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
) or [max(flv_formats, key=lambda x: x['quality'])]
if traverse_obj(formats, (0, 'fragments')):
# We have flv formats, which are individual short videos with their own timestamps and metainfo
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
return {
**metainfo,
'_type': 'multi_video',
'entries': [{
'id': f'{metainfo["id"]}_{idx}',
'title': metainfo['title'],
'http_headers': metainfo['http_headers'],
'formats': [{
**fragment,
'format_id': formats[0].get('format_id'),
}],
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
} for idx, fragment in enumerate(formats[0]['fragments'])],
'duration': float_or_none(play_info.get('timelength'), scale=1000),
}
else:
return {
**metainfo,
'formats': formats,
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, cid),
'__post_extractor': self.extract_comments(aid),
}
class BiliBiliBangumiIE(BilibiliBaseIE):
@@ -968,7 +1148,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
}))
class BilibiliSpaceBaseIE(InfoExtractor):
class BilibiliSpaceBaseIE(BilibiliBaseIE):
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
first_page = fetch_page(0)
metadata = get_metadata(first_page)
@@ -988,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
'id': '3985676',
},
'playlist_mincount': 178,
'skip': 'login required',
}, {
'url': 'https://space.bilibili.com/313580179/video',
'info_dict': {
'id': '313580179',
},
'playlist_mincount': 92,
'skip': 'login required',
}]
def _extract_signature(self, playlist_id):
session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
img_key = traverse_obj(
session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
sub_key = traverse_obj(
session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
session_key = img_key + sub_key
signature_values = []
for position in (
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
57, 62, 11, 36, 20, 34, 44, 52,
):
char_at_position = try_call(lambda: session_key[position])
if char_at_position:
signature_values.append(char_at_position)
return ''.join(signature_values)[:32]
def _real_extract(self, url):
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
if not is_video_url:
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
'To download audios, add a "/audio" to the URL')
signature = self._extract_signature(playlist_id)
def fetch_page(page_idx):
query = {
'keyword': '',
'mid': playlist_id,
'order': 'pubdate',
'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
'order_avoided': 'true',
'platform': 'web',
'pn': page_idx + 1,
'ps': 30,
'tid': 0,
'web_location': 1550101,
'wts': int(time.time()),
}
query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
try:
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
playlist_id, note=f'Downloading page {page_idx}', query=query,
headers={'referer': url})
response = self._download_json(
'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
query=self._sign_wbi(query, playlist_id),
note=f'Downloading space page {page_idx}', headers={'Referer': url})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError(
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
raise
if response['code'] in (-352, -401):
status_code = response['code']
if status_code == -401:
raise ExtractorError(
f'Request is blocked by server ({-response["code"]}), '
'please add cookies, wait and try later.', expected=True)
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
elif status_code == -352 and not self.is_logged_in:
self.raise_login_required('Request is rejected, you need to login to access playlist')
elif status_code != 0:
raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
return response['data']
def get_metadata(page_data):
@@ -1280,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.bilibili.com/watchlater/#/list',
'info_dict': {'id': 'watchlater'},
'info_dict': {
'id': r're:\d+',
'title': '稍后再看',
},
'playlist_mincount': 0,
'skip': 'login required',
}]
@@ -1356,14 +1519,19 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'skip': 'redirect url',
}, {
'url': 'https://www.bilibili.com/list/watchlater',
'info_dict': {'id': 'watchlater'},
'info_dict': {
'id': r're:2_\d+',
'title': '稍后再看',
'uploader': str,
'uploader_id': str,
},
'playlist_mincount': 0,
'skip': 'login required',
}, {
'url': 'https://www.bilibili.com/medialist/play/watchlater',
'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0,
'skip': 'login required',
'skip': 'redirect url & login required',
}]
def _extract_medialist(self, query, list_id):
@@ -1414,7 +1582,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'title': ('title', {str}),
'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}),
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
'thumbnail': ('cover', {url_or_none}),
})),
}
@@ -1808,7 +1976,8 @@ class BiliIntlBaseIE(InfoExtractor):
public_key = Cryptodome.RSA.importKey(key_data['key'])
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
login_post = self._download_json(
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
data=urlencode_postdata({
'username': username,
'password': base64.b64encode(password_hash).decode('ascii'),
'keep_me': 'true',
@@ -2140,7 +2309,8 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
def _real_extract(self, url):
series_id = self._match_id(url)
series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
series_info = self._call_api(
f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
return self.playlist_result(
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),

View File

@@ -24,7 +24,7 @@ from ..utils import (
class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
@@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor):
}, {
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
'only_matching': True,
}, {
'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/',
'only_matching': True,
}]
_GEO_BYPASS = False
@@ -132,7 +135,7 @@ class BitChuteIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
self._raise_if_restricted(webpage)
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
@@ -171,13 +174,13 @@ class BitChuteIE(InfoExtractor):
class BitChuteChannelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bitchute.com/channel/bitchute/',
'info_dict': {
'id': 'bitchute',
'title': 'BitChute',
'description': 'md5:5329fb3866125afa9446835594a9b138',
'description': 'md5:2134c37d64fc3a4846787c402956adac',
},
'playlist': [
{
@@ -210,6 +213,9 @@ class BitChuteChannelIE(InfoExtractor):
'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:747724ef404eebdfc04277714f81863e',
},
}, {
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
'only_matching': True,
}]
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
@@ -230,7 +236,7 @@ class BitChuteChannelIE(InfoExtractor):
@staticmethod
def _make_url(playlist_id, playlist_type):
return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'
return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/'
def _fetch_page(self, playlist_id, playlist_type, page_num):
playlist_url = self._make_url(playlist_id, playlist_type)

View File

@@ -18,6 +18,7 @@ from ..utils import (
fix_xml_ampersands,
float_or_none,
int_or_none,
join_nonempty,
js_to_json,
mimetype2ext,
parse_iso8601,
@@ -386,7 +387,7 @@ class BrightcoveLegacyIE(InfoExtractor):
@classmethod
def _make_brightcove_url(cls, params):
return update_url_query(
'http://c.brightcove.com/services/viewer/htmlFederated', params)
'https://c.brightcove.com/services/viewer/htmlFederated', params)
@classmethod
def _extract_brightcove_url(cls, webpage):
@@ -470,7 +471,7 @@ class BrightcoveLegacyIE(InfoExtractor):
if referer:
headers['Referer'] = referer
player_page = self._download_webpage(
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
'https://link.brightcove.com/services/player/bcpid' + player_id[0],
video_id, headers=headers, fatal=False)
if player_page:
player_key = self._search_regex(
@@ -480,7 +481,7 @@ class BrightcoveLegacyIE(InfoExtractor):
enc_pub_id = player_key.split(',')[1].replace('~', '=')
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
if publisher_id:
brightcove_new_url = f'http://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
if referer:
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
@@ -538,12 +539,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
})
def build_format_id(kind):
format_id = kind
if tbr:
format_id += f'-{int(tbr)}k'
if height:
format_id += f'-{height}p'
return format_id
return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
if src or streaming_src:
f.update({
@@ -801,7 +797,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
# Look for iframe embeds [1]
for _, url in re.findall(
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
entries.append(url if url.startswith('http') else 'http:' + url)
entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)
# Look for <video> tags [2] and embed_in_page embeds [3]
# [2] looks like:
@@ -830,7 +826,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
player_id = player_id or attrs.get('data-player') or 'default'
embed = embed or attrs.get('data-embed') or 'default'
bc_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
# Some brightcove videos may be embedded with video tag only and
# without script tag or any mentioning of brightcove at all. Such
@@ -867,7 +863,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
def extract_policy_key():
base_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/'
base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/'
config = self._download_json(
base_url + 'config.json', video_id, fatal=False) or {}
policy_key = try_get(

View File

@@ -455,10 +455,8 @@ class CBCGemIE(InfoExtractor):
def claims_token_expired(self):
exp = self._get_claims_token_expiry()
if exp - time.time() < 10:
# It will expire in less than 10 seconds, or has already expired
return True
return False
# It will expire in less than 10 seconds, or has already expired
return exp - time.time() < 10
def claims_token_valid(self):
return self._claims_token is not None and not self.claims_token_expired()

View File

@@ -1,6 +1,5 @@
import base64
import re
import urllib.error
import urllib.parse
import zlib

View File

@@ -1,63 +1,50 @@
from .common import InfoExtractor
from ..utils import traverse_obj
from .vidyard import VidyardBaseIE, VidyardIE
from ..utils import ExtractorError, make_archive_id, url_basename
class CellebriteIE(InfoExtractor):
class CellebriteIE(VidyardBaseIE):
_VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
'info_dict': {
'id': '16025876',
'id': 'ZqmUss3dQfEMGpauambPuH',
'display_id': '16025876',
'ext': 'mp4',
'description': 'md5:174571cb97083fd1d457d75c684f4e2b',
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
'duration': 455,
'tags': [],
'description': 'md5:dee48fe12bbae5c01fe6a053f7676da4',
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
'duration': 455.979,
'_old_archive_ids': ['cellebrite 16025876'],
},
}, {
'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
'info_dict': {
'id': '29018255',
'id': 'QV1U8a2yzcxigw7VFnqKyg',
'display_id': '29018255',
'ext': 'mp4',
'duration': 134,
'tags': [],
'description': 'md5:e9a3d124c7287b0b07bad2547061cacf',
'title': 'How to Lawfully Collect the Maximum Amount of Data From Android Devices',
'description': 'md5:0e943a9ac14c374d5d74faed634d773c',
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
'title': 'Android Extractions Explained',
'duration': 134.315,
'_old_archive_ids': ['cellebrite 29018255'],
},
}]
def _get_formats_and_subtitles(self, json_data, display_id):
formats = [{'url': url} for url in traverse_obj(json_data, ('mp4', ..., 'url')) or []]
subtitles = {}
for url in traverse_obj(json_data, ('hls', ..., 'url')) or []:
fmt, sub = self._extract_m3u8_formats_and_subtitles(
url, display_id, ext='mp4', headers={'Referer': 'https://play.vidyard.com/'})
formats.extend(fmt)
self._merge_subtitles(sub, target=subtitles)
return formats, subtitles
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
slug = self._match_id(url)
webpage = self._download_webpage(url, slug)
vidyard_url = next(VidyardIE._extract_embed_urls(url, webpage), None)
if not vidyard_url:
raise ExtractorError('No Vidyard video embeds found on page')
player_uuid = self._search_regex(
r'<img\s[^>]*\bdata-uuid\s*=\s*"([^"\?]+)', webpage, 'player UUID')
json_data = self._download_json(
f'https://play.vidyard.com/player/{player_uuid}.json', display_id)['payload']['chapters'][0]
video_id = url_basename(vidyard_url)
info = self._process_video_json(self._fetch_video_json(video_id)['chapters'][0], video_id)
if info.get('display_id'):
info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])]
if thumbnail := self._og_search_thumbnail(webpage, default=None):
info.setdefault('thumbnails', []).append({'url': thumbnail})
formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], display_id)
return {
'id': str(json_data['videoId']),
'title': json_data.get('name') or self._og_search_title(webpage),
'formats': formats,
'subtitles': subtitles,
'description': json_data.get('description') or self._og_search_description(webpage),
'duration': json_data.get('seconds'),
'tags': json_data.get('tags'),
'thumbnail': self._og_search_thumbnail(webpage),
'http_headers': {'Referer': 'https://play.vidyard.com/'},
'description': self._og_search_description(webpage, default=None),
**info,
}

View File

@@ -36,7 +36,7 @@ class CHZZKLiveIE(InfoExtractor):
def _real_extract(self, url):
channel_id = self._match_id(url)
live_detail = self._download_json(
f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
f'https://api.chzzk.naver.com/service/v3/channels/{channel_id}/live-detail', channel_id,
note='Downloading channel info', errnote='Unable to download channel info')['content']
if live_detail.get('status') == 'CLOSE':
@@ -106,12 +106,45 @@ class CHZZKVideoIE(InfoExtractor):
'upload_date': '20231219',
'view_count': int,
},
'skip': 'Replay video is expired',
}, {
# Manually uploaded video
'url': 'https://chzzk.naver.com/video/1980',
'info_dict': {
'id': '1980',
'ext': 'mp4',
'title': '※시청주의※한번보면 잊기 힘든 영상',
'channel': '라디유radiyu',
'channel_id': '68f895c59a1043bc5019b5e08c83a5c5',
'channel_is_verified': False,
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 95,
'timestamp': 1703102631.722,
'upload_date': '20231220',
'view_count': int,
},
}, {
# Partner channel replay video
'url': 'https://chzzk.naver.com/video/2458',
'info_dict': {
'id': '2458',
'ext': 'mp4',
'title': '첫 방송',
'channel': '강지',
'channel_id': 'b5ed5db484d04faf4d150aedd362f34b',
'channel_is_verified': True,
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 4433,
'timestamp': 1703307460.214,
'upload_date': '20231223',
'view_count': int,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video_meta = self._download_json(
f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
note='Downloading video info', errnote='Unable to download video info')['content']
formats, subtitles = self._extract_mpd_formats_and_subtitles(
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,

View File

@@ -6,11 +6,11 @@ from .common import InfoExtractor
class CloudflareStreamIE(InfoExtractor):
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
_EMBED_REGEX = [
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
]
_TESTS = [{
@@ -24,6 +24,14 @@ class CloudflareStreamIE(InfoExtractor):
'params': {
'skip_download': 'm3u8',
},
}, {
'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e',
'info_dict': {
'id': '0e8e040aec776862e1d632a699edf59e',
'ext': 'mp4',
'title': '0e8e040aec776862e1d632a699edf59e',
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
},
}, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
'only_matching': True,
@@ -36,6 +44,9 @@ class CloudflareStreamIE(InfoExtractor):
}, {
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
'only_matching': True,
}, {
'url': 'https://watch.cloudflarestream.com/eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJraWQiOiJmYTA0YjViMzQ2NDkwYTM5NWJiNzQ1NWFhZTA2YzYwZSIsInN1YiI6Ijg4ZDQxMDhhMzY0MjA3M2VhYmFhZjg3ZGExODJkMjYzIiwiZXhwIjoxNjAwNjA5MzE5fQ.xkRJwLGkt0nZ%5F0BlPiwU7iW4pqb4lKkznbKfAhGg0tGcxSS6ZBA3lcTUwu7W%2DyCFbnAl%2Dhqk3Fn%5FqeQS%5FQydP27qTHpB9iIFFsMtk1tqzGZV5v4yrYDnwLSKzEKvVd6QwJnfABtxH2JdpSNuWlMUiVXFxGWgjOw6QeTNDDklTQYXV%5FNLV7sErSn5CeOPeRRkdXb%2D8ip%5FVOcfk1nDsFoOo4fctFtGP0wYMyY5ae8nhhatydHwevuvJCcEvEfh%2D4qjq9mCZOodevmtSQ4YWmggf4BxtWnDWYrGW8Otp6oqezrR8oY4%2DbKdV6PaqBj49aJdcls6xK7PmM8%5Fvjy3xfm0Mg',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',

View File

@@ -1,3 +1,5 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -35,6 +37,20 @@ class CloudyCDNIE(InfoExtractor):
'duration': 1205,
'upload_date': '20221130',
},
}, {
# Video-only m3u8 formats need manual fixup
'url': 'https://embed.cloudycdn.services/ltv/media/08j_d24-6000-074',
'md5': 'fc472e40f6e6238446509be411c920e2',
'info_dict': {
'id': '08j_d24-6000-074',
'ext': 'mp4',
'upload_date': '20240620',
'duration': 1673,
'title': 'D24-6000-074-cetstud',
'timestamp': 1718902233,
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
},
'params': {'format': 'bv'},
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
@@ -63,6 +79,9 @@ class CloudyCDNIE(InfoExtractor):
formats, subtitles = [], {}
for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
for fmt in fmts:
if re.search(r'chunklist_b\d+_vo_', fmt['url']):
fmt['acodec'] = 'none'
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)

View File

@@ -234,7 +234,14 @@ class InfoExtractor:
'maybe' if the format may have DRM and has to be tested before download.
* extra_param_to_segment_url A query string to append to each
fragment's URL, or to update each existing query string
with. Only applied by the native HLS/DASH downloaders.
with. If it is an HLS stream with an AES-128 decryption key,
the query paramaters will be passed to the key URI as well,
unless there is an `extra_param_to_key_url` given,
or unless an external key URI is provided via `hls_aes`.
Only applied by the native HLS/DASH downloaders.
* extra_param_to_key_url A query string to append to the URL
of the format's HLS AES-128 decryption key.
Only applied by the native HLS downloader.
* hls_aes A dictionary of HLS AES-128 decryption information
used by the native HLS downloader to override the
values in the media playlist when an '#EXT-X-KEY' tag
@@ -2215,6 +2222,11 @@ class InfoExtractor:
'quality': quality,
'has_drm': has_drm,
}
# YouTube-specific
if yt_audio_content_id := last_stream_inf.get('YT-EXT-AUDIO-CONTENT-ID'):
f['language'] = yt_audio_content_id.split('.')[0]
resolution = last_stream_inf.get('RESOLUTION')
if resolution:
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)

View File

@@ -2,6 +2,7 @@ from .common import InfoExtractor
from ..utils import (
determine_protocol,
int_or_none,
join_nonempty,
try_get,
unescapeHTML,
)
@@ -52,7 +53,7 @@ class DailyMailIE(InfoExtractor):
is_hls = container == 'M2TS'
protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
formats.append({
'format_id': ('hls' if is_hls else protocol) + (f'-{tbr}' if tbr else ''),
'format_id': join_nonempty('hls' if is_hls else protocol, tbr),
'url': rendition_url,
'width': int_or_none(rendition.get('frameWidth')),
'height': int_or_none(rendition.get('frameHeight')),

View File

@@ -1,16 +1,16 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_resolution,
traverse_obj,
try_get,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class DigitalConcertHallIE(InfoExtractor):
IE_DESC = 'DigitalConcertHall extractor'
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
_ACCESS_TOKEN = None
_NETRC_MACHINE = 'digitalconcerthall'
@@ -26,7 +26,8 @@ class DigitalConcertHallIE(InfoExtractor):
'upload_date': '20210624',
'timestamp': 1624548600,
'duration': 2798,
'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler',
'album_artists': ['Members of the Berliner Philharmoniker', 'Simon Rössler'],
'composers': ['Kurt Weill'],
},
'params': {'skip_download': 'm3u8'},
}, {
@@ -34,8 +35,9 @@ class DigitalConcertHallIE(InfoExtractor):
'url': 'https://www.digitalconcerthall.com/en/concert/53785',
'info_dict': {
'id': '53785',
'album_artist': 'Berliner Philharmoniker / Kirill Petrenko',
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
'playlist_count': 3,
@@ -49,9 +51,20 @@ class DigitalConcertHallIE(InfoExtractor):
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
'upload_date': '20220714',
'timestamp': 1657785600,
'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff',
'album_artists': ['Frank Peter Zimmermann', 'Benedikt von Bernstorff', 'Jakob von Bernstorff'],
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'Concert with several works and an interview',
'url': 'https://www.digitalconcerthall.com/en/work/53785-1',
'info_dict': {
'id': '53785',
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
'playlist_count': 1,
}]
def _perform_login(self, username, password):
@@ -97,15 +110,14 @@ class DigitalConcertHallIE(InfoExtractor):
'Accept-Language': language,
})
m3u8_url = traverse_obj(
stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
formats = []
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False))
yield {
'id': video_id,
'title': item.get('title'),
'composer': item.get('name_composer'),
'url': m3u8_url,
'formats': formats,
'duration': item.get('duration_total'),
'timestamp': traverse_obj(item, ('date', 'published')),
@@ -119,31 +131,32 @@ class DigitalConcertHallIE(InfoExtractor):
}
def _real_extract(self, url):
language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id')
language, type_, video_id, part = self._match_valid_url(url).group('language', 'type', 'id', 'part')
if not language:
language = 'en'
thumbnail_url = self._html_search_regex(
r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)',
self._download_webpage(url, video_id), 'thumbnail')
thumbnails = [{
'url': thumbnail_url,
**parse_resolution(thumbnail_url),
}]
api_type = 'concert' if type_ == 'work' else type_
vid_info = self._download_json(
f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={
f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={
'Accept': 'application/json',
'Accept-Language': language,
})
album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')
album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name'))
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
if type_ == 'work':
videos = [videos[int(part) - 1]]
thumbnail = traverse_obj(vid_info, (
'image', ..., {self._proto_relative_url}, {url_or_none},
{lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size
return {
'_type': 'playlist',
'id': video_id,
'title': vid_info.get('title'),
'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_),
'thumbnails': thumbnails,
'album_artist': album_artist,
'entries': self._entries(
videos, language, type_, thumbnail=thumbnail, album_artists=album_artists),
'thumbnail': thumbnail,
'album_artists': album_artists,
}

View File

@@ -24,8 +24,9 @@ from ..utils import (
class DouyuBaseIE(InfoExtractor):
def _download_cryptojs_md5(self, video_id):
for url in [
# XXX: Do NOT use cdn.bootcdn.net; ref: https://sansec.io/research/polyfill-supply-chain-attack
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
'https://unpkg.com/cryptojslib@3.1.2/rollups/md5.js',
]:
js_code = self._download_webpage(
url, video_id, note='Downloading signing dependency', fatal=False)
@@ -35,7 +36,8 @@ class DouyuBaseIE(InfoExtractor):
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
def _get_cryptojs_md5(self, video_id):
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
return self.cache.load(
'douyu', 'crypto-js-md5', min_ver='2024.07.04') or self._download_cryptojs_md5(video_id)
def _calc_sign(self, sign_func, video_id, a):
b = uuid.uuid4().hex

View File

@@ -1,6 +1,11 @@
from .common import InfoExtractor
from ..networking import Request
from ..utils import float_or_none, int_or_none, parse_iso8601
from ..utils import (
float_or_none,
int_or_none,
join_nonempty,
parse_iso8601,
)
class EitbIE(InfoExtractor):
@@ -37,12 +42,9 @@ class EitbIE(InfoExtractor):
if not video_url:
continue
tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
format_id = 'http'
if tbr:
format_id += f'-{int(tbr)}'
formats.append({
'url': rendition['PMD_URL'],
'format_id': format_id,
'format_id': join_nonempty('http', int_or_none(tbr)),
'width': int_or_none(rendition.get('FRAME_WIDTH')),
'height': int_or_none(rendition.get('FRAME_HEIGHT')),
'tbr': tbr,

View File

@@ -29,9 +29,6 @@ class EpornerIE(InfoExtractor):
'view_count': int,
'age_limit': 18,
},
'params': {
'proxy': '127.0.0.1:8118',
},
}, {
# New (May 2016) URL layout
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',

View File

@@ -621,6 +621,9 @@ class FacebookIE(InfoExtractor):
'url': playable_url,
})
extract_dash_manifest(video, formats)
if not formats:
# Do not append false positive entry w/o any formats
return
automatic_captions, subtitles = {}, {}
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))

View File

@@ -2167,7 +2167,15 @@ class GenericIE(InfoExtractor):
urllib.parse.urlparse(fragment_query).query or fragment_query
or urllib.parse.urlparse(manifest_url).query or None)
hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
key_query = self._configuration_arg('key_query', [None], casesense=True)[0]
if key_query is not None:
info['extra_param_to_key_url'] = (
urllib.parse.urlparse(key_query).query or key_query
or urllib.parse.urlparse(manifest_url).query or None)
def hex_or_none(value):
return value if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE) else None
info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
}) or None

View File

@@ -5,6 +5,7 @@ from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
join_nonempty,
parse_age_limit,
remove_end,
remove_start,
@@ -287,7 +288,7 @@ class GoIE(AdobePassIE):
if mobj:
height = int(mobj.group(2))
f.update({
'format_id': (f'{format_id}-' if format_id else '') + f'{height}P',
'format_id': join_nonempty(format_id, f'{height}P'),
'width': int(mobj.group(1)),
'height': height,
})

View File

@@ -0,0 +1,32 @@
from .common import InfoExtractor
from ..utils import update_url, url_or_none
from ..utils.traversal import traverse_obj
class GraspopIE(InfoExtractor):
_VALID_URL = r'https?://vod\.graspop\.be/[a-z]{2}/(?P<id>\d+)/'
_TESTS = [{
'url': 'https://vod.graspop.be/fr/101556/thy-art-is-murder-concert/',
'info_dict': {
'id': '101556',
'ext': 'mp4',
'title': 'Thy Art Is Murder',
'thumbnail': r're:https://cdn-mds\.pickx\.be/festivals/v3/global/original/.+\.jpg',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
metadata = self._download_json(
f'https://tv.proximus.be/MWC/videocenter/festivals/{video_id}/stream', video_id)
return {
'id': video_id,
'formats': self._extract_m3u8_formats(
# Downgrade manifest request to avoid incomplete certificate chain error
update_url(metadata['source']['assetUri'], scheme='http'), video_id, 'mp4'),
**traverse_obj(metadata, {
'title': ('name', {str}),
'thumbnail': ('source', 'poster', {url_or_none}),
}),
}

View File

@@ -3,6 +3,7 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
join_nonempty,
parse_duration,
urljoin,
xpath_element,
@@ -69,7 +70,7 @@ class HBOBaseIE(InfoExtractor):
height = format_info.get('height')
fmt = {
'url': path,
'format_id': 'http{}'.format(f'-{height}p' if height else ''),
'format_id': join_nonempty('http'. height and f'{height}p'),
'width': format_info.get('width'),
'height': height,
}

View File

@@ -44,9 +44,6 @@ class HKETVIE(InfoExtractor):
'duration': 907,
'subtitles': {},
},
'params': {
'geo_verification_proxy': '<HK proxy here>',
},
'skip': 'Geo restricted to HK',
}]

View File

@@ -453,7 +453,7 @@ class InstagramIE(InstagramBaseIE):
else:
self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
webpage = self._download_webpage(
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False)
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False) or ''
additional_data = self._search_json(
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
if not additional_data and not media:

View File

@@ -2,7 +2,6 @@ import functools
import hashlib
import json
import time
import urllib.error
import urllib.parse
from .common import InfoExtractor

View File

@@ -364,20 +364,25 @@ class JioCinemaSeriesIE(JioCinemaBaseIE):
'title': 'naagin',
},
'playlist_mincount': 120,
}, {
'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820',
'info_dict': {
'id': '3499820',
'title': 'mtv-splitsvilla-x5',
},
'playlist_mincount': 310,
}]
def _entries(self, series_id):
seasons = self._download_json(
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id,
'Downloading series metadata JSON', query={
'sort': 'season:asc',
'id': series_id,
'responseType': 'common',
})
seasons = traverse_obj(self._download_json(
f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id,
'Downloading series metadata JSON', query={'responseType': 'common'}), (
'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter',
'trayTabs', lambda _, v: v['id']))
for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1):
for season_num, season in enumerate(seasons, start=1):
season_id = season['id']
label = season.get('season') or season_num
label = season.get('label') or season_num
for page_num in itertools.count(1):
episodes = traverse_obj(self._download_json(
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',

View File

@@ -158,7 +158,7 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
class JioSaavnPlaylistIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:playlist'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
'info_dict': {
@@ -173,6 +173,13 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
'title': 'Mood Hindi',
},
'playlist_mincount': 801,
}, {
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
'info_dict': {
'id': 'Me5RridRfDk_',
'title': 'Taaza Tunes',
},
'playlist_mincount': 301,
}]
_PAGE_SIZE = 50

View File

@@ -3,43 +3,52 @@ import json
from .common import InfoExtractor
from ..utils import (
int_or_none,
make_archive_id,
parse_iso8601,
try_get,
str_or_none,
traverse_obj,
url_or_none,
urljoin,
)
class KhanAcademyBaseIE(InfoExtractor):
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70'
def _parse_video(self, video):
return {
'_type': 'url_transparent',
'url': video['youtubeId'],
'id': video.get('slug'),
'title': video.get('title'),
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
'duration': int_or_none(video.get('duration')),
'description': video.get('description'),
'id': video['youtubeId'],
'ie_key': 'Youtube',
**traverse_obj(video, {
'display_id': ('id', {str_or_none}),
'title': ('translatedTitle', {str}),
'thumbnail': ('thumbnailUrls', ..., 'url', {url_or_none}),
'duration': ('duration', {int_or_none}),
'description': ('description', {str}),
}, get_all=False),
}
def _real_extract(self, url):
display_id = self._match_id(url)
content = self._download_json(
'https://www.khanacademy.org/api/internal/graphql/FetchContentData',
display_id, query={
'https://www.khanacademy.org/api/internal/graphql/ContentForPath', display_id,
query={
'fastly_cacheable': 'persist_until_publish',
'hash': '4134764944',
'lang': 'en',
'pcv': self._PUBLISHED_CONTENT_VERSION,
'hash': '1242644265',
'variables': json.dumps({
'path': display_id,
'queryParams': 'lang=en',
'isModal': False,
'followRedirects': True,
'countryCode': 'US',
'kaLocale': 'en',
'clientPublishedContentVersion': self._PUBLISHED_CONTENT_VERSION,
}),
})['data']['contentJson']
return self._parse_component_props(self._parse_json(content, display_id)['componentProps'])
'lang': 'en',
})['data']['contentRoute']['listedPathData']
return self._parse_component_props(content, display_id)
class KhanAcademyIE(KhanAcademyBaseIE):
@@ -47,64 +56,98 @@ class KhanAcademyIE(KhanAcademyBaseIE):
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
_TEST = {
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
'md5': '1d5c2e70fa6aa29c38eca419f12515ce',
'info_dict': {
'id': 'FlIG3TvQCBQ',
'ext': 'mp4',
'title': 'The one-time pad',
'description': 'The perfect cipher',
'display_id': '716378217',
'duration': 176,
'uploader': 'Brit Cruise',
'uploader_id': 'khanacademy',
'uploader': 'Khan Academy',
'uploader_id': '@khanacademy',
'uploader_url': 'https://www.youtube.com/@khanacademy',
'upload_date': '20120411',
'timestamp': 1334170113,
'license': 'cc-by-nc-sa',
'live_status': 'not_live',
'channel': 'Khan Academy',
'channel_id': 'UC4a-Gbdw7vOaccHmFo40b9g',
'channel_url': 'https://www.youtube.com/channel/UC4a-Gbdw7vOaccHmFo40b9g',
'channel_is_verified': True,
'playable_in_embed': True,
'categories': ['Education'],
'creators': ['Brit Cruise'],
'tags': [],
'age_limit': 0,
'availability': 'public',
'comment_count': int,
'channel_follower_count': int,
'thumbnail': str,
'view_count': int,
'like_count': int,
'heatmap': list,
},
'add_ie': ['Youtube'],
}
def _parse_component_props(self, component_props):
video = component_props['tutorialPageData']['contentModel']
info = self._parse_video(video)
author_names = video.get('authorNames')
info.update({
'uploader': ', '.join(author_names) if author_names else None,
'timestamp': parse_iso8601(video.get('dateAdded')),
'license': video.get('kaUserLicense'),
})
return info
def _parse_component_props(self, component_props, display_id):
video = component_props['content']
return {
**self._parse_video(video),
**traverse_obj(video, {
'creators': ('authorNames', ..., {str}),
'timestamp': ('dateAdded', {parse_iso8601}),
'license': ('kaUserLicense', {str}),
}),
}
class KhanAcademyUnitIE(KhanAcademyBaseIE):
IE_NAME = 'khanacademy:unit'
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
_TEST = {
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('1,2', '')) + '/?(?:[?#&]|$)'
_TESTS = [{
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
'info_dict': {
'id': 'cryptography',
'id': 'x48c910b6',
'title': 'Cryptography',
'description': 'How have humans protected their secret messages through history? What has changed today?',
'display_id': 'computing/computer-science/cryptography',
'_old_archive_ids': ['khanacademyunit cryptography'],
},
'playlist_mincount': 31,
}
}, {
'url': 'https://www.khanacademy.org/computing/computer-science',
'info_dict': {
'id': 'x301707a0',
'title': 'Computer science theory',
'description': 'md5:4b472a4646e6cf6ec4ccb52c4062f8ba',
'display_id': 'computing/computer-science',
'_old_archive_ids': ['khanacademyunit computer-science'],
},
'playlist_mincount': 50,
}]
def _parse_component_props(self, component_props):
curation = component_props['curation']
def _parse_component_props(self, component_props, display_id):
course = component_props['course']
selected_unit = traverse_obj(course, (
'unitChildren', lambda _, v: v['relativeUrl'] == f'/{display_id}', any)) or course
entries = []
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
for tutorial_number, tutorial in enumerate(tutorials, 1):
chapter_info = {
'chapter': tutorial.get('title'),
'chapter_number': tutorial_number,
'chapter_id': tutorial.get('id'),
}
for content_item in (tutorial.get('contentItems') or []):
if content_item.get('kind') == 'Video':
info = self._parse_video(content_item)
info.update(chapter_info)
entries.append(info)
def build_entry(entry):
return self.url_result(urljoin(
'https://www.khanacademy.org', entry['canonicalUrl']),
KhanAcademyIE, title=entry.get('translatedTitle'))
entries = traverse_obj(selected_unit, (
(('unitChildren', ...), None), 'allOrderedChildren', ..., 'curatedChildren',
lambda _, v: v['contentKind'] == 'Video' and v['canonicalUrl'], {build_entry}))
return self.playlist_result(
entries, curation.get('unit'), curation.get('title'),
curation.get('description'))
entries,
display_id=display_id,
**traverse_obj(selected_unit, {
'id': ('id', {str}),
'title': ('translatedTitle', {str}),
'description': ('translatedDescription', {str}),
'_old_archive_ids': ('slug', {str}, {lambda x: [make_archive_id(self, x)] if x else None}),
}))

View File

@@ -0,0 +1,114 @@
import json
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import (
clean_html,
extract_attributes,
get_element_html_by_id,
int_or_none,
parse_duration,
str_or_none,
unified_strdate,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class LaracastsBaseIE(InfoExtractor):
def _get_prop_data(self, url, display_id):
webpage = self._download_webpage(url, display_id)
return traverse_obj(
get_element_html_by_id('app', webpage),
({extract_attributes}, 'data-page', {json.loads}, 'props'))
def _parse_episode(self, episode):
if not traverse_obj(episode, 'vimeoId'):
self.raise_login_required('This video is only available for subscribers.')
return self.url_result(
VimeoIE._smuggle_referrer(
f'https://player.vimeo.com/video/{episode["vimeoId"]}', 'https://laracasts.com/'),
VimeoIE, url_transparent=True,
**traverse_obj(episode, {
'id': ('id', {int}, {str_or_none}),
'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
'title': ('title', {clean_html}),
'season_number': ('chapter', {int_or_none}),
'episode_number': ('position', {int_or_none}),
'description': ('body', {clean_html}),
'thumbnail': ('largeThumbnail', {url_or_none}),
'duration': ('length', {int_or_none}),
'date': ('dateSegments', 'published', {unified_strdate}),
}))
class LaracastsIE(LaracastsBaseIE):
IE_NAME = 'laracasts'
_VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+/episodes/\d+)/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1',
'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc',
'info_dict': {
'id': '922040563',
'title': 'Hello, Laravel',
'ext': 'mp4',
'duration': 519,
'date': '20240312',
'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png',
'description': 'md5:ddd658bb241975871d236555657e1dd1',
'season_number': 1,
'season': 'Season 1',
'episode_number': 1,
'episode': 'Episode 1',
'uploader': 'Laracasts',
'uploader_id': 'user20182673',
'uploader_url': 'https://vimeo.com/user20182673',
},
'expected_warnings': ['Failed to parse XML'], # TODO: Remove when vimeo extractor is fixed
}]
def _real_extract(self, url):
display_id = self._match_id(url)
return self._parse_episode(self._get_prop_data(url, display_id)['lesson'])
class LaracastsPlaylistIE(LaracastsBaseIE):
IE_NAME = 'laracasts:series'
_VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+)/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11',
'info_dict': {
'title': '30 Days to Learn Laravel',
'id': '210',
'thumbnail': 'https://laracasts.s3.amazonaws.com/series/thumbnails/social-cards/30-days-to-learn-laravel-11.png?v=2',
'duration': 30600.0,
'modified_date': '20240511',
'description': 'md5:27c260a1668a450984e8f901579912dd',
'categories': ['Frameworks'],
'tags': ['Laravel'],
'display_id': '30-days-to-learn-laravel-11',
},
'playlist_count': 30,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
series = self._get_prop_data(url, display_id)['series']
metadata = {
'display_id': display_id,
**traverse_obj(series, {
'title': ('title', {str}),
'id': ('id', {int}, {str_or_none}),
'description': ('body', {clean_html}),
'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
'duration': ('runTime', {parse_duration}),
'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}),
'tags': ('topics', ..., 'name', {str}),
'modified_date': ('lastUpdated', {unified_strdate}),
}),
}
return self.playlist_result(traverse_obj(
series, ('chapters', ..., 'episodes', lambda _, v: v['vimeoId'], {self._parse_episode})), **metadata)

View File

@@ -1,51 +1,35 @@
import random
from .common import InfoExtractor
from ..utils import xpath_text
class MatchTVIE(InfoExtractor):
_VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)'
_VALID_URL = [
r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
]
_TESTS = [{
'url': 'http://matchtv.ru/#live-player',
'url': 'http://matchtv.ru/on-air/',
'info_dict': {
'id': 'matchtv-live',
'ext': 'flv',
'ext': 'mp4',
'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
'live_status': 'is_live',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://matchtv.ru/on-air/',
'url': 'https://video.matchtv.ru/iframe/channel/106',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = 'matchtv-live'
video_url = self._download_json(
'http://player.matchtv.ntvplus.tv/player/smil', video_id,
query={
'ts': '',
'quality': 'SD',
'contentId': '561d2c0df7159b37178b4567',
'sign': '',
'includeHighlights': '0',
'userId': '',
'sessionId': random.randint(1, 1000000000),
'contentType': 'channel',
'timeShift': '0',
'platform': 'portal',
},
headers={
'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
})['data']['videoUrl']
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
formats = self._extract_f4m_formats(f4m_url, video_id)
webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)
video_url = self._html_search_regex(
r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8'
return {
'id': video_id,
'title': 'Матч ТВ - Прямой эфир',
'is_live': True,
'formats': formats,
'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
}

View File

@@ -15,6 +15,7 @@ from ..utils import (
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
@@ -212,13 +213,14 @@ class MediasiteIE(InfoExtractor):
stream_type, 'type%u' % stream_type)
stream_formats = []
for unum, video_url in enumerate(video_urls):
video_url = url_or_none(video_url.get('Location'))
for unum, video in enumerate(video_urls):
video_url = url_or_none(video.get('Location'))
if not video_url:
continue
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
media_type = video_url.get('MediaType')
media_type = video.get('MediaType')
ext = mimetype2ext(video.get('MimeType'))
if media_type == 'SS':
stream_formats.extend(self._extract_ism_formats(
video_url, resource_id,
@@ -229,15 +231,20 @@ class MediasiteIE(InfoExtractor):
video_url, resource_id,
mpd_id=f'{stream_id}-{snum}.{unum}',
fatal=False))
elif ext in ('m3u', 'm3u8'):
stream_formats.extend(self._extract_m3u8_formats(
video_url, resource_id,
m3u8_id=f'{stream_id}-{snum}.{unum}',
fatal=False))
else:
stream_formats.append({
'format_id': f'{stream_id}-{snum}.{unum}',
'url': video_url,
'ext': mimetype2ext(video_url.get('MimeType')),
'ext': ext,
})
if stream.get('HasSlideContent', False):
images = player_options['PlayerLayoutOptions']['Images']
images = traverse_obj(player_options, ('PlayerLayoutOptions', 'Images', {dict}))
if stream.get('HasSlideContent') and images:
stream_formats.append(self.__extract_slides(
stream_id=stream_id,
snum=snum,

View File

@@ -1,5 +1,14 @@
import re
from .common import InfoExtractor
from ..utils import int_or_none, traverse_obj, unified_timestamp
from ..utils import (
int_or_none,
parse_iso8601,
traverse_obj,
unified_timestamp,
url_basename,
url_or_none,
)
class MicrosoftEmbedIE(InfoExtractor):
@@ -63,3 +72,250 @@ class MicrosoftEmbedIE(InfoExtractor):
'subtitles': subtitles,
'thumbnails': thumbnails,
}
class MicrosoftMediusBaseIE(InfoExtractor):
@staticmethod
def _sub_to_dict(subtitle_list):
subtitles = {}
for sub in subtitle_list:
subtitles.setdefault(sub.pop('tag', 'und'), []).append(sub)
return subtitles
def _extract_ism(self, ism_url, video_id):
formats = self._extract_ism_formats(ism_url, video_id)
for fmt in formats:
if fmt['language'] != 'eng' and 'English' not in fmt['format_id']:
fmt['language_preference'] = -10
return formats
class MicrosoftMediusIE(MicrosoftMediusBaseIE):
_VALID_URL = r'https?://medius\.microsoft\.com/Embed/(?:Video\?id=|video-nc/|VideoDetails/)(?P<id>[\da-f-]+)'
_TESTS = [{
'url': 'https://medius.microsoft.com/Embed/video-nc/9640d86c-f513-4889-959e-5dace86e7d2b',
'info_dict': {
'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
'ext': 'ismv',
'title': 'Rapidly code, test and ship from secure cloud developer environments',
'description': 'md5:33c8e4facadc438613476eea24165f71',
'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
'subtitles': 'count:30',
},
}, {
'url': 'https://medius.microsoft.com/Embed/video-nc/81215af5-c813-4dcd-aede-94f4e1a7daa3',
'info_dict': {
'id': '81215af5-c813-4dcd-aede-94f4e1a7daa3',
'ext': 'ismv',
'title': 'Microsoft Build opening',
'description': 'md5:43455096141077a1f23144cab8cec1cb',
'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
'subtitles': 'count:31',
},
}, {
'url': 'https://medius.microsoft.com/Embed/VideoDetails/78493569-9b3b-4a85-a409-ee76e789e25c',
'info_dict': {
'id': '78493569-9b3b-4a85-a409-ee76e789e25c',
'ext': 'ismv',
'title': ' Anomaly Detection & Root cause at Edge',
'description': 'md5:f8f1ad93d7918649bfb97fa081b03b83',
'thumbnail': r're:https://mediusdownload.event.microsoft.com/asset.*\.jpg.*',
'subtitles': 'count:17',
},
}, {
'url': 'https://medius.microsoft.com/Embed/Video?id=0dc69bda-079b-4070-a7db-a8da1a06a9c7',
'only_matching': True,
}, {
'url': 'https://medius.microsoft.com/Embed/video-nc/fe823a91-959c-465b-96d4-8f4db624f72c',
'only_matching': True,
}]
def _extract_subtitle(self, webpage, video_id):
captions = traverse_obj(
self._search_json(r'const\s+captionsConfiguration\s*=', webpage, 'captions', video_id, default=None),
('languageList', lambda _, v: url_or_none(v['src']), {
'url': 'src',
'tag': ('srclang', {str}),
'name': ('kind', {str}),
})) or [{'url': url, 'tag': url_basename(url).split('.vtt')[0].split('_')[-1]}
for url in re.findall(r'var\s+file\s+=\s+\{[^}]+\'(https://[^\']+\.vtt\?[^\']+)', webpage)]
return self._sub_to_dict(captions)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(f'https://medius.microsoft.com/Embed/video-nc/{video_id}', video_id)
return {
'id': video_id,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'formats': self._extract_ism(
self._search_regex(r'StreamUrl\s*=\s*"([^"]+manifest)"', webpage, 'ism url'), video_id),
'thumbnail': self._og_search_thumbnail(webpage),
'subtitles': self._extract_subtitle(webpage, video_id),
}
class MicrosoftLearnPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?(?P<type>shows|events)/(?P<id>[\w-]+)/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners',
'info_dict': {
'id': 'bash-for-beginners',
'title': 'Bash for Beginners',
'description': 'md5:16a91c07222117d1e00912f0dbc02c2c',
},
'playlist_count': 20,
}, {
'url': 'https://learn.microsoft.com/en-us/events/build-2022',
'info_dict': {
'id': 'build-2022',
'title': 'Microsoft Build 2022 - Events',
'description': 'md5:c16b43848027df837b22c6fbac7648d3',
},
'playlist_count': 201,
}]
def _entries(self, url_base, video_id):
skip = 0
while True:
playlist_info = self._download_json(url_base, video_id, f'Downloading entries {skip}', query={
'locale': 'en-us',
'$skip': skip,
})
url_paths = traverse_obj(playlist_info, ('results', ..., 'url', {str}))
for url_path in url_paths:
yield self.url_result(f'https://learn.microsoft.com/en-us{url_path}')
skip += len(url_paths)
if skip >= playlist_info.get('count', 0) or not url_paths:
break
def _real_extract(self, url):
playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
webpage = self._download_webpage(url, playlist_id)
metainfo = {
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
}
sub_type = 'episodes' if playlist_type == 'shows' else 'sessions'
url_base = f'https://learn.microsoft.com/api/contentbrowser/search/{playlist_type}/{playlist_id}/{sub_type}'
return self.playlist_result(self._entries(url_base, playlist_id), playlist_id, **metainfo)
class MicrosoftLearnEpisodeIE(MicrosoftMediusBaseIE):
_VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?shows/[\w-]+/(?P<id>[^?#/]+)'
_TESTS = [{
'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners/what-is-the-difference-between-a-terminal-and-a-shell-2-of-20-bash-for-beginners/',
'info_dict': {
'id': 'd44e1a03-a0e5-45c2-9496-5c9fa08dc94c',
'ext': 'ismv',
'title': 'What is the Difference Between a Terminal and a Shell? (Part 2 of 20)',
'description': 'md5:7bbbfb593d21c2cf2babc3715ade6b88',
'timestamp': 1676339547,
'upload_date': '20230214',
'thumbnail': r're:https://learn\.microsoft\.com/video/media/.*\.png',
'subtitles': 'count:14',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
entry_id = self._html_search_meta('entryId', webpage, 'entryId', fatal=True)
video_info = self._download_json(
f'https://learn.microsoft.com/api/video/public/v1/entries/{entry_id}', video_id)
return {
'id': entry_id,
'formats': self._extract_ism(video_info['publicVideo']['adaptiveVideoUrl'], video_id),
'subtitles': self._sub_to_dict(traverse_obj(video_info, (
'publicVideo', 'captions', lambda _, v: url_or_none(v['url']), {
'tag': ('language', {str}),
'url': 'url',
}))),
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
**traverse_obj(video_info, {
'timestamp': ('createTime', {parse_iso8601}),
'thumbnails': ('publicVideo', 'thumbnailOtherSizes', ..., {'url': {url_or_none}}),
}),
}
class MicrosoftLearnSessionIE(InfoExtractor):
_VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?events/[\w-]+/(?P<id>[^?#/]+)'
_TESTS = [{
'url': 'https://learn.microsoft.com/en-us/events/build-2022/ts01-rapidly-code-test-ship-from-secure-cloud-developer-environments',
'info_dict': {
'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
'ext': 'ismv',
'title': 'Rapidly code, test and ship from secure cloud developer environments - Events',
'description': 'md5:f26c1a85d41c1cffd27a0279254a25c3',
'timestamp': 1653408600,
'upload_date': '20220524',
'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
metainfo = {
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'timestamp': parse_iso8601(self._html_search_meta('startDate', webpage, 'startDate')),
}
return self.url_result(
self._html_search_meta('externalVideoUrl', webpage, 'videoUrl', fatal=True),
url_transparent=True, ie=MicrosoftMediusIE, **metainfo)
class MicrosoftBuildIE(InfoExtractor):
_VALID_URL = [
r'https?://build\.microsoft\.com/[\w-]+/sessions/(?P<id>[\da-f-]+)',
r'https?://build\.microsoft\.com/[\w-]+/(?P<id>sessions)/?(?:[?#]|$)',
]
_TESTS = [{
'url': 'https://build.microsoft.com/en-US/sessions/b49feb31-afcd-4217-a538-d3ca1d171198?source=sessions',
'info_dict': {
'id': 'aee55fb5-fcf9-4b38-b764-a3527cb57554',
'ext': 'ismv',
'title': 'Microsoft Build opening keynote',
'description': 'md5:d38338f336ef4b6ef9ad2a7466a76655',
'timestamp': 1716307200,
'upload_date': '20240521',
'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
},
}, {
'url': 'https://build.microsoft.com/en-US/sessions',
'info_dict': {
'id': 'sessions',
},
'playlist_mincount': 418,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
entries = [
self.url_result(
video_info['onDemand'], ie=MicrosoftMediusIE, url_transparent=True, **traverse_obj(video_info, {
'id': ('sessionId', {str}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('startDateTime', {parse_iso8601}),
}))
for video_info in self._download_json(
'https://api-v2.build.microsoft.com/api/session/all/en-US', video_id, 'Downloading video info')
]
if video_id == 'sessions':
return self.playlist_result(entries, video_id)
else:
return traverse_obj(entries, (lambda _, v: v['id'] == video_id), get_all=False)

View File

@@ -1,188 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
smuggle_url,
unsmuggle_url,
xpath_text,
)
class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
def _extract_base_url(self, course_id, display_id):
return self._download_json(
f'https://api-mlxprod.microsoft.com/services/products/anonymous/{course_id}',
display_id, 'Downloading course base URL')
def _extract_chapter_and_title(self, title):
if not title:
return None, None
m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
return (int(m.group('chapter')), m.group('title')) if m else (None, title)
class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
IE_NAME = 'mva'
IE_DESC = 'Microsoft Virtual Academy videos'
_VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)'
_TESTS = [{
'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
'md5': '7826c44fc31678b12ad8db11f6b5abb9',
'info_dict': {
'id': 'gfVXISmEB_6804984382',
'ext': 'mp4',
'title': 'Course Introduction',
'formats': 'mincount:3',
'subtitles': {
'en': [{
'ext': 'ttml',
}],
},
},
}, {
'url': 'mva:11788:gfVXISmEB_6804984382',
'only_matching': True,
}]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = self._match_valid_url(url)
course_id = mobj.group('course_id')
video_id = mobj.group('id')
base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)
settings = self._download_xml(
f'{base_url}/content/content_{video_id}/videosettings.xml?v=1',
video_id, 'Downloading video settings XML')
_, title = self._extract_chapter_and_title(xpath_text(
settings, './/Title', 'title', fatal=True))
formats = []
for sources in settings.findall('.//MediaSources'):
sources_type = sources.get('videoType')
for source in sources.findall('./MediaSource'):
video_url = source.text
if not video_url or not video_url.startswith('http'):
continue
if sources_type == 'smoothstreaming':
formats.extend(self._extract_ism_formats(
video_url, video_id, 'mss', fatal=False))
continue
video_mode = source.get('videoMode')
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
codec = source.get('codec')
acodec, vcodec = [None] * 2
if codec:
codecs = codec.split(',')
if len(codecs) == 2:
acodec, vcodec = codecs
elif len(codecs) == 1:
vcodec = codecs[0]
formats.append({
'url': video_url,
'format_id': video_mode,
'height': height,
'acodec': acodec,
'vcodec': vcodec,
})
subtitles = {}
for source in settings.findall('.//MarkerResourceSource'):
subtitle_url = source.text
if not subtitle_url:
continue
subtitles.setdefault('en', []).append({
'url': f'{base_url}/{subtitle_url}',
'ext': source.get('type'),
})
return {
'id': video_id,
'title': title,
'subtitles': subtitles,
'formats': formats,
}
class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
IE_NAME = 'mva:course'
IE_DESC = 'Microsoft Virtual Academy courses'
_VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)'
_TESTS = [{
'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
'info_dict': {
'id': '11788',
'title': 'Microsoft Azure Fundamentals: Virtual Machines',
},
'playlist_count': 36,
}, {
# with emphasized chapters
'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
'info_dict': {
'id': '16335',
'title': 'Developing Windows 10 Games with Construct 2',
},
'playlist_count': 10,
}, {
'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
'only_matching': True,
}, {
'url': 'mva:course:11788',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if MicrosoftVirtualAcademyIE.suitable(url) else super().suitable(url)
def _real_extract(self, url):
mobj = self._match_valid_url(url)
course_id = mobj.group('id')
display_id = mobj.group('display_id')
base_url = self._extract_base_url(course_id, display_id)
manifest = self._download_json(
f'{base_url}/imsmanifestlite.json',
display_id, 'Downloading course manifest JSON')['manifest']
organization = manifest['organizations']['organization'][0]
entries = []
for chapter in organization['item']:
chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
chapter_id = chapter.get('@identifier')
for item in chapter.get('item', []):
item_id = item.get('@identifier')
if not item_id:
continue
metadata = item.get('resource', {}).get('metadata') or {}
if metadata.get('learningresourcetype') != 'Video':
continue
_, title = self._extract_chapter_and_title(item.get('title'))
duration = parse_duration(metadata.get('duration'))
description = metadata.get('description')
entries.append({
'_type': 'url_transparent',
'url': smuggle_url(
f'mva:{course_id}:{item_id}', {'base_url': base_url}),
'title': title,
'description': description,
'duration': duration,
'chapter': chapter_title,
'chapter_number': chapter_number,
'chapter_id': chapter_id,
})
title = organization.get('title') or manifest.get('metadata', {}).get('title')
return self.playlist_result(entries, course_id, title)

View File

@@ -9,9 +9,10 @@ from ..utils import (
join_nonempty,
parse_duration,
parse_iso8601,
traverse_obj,
try_get,
url_or_none,
)
from ..utils.traversal import traverse_obj
class MLBBaseIE(InfoExtractor):
@@ -326,15 +327,20 @@ class MLBTVIE(InfoExtractor):
video_id)['data']['Airings']
formats, subtitles = [], {}
for airing in airings:
m3u8_url = self._download_json(
for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']):
format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing)
m3u8_url = traverse_obj(self._download_json(
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
headers={
note=f'Downloading {format_id} stream info JSON',
errnote=f'Failed to download {format_id} stream info, skipping',
fatal=False, headers={
'Authorization': self._access_token,
'Accept': 'application/vnd.media-service+json; version=2',
})['stream']['complete']
}), ('stream', 'complete', {url_or_none}))
if not m3u8_url:
continue
f, s = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, 'mp4', m3u8_id=join_nonempty(airing.get('feedType'), airing.get('feedLanguage')))
m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
formats.extend(f)
self._merge_subtitles(s, target=subtitles)

View File

@@ -5,39 +5,103 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
determine_ext,
int_or_none,
try_get,
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_id,
parse_count,
remove_end,
update_url,
urlencode_postdata,
)
class MurrtubeIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'''(?x)
(?:
murrtube:|
https?://murrtube\.net/videos/(?P<slug>[a-z0-9\-]+)\-
https?://murrtube\.net/(?:v/|videos/(?P<slug>[a-z0-9-]+?)-)
)
(?P<id>[a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12})
(?P<id>[A-Z0-9]{4}|[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})
'''
_TEST = {
_TESTS = [{
'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
'md5': '169f494812d9a90914b42978e73aa690',
'md5': '70380878a77e8565d4aea7f68b8bbb35',
'info_dict': {
'id': '148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
'id': 'ca885d8456b95de529b6723b158032e11115d',
'ext': 'mp4',
'title': 'Inferno X Skyler',
'description': 'Humping a very good slutty sheppy (roomate)',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 284,
'uploader': 'Inferno Wolf',
'age_limit': 18,
'thumbnail': 'https://storage.murrtube.net/murrtube-production/ekbs3zcfvuynnqfx72nn2tkokvsd',
'comment_count': int,
'view_count': int,
'like_count': int,
'tags': ['hump', 'breed', 'Fursuit', 'murrsuit', 'bareback'],
},
}
}, {
'url': 'https://murrtube.net/v/0J2Q',
'md5': '31262f6ac56f0ca75e5a54a0f3fefcb6',
'info_dict': {
'id': '8442998c52134968d9caa36e473e1a6bac6ca',
'ext': 'mp4',
'uploader': 'Hayel',
'title': 'Who\'s in charge now?',
'description': 'md5:795791e97e5b0f1805ea84573f02a997',
'age_limit': 18,
'thumbnail': 'https://storage.murrtube.net/murrtube-production/fb1ojjwiucufp34ya6hxu5vfqi5s',
'comment_count': int,
'view_count': int,
'like_count': int,
},
}]
def _extract_count(self, name, html):
return parse_count(self._search_regex(
rf'([\d,]+)\s+<span[^>]*>{name}</span>', html, name, default=None))
def _real_initialize(self):
homepage = self._download_webpage(
'https://murrtube.net', None, note='Getting session token')
self._request_webpage(
'https://murrtube.net/accept_age_check', None, 'Setting age cookie',
data=urlencode_postdata(self._hidden_inputs(homepage)))
def _real_extract(self, url):
video_id = self._match_id(url)
if video_id.startswith('murrtube:'):
raise ExtractorError('Support for murrtube: prefix URLs is broken')
video_page = self._download_webpage(url, video_id)
video_attrs = extract_attributes(get_element_html_by_id('video', video_page))
playlist = update_url(video_attrs['data-url'], query=None)
video_id = self._search_regex(r'/([\da-f]+)/index.m3u8', playlist, 'video id')
return {
'id': video_id,
'title': remove_end(self._og_search_title(video_page), ' - Murrtube'),
'age_limit': 18,
'formats': self._extract_m3u8_formats(playlist, video_id, 'mp4'),
'description': self._og_search_description(video_page),
'thumbnail': update_url(self._og_search_thumbnail(video_page, default=''), query=None) or None,
'uploader': clean_html(get_element_by_class('pl-1 is-size-6 has-text-lighter', video_page)),
'view_count': self._extract_count('Views', video_page),
'like_count': self._extract_count('Likes', video_page),
'comment_count': self._extract_count('Comments', video_page),
}
class MurrtubeUserIE(InfoExtractor):
_WORKING = False
IE_DESC = 'Murrtube user profile'
_VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
_TESTS = [{
'url': 'https://murrtube.net/stormy',
'info_dict': {
'id': 'stormy',
},
'playlist_mincount': 27,
}]
_PAGE_SIZE = 10
def _download_gql(self, video_id, op, note=None, fatal=True):
result = self._download_json(
@@ -46,73 +110,6 @@ class MurrtubeIE(InfoExtractor):
headers={'Content-Type': 'application/json'})
return result['data']
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_gql(video_id, {
'operationName': 'Medium',
'variables': {
'id': video_id,
},
'query': '''\
query Medium($id: ID!) {
medium(id: $id) {
title
description
key
duration
commentsCount
likesCount
viewsCount
thumbnailKey
tagList
user {
name
__typename
}
__typename
}
}'''})
meta = data['medium']
storage_url = 'https://storage.murrtube.net/murrtube/'
format_url = storage_url + meta.get('key', '')
thumbnail = storage_url + meta.get('thumbnailKey', '')
if determine_ext(format_url) == 'm3u8':
formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native', fatal=False)
else:
formats = [{'url': format_url}]
return {
'id': video_id,
'title': meta.get('title'),
'description': meta.get('description'),
'formats': formats,
'thumbnail': thumbnail,
'duration': int_or_none(meta.get('duration')),
'uploader': try_get(meta, lambda x: x['user']['name']),
'view_count': meta.get('viewsCount'),
'like_count': meta.get('likesCount'),
'comment_count': meta.get('commentsCount'),
'tags': meta.get('tagList'),
'age_limit': 18,
}
class MurrtubeUserIE(MurrtubeIE): # XXX: Do not subclass from concrete IE
_WORKING = False
IE_DESC = 'Murrtube user profile'
_VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
_TEST = {
'url': 'https://murrtube.net/stormy',
'info_dict': {
'id': 'stormy',
},
'playlist_mincount': 27,
}
_PAGE_SIZE = 10
def _fetch_page(self, username, user_id, page):
data = self._download_gql(username, {
'operationName': 'Media',

View File

@@ -16,6 +16,7 @@ from ..utils import (
determine_ext,
float_or_none,
int_or_none,
join_nonempty,
mimetype2ext,
parse_age_limit,
parse_duration,
@@ -498,10 +499,8 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
m3u8_id=format_id, fatal=False))
continue
tbr = int_or_none(va.get('bitrate'), 1000)
if tbr:
format_id += f'-{tbr}'
formats.append({
'format_id': format_id,
'format_id': join_nonempty(format_id, tbr),
'url': public_url,
'width': int_or_none(va.get('width')),
'height': int_or_none(va.get('height')),

View File

@@ -22,12 +22,22 @@ from ..utils import (
class NetEaseMusicBaseIE(InfoExtractor):
_FORMATS = ['bMusic', 'mMusic', 'hMusic']
# XXX: _extract_formats logic depends on the order of the levels in each tier
_LEVELS = (
'standard', # free tier; 标准; 128kbps mp3 or aac
'higher', # free tier; 192kbps mp3 or aac
'exhigh', # free tier; 极高 (HQ); 320kbps mp3 or aac
'lossless', # VIP tier; 无损 (SQ); 48kHz/16bit flac
'hires', # VIP tier; 高解析度无损 (Hi-Res); 192kHz/24bit flac
'jyeffect', # VIP tier; 高清臻音 (Spatial Audio); 96kHz/24bit flac
'jymaster', # SVIP tier; 超清母带 (Master); 192kHz/24bit flac
'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac
)
_API_BASE = 'http://music.163.com/api/'
_GEO_BYPASS = False
@staticmethod
def kilo_or_none(value):
def _kilo_or_none(value):
return int_or_none(value, scale=1000)
def _create_eapi_cipher(self, api_path, query_body, cookies):
@@ -66,45 +76,43 @@ class NetEaseMusicBaseIE(InfoExtractor):
**headers,
}, **kwargs)
def _call_player_api(self, song_id, bitrate):
def _call_player_api(self, song_id, level):
return self._download_eapi_json(
'/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate},
note=f'Downloading song URL info: bitrate {bitrate}')
'/song/enhance/player/url/v1', song_id,
{'ids': f'[{song_id}]', 'level': level, 'encodeType': 'flac'},
note=f'Downloading song URL info: level {level}')
def extract_formats(self, info):
err = 0
def _extract_formats(self, info):
formats = []
song_id = info['id']
for song_format in self._FORMATS:
details = info.get(song_format)
if not details:
for level in self._LEVELS:
song = traverse_obj(
self._call_player_api(song_id, level), ('data', lambda _, v: url_or_none(v['url']), any))
if not song:
break # Media is not available due to removal or geo-restriction
actual_level = song.get('level')
if actual_level and actual_level != level:
if level in ('lossless', 'jymaster'):
break # We've already extracted the highest level of the user's account tier
continue
bitrate = int_or_none(details.get('bitrate')) or 999000
for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))):
song_url = song['url']
if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({
'url': song_url,
'format_id': song_format,
'asr': traverse_obj(details, ('sr', {int_or_none})),
**traverse_obj(song, {
'ext': ('type', {str}),
'abr': ('br', {self.kilo_or_none}),
'filesize': ('size', {int_or_none}),
}),
})
elif err == 0:
err = traverse_obj(song, ('code', {int})) or 0
formats.append({
'url': song['url'],
'format_id': level,
'vcodec': 'none',
**traverse_obj(song, {
'ext': ('type', {str}),
'abr': ('br', {self._kilo_or_none}),
'filesize': ('size', {int_or_none}),
}),
})
if not actual_level:
break # Only 1 level is available if API does not return a value (netease:program)
if not formats:
if err != 0 and (err < 200 or err >= 400):
raise ExtractorError(f'No media links found (site code {err})', expected=True)
else:
self.raise_geo_restricted(
'No media links found: probably due to geo restriction.', countries=['CN'])
self.raise_geo_restricted(
'No media links found; possibly due to geo restriction', countries=['CN'])
return formats
def query_api(self, endpoint, video_id, note):
def _query_api(self, endpoint, video_id, note):
result = self._download_json(
f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE})
code = traverse_obj(result, ('code', {int}))
@@ -128,32 +136,29 @@ class NetEaseMusicBaseIE(InfoExtractor):
class NetEaseMusicIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:song'
IE_DESC = '网易云音乐'
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://music.163.com/#/song?id=548648087',
'url': 'https://music.163.com/#/song?id=550136151',
'info_dict': {
'id': '548648087',
'id': '550136151',
'ext': 'mp3',
'title': '戒烟 (Live)',
'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊',
'title': 'It\'s Ok (Live)',
'creators': 'count:10',
'timestamp': 1522944000,
'upload_date': '20180405',
'description': 'md5:3650af9ee22c87e8637cb2dde22a765c',
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
'duration': 256,
'description': 'md5:9fd07059c2ccee3950dc8363429a3135',
'duration': 197,
'thumbnail': r're:^http.*\.jpg',
'album': '偶像练习生 表演曲目合集',
'average_rating': int,
'album_artist': '偶像练习生',
'album_artists': ['偶像练习生'],
},
}, {
'note': 'No lyrics.',
'url': 'http://music.163.com/song?id=17241424',
'info_dict': {
'id': '17241424',
'ext': 'mp3',
'title': 'Opus 28',
'creator': 'Dustin O\'Halloran',
'upload_date': '20080211',
'timestamp': 1202745600,
'duration': 263,
@@ -161,15 +166,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'album': 'Piano Solos Vol. 2',
'album_artist': 'Dustin O\'Halloran',
'average_rating': int,
'description': '[00:05.00]纯音乐,请欣赏\n',
'album_artists': ['Dustin O\'Halloran'],
'creators': ['Dustin O\'Halloran'],
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
},
}, {
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
'md5': '95826c73ea50b1c288b22180ec9e754d',
'md5': 'b896be78d8d34bd7bb665b26710913ff',
'info_dict': {
'id': '95670',
'ext': 'mp3',
'title': '国际歌',
'creator': '马备',
'upload_date': '19911130',
'timestamp': 691516800,
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
@@ -180,6 +188,8 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'average_rating': int,
'album': '红色摇滚',
'album_artist': '侯牧人',
'creators': ['马备'],
'album_artists': ['侯牧人'],
},
}, {
'url': 'http://music.163.com/#/song?id=32102397',
@@ -188,7 +198,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'id': '32102397',
'ext': 'mp3',
'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar',
'creators': ['Taylor Swift', 'Kendrick Lamar'],
'upload_date': '20150516',
'timestamp': 1431792000,
'description': 'md5:21535156efb73d6d1c355f95616e285a',
@@ -207,7 +217,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'id': '22735043',
'ext': 'mp3',
'title': '소원을 말해봐 (Genie)',
'creator': '少女时代',
'creators': ['少女时代'],
'upload_date': '20100127',
'timestamp': 1264608000,
'description': 'md5:03d1ffebec3139aa4bafe302369269c5',
@@ -251,12 +261,12 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
def _real_extract(self, url):
song_id = self._match_id(url)
info = self.query_api(
info = self._query_api(
f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0]
formats = self.extract_formats(info)
formats = self._extract_formats(info)
lyrics = self._process_lyrics(self.query_api(
lyrics = self._process_lyrics(self._query_api(
f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data'))
lyric_data = {
'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False),
@@ -267,14 +277,14 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'id': song_id,
'formats': formats,
'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None,
'album_artist': ' / '.join(traverse_obj(info, ('album', 'artists', ..., 'name'))) or None,
'creators': traverse_obj(info, ('artists', ..., 'name')) or None,
'album_artists': traverse_obj(info, ('album', 'artists', ..., 'name')) or None,
**lyric_data,
**traverse_obj(info, {
'title': ('name', {str}),
'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
'timestamp': ('album', 'publishTime', {self._kilo_or_none}),
'thumbnail': ('album', 'picUrl', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}),
'duration': ('duration', {self._kilo_or_none}),
'album': ('album', 'name', {str}),
'average_rating': ('score', {int_or_none}),
}),
@@ -284,7 +294,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:album'
IE_DESC = '网易云音乐 - 专辑'
_VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://music\.163\.com/(?:#/)?album\?id=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://music.163.com/#/album?id=133153666',
'info_dict': {
@@ -294,7 +304,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
'description': '桃几2021年翻唱合集',
'thumbnail': r're:^http.*\.jpg',
},
'playlist_mincount': 13,
'playlist_mincount': 12,
}, {
'url': 'http://music.163.com/#/album?id=220780',
'info_dict': {
@@ -328,7 +338,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:singer'
IE_DESC = '网易云音乐 - 歌手'
_VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://music\.163\.com/(?:#/)?artist\?id=(?P<id>[0-9]+)'
_TESTS = [{
'note': 'Singer has aliases.',
'url': 'http://music.163.com/#/artist?id=10559',
@@ -358,7 +368,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
def _real_extract(self, url):
singer_id = self._match_id(url)
info = self.query_api(
info = self._query_api(
f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data')
name = join_nonempty(
@@ -372,7 +382,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
class NetEaseMusicListIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:playlist'
IE_DESC = '网易云音乐 - 歌单'
_VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://music\.163\.com/(?:#/)?(?:playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://music.163.com/#/playlist?id=79177352',
'info_dict': {
@@ -405,11 +415,15 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
'url': 'http://music.163.com/#/discover/toplist?id=3733003',
'info_dict': {
'id': '3733003',
'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
'title': 're:韩国Melon排行榜周榜(?: [0-9]{4}-[0-9]{2}-[0-9]{2})?',
'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
'upload_date': '20200109',
'uploader_id': '2937386',
'tags': ['韩语', '榜单'],
'uploader': 'Melon榜单',
'timestamp': 1578569373,
},
'playlist_count': 50,
'skip': 'Blocked outside Mainland China',
}]
def _real_extract(self, url):
@@ -426,7 +440,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
'tags': ('tags', ..., {str}),
'uploader': ('creator', 'nickname', {str}),
'uploader_id': ('creator', 'userId', {str_or_none}),
'timestamp': ('updateTime', {self.kilo_or_none}),
'timestamp': ('updateTime', {self._kilo_or_none}),
}))
if traverse_obj(info, ('playlist', 'specialType')) == 10:
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
@@ -437,7 +451,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:mv'
IE_DESC = '网易云音乐 - MV'
_VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://music\.163\.com/(?:#/)?mv\?id=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://music.163.com/#/mv?id=10958064',
'info_dict': {
@@ -445,7 +459,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
'ext': 'mp4',
'title': '交换余生',
'description': 'md5:e845872cff28820642a2b02eda428fea',
'creator': '林俊杰',
'creators': ['林俊杰'],
'upload_date': '20200916',
'thumbnail': r're:http.*\.jpg',
'duration': 364,
@@ -460,7 +474,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
'ext': 'mp4',
'title': '이럴거면 그러지말지',
'description': '白雅言自作曲唱甜蜜爱情',
'creator': '白娥娟',
'creators': ['白娥娟'],
'upload_date': '20150520',
'thumbnail': r're:http.*\.jpg',
'duration': 216,
@@ -468,12 +482,28 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
'like_count': int,
'comment_count': int,
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'This MV has multiple creators.',
'url': 'https://music.163.com/#/mv?id=22593543',
'info_dict': {
'id': '22593543',
'ext': 'mp4',
'title': '老北京杀器',
'creators': ['秃子2z', '辉子', 'Saber梁维嘉'],
'duration': 206,
'upload_date': '20240618',
'like_count': int,
'comment_count': int,
'thumbnail': r're:http.*\.jpg',
'view_count': int,
},
}]
def _real_extract(self, url):
mv_id = self._match_id(url)
info = self.query_api(
info = self._query_api(
f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data']
formats = [
@@ -484,13 +514,13 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
return {
'id': mv_id,
'formats': formats,
'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')],
**traverse_obj(info, {
'title': ('name', {str}),
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
'creator': ('artistName', {str}),
'upload_date': ('publishTime', {unified_strdate}),
'thumbnail': ('cover', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}),
'duration': ('duration', {self._kilo_or_none}),
'view_count': ('playCount', {int_or_none}),
'like_count': ('likeCount', {int_or_none}),
'comment_count': ('commentCount', {int_or_none}),
@@ -501,7 +531,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:program'
IE_DESC = '网易云音乐 - 电台节目'
_VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://music\.163\.com/(?:#/)?program\?id=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://music.163.com/#/program?id=10109055',
'info_dict': {
@@ -509,7 +539,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'ext': 'mp3',
'title': '不丹足球背后的故事',
'description': '喜马拉雅人的足球梦 ...',
'creator': '大话西藏',
'creators': ['大话西藏'],
'timestamp': 1434179287,
'upload_date': '20150613',
'thumbnail': r're:http.*\.jpg',
@@ -522,7 +552,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'id': '10141022',
'title': '滚滚电台的有声节目',
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
'creator': '滚滚电台ORZ',
'creators': ['滚滚电台ORZ'],
'timestamp': 1434450733,
'upload_date': '20150616',
'thumbnail': r're:http.*\.jpg',
@@ -536,7 +566,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'ext': 'mp3',
'title': '滚滚电台的有声节目',
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
'creator': '滚滚电台ORZ',
'creators': ['滚滚电台ORZ'],
'timestamp': 1434450733,
'upload_date': '20150616',
'thumbnail': r're:http.*\.jpg',
@@ -550,7 +580,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
def _real_extract(self, url):
program_id = self._match_id(url)
info = self.query_api(
info = self._query_api(
f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program']
metainfo = traverse_obj(info, {
@@ -558,17 +588,17 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'description': ('description', {str}),
'creator': ('dj', 'brand', {str}),
'thumbnail': ('coverUrl', {url_or_none}),
'timestamp': ('createTime', {self.kilo_or_none}),
'timestamp': ('createTime', {self._kilo_or_none}),
})
if not self._yes_playlist(
info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
formats = self.extract_formats(info['mainSong'])
formats = self._extract_formats(info['mainSong'])
return {
'id': str(info['mainSong']['id']),
'formats': formats,
'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})),
'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})),
**metainfo,
}
@@ -579,7 +609,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:djradio'
IE_DESC = '网易云音乐 - 电台'
_VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://music\.163\.com/(?:#/)?djradio\?id=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://music.163.com/#/djradio?id=42',
'info_dict': {
@@ -597,7 +627,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
metainfo = {}
entries = []
for offset in itertools.count(start=0, step=self._PAGE_SIZE):
info = self.query_api(
info = self._query_api(
f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}',
dj_id, note=f'Downloading dj programs - {offset}')

View File

@@ -2,6 +2,7 @@ from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
join_nonempty,
parse_duration,
parse_iso8601,
)
@@ -41,7 +42,7 @@ class NHLBaseIE(InfoExtractor):
else:
height = int_or_none(playback.get('height'))
formats.append({
'format_id': playback.get('name', 'http' + (f'-{height}p' if height else '')),
'format_id': playback.get('name') or join_nonempty('http', height and f'{height}p'),
'url': playback_url,
'width': int_or_none(playback.get('width')),
'height': height,

View File

@@ -43,15 +43,17 @@ class NuumBaseIE(InfoExtractor):
is_live = media.get('media_status') == 'RUNNING'
formats, subtitles = None, None
headers = {'Referer': 'https://nuum.ru/'}
if extract_formats:
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
media_url, video_id, 'mp4', live=is_live)
media_url, video_id, 'mp4', live=is_live, headers=headers)
return filter_dict({
'id': video_id,
'is_live': is_live,
'formats': formats,
'subtitles': subtitles,
'http_headers': headers,
**traverse_obj(container, {
'title': ('media_container_name', {str}),
'description': ('media_container_description', {str}),
@@ -78,7 +80,7 @@ class NuumMediaIE(NuumBaseIE):
'only_matching': True,
}, {
'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
'md5': 'f1d9118a30403e32b702a204eb03aca3',
'md5': 'ce28837a5bbffe6952d7bfd3d39811b0',
'info_dict': {
'id': '1567547',
'ext': 'mp4',

View File

@@ -550,7 +550,8 @@ class ORFONIE(InfoExtractor):
return self._extract_video_info(segment_id, selected_segment)
# Even some segmented videos have an unsegmented version available in API response root
if not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none})):
if (self._configuration_arg('prefer_segments_playlist')
or not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none}))):
return self.playlist_result(
(self._extract_video_info(str(segment['id']), segment) for segment in segments),
video_id, **self._parse_metadata(api_json), multi_video=True)

View File

@@ -2,6 +2,7 @@ import itertools
import urllib.parse
from .common import InfoExtractor
from .sproutvideo import VidsIoIE
from .vimeo import VimeoIE
from ..networking.exceptions import HTTPError
from ..utils import (
@@ -12,6 +13,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_iso8601,
smuggle_url,
str_or_none,
traverse_obj,
url_or_none,
@@ -305,22 +307,28 @@ class PatreonIE(PatreonBaseIE):
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
}))
# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
headers = {'referer': 'https://patreon.com/'}
# handle Vimeo embeds
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
v_url = urllib.parse.unquote(self._html_search_regex(
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
if url_or_none(v_url) and self._request_webpage(
v_url, video_id, 'Checking Vimeo embed URL',
headers={'Referer': 'https://patreon.com/'},
fatal=False, errnote=False):
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
entries.append(self.url_result(
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
VimeoIE, url_transparent=True))
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
entries.append(self.url_result(embed_url))
if embed_url and (urlh := self._request_webpage(
embed_url, video_id, 'Checking embed URL', headers=headers,
fatal=False, errnote=False, expected_status=403)):
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
if urlh.status != 403 or VidsIoIE.suitable(embed_url):
entries.append(self.url_result(smuggle_url(embed_url, headers)))
post_file = traverse_obj(attributes, ('post_file', {dict}))
if post_file:

View File

@@ -41,7 +41,7 @@ class PelotonIE(InfoExtractor):
}, 'params': {
'skip_download': 'm3u8',
},
'_skip': 'Account needed',
'skip': 'Account needed',
}, {
'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8',
'info_dict': {
@@ -61,7 +61,7 @@ class PelotonIE(InfoExtractor):
}, 'params': {
'skip_download': 'm3u8',
},
'_skip': 'Account needed',
'skip': 'Account needed',
}]
_MANIFEST_URL_TEMPLATE = '%s?hdnea=%s'
@@ -199,7 +199,7 @@ class PelotonLiveIE(InfoExtractor):
'params': {
'skip_download': 'm3u8',
},
'_skip': 'Account needed',
'skip': 'Account needed',
}
def _real_extract(self, url):

View File

@@ -1,5 +1,5 @@
from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import int_or_none, join_nonempty
class PerformGroupIE(InfoExtractor):
@@ -50,11 +50,8 @@ class PerformGroupIE(InfoExtractor):
if not c_url:
continue
tbr = int_or_none(c.get('bitrate'), 1000)
format_id = 'http'
if tbr:
format_id += f'-{tbr}'
formats.append({
'format_id': format_id,
'format_id': join_nonempty('http', tbr),
'url': c_url,
'tbr': tbr,
'width': int_or_none(c.get('width')),

View File

@@ -1,28 +1,40 @@
from .common import InfoExtractor
from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call
from ..utils import (
OnDemandPagedList,
clean_html,
int_or_none,
jwt_decode_hs256,
url_or_none,
)
from ..utils.traversal import traverse_obj
def result_from_props(props, episode_id=None):
def result_from_props(props):
return {
'id': props.get('podcast_id') or episode_id,
'title': props.get('title'),
'url': props['mediaURL'],
**traverse_obj(props, {
'id': ('_id', {str}),
'title': ('title', {str}),
'url': ('mediaURL', {url_or_none}),
'description': ('description', {clean_html}),
'thumbnail': ('image', {jwt_decode_hs256}, 'url', {url_or_none}),
'timestamp': ('timestamp', {int_or_none}),
'duration': ('duration', {int_or_none}),
}),
'ext': 'mp3',
'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']),
'timestamp': props.get('timestamp'),
'duration': int_or_none(props.get('duration')),
'vcodec': 'none',
}
class PodbayFMIE(InfoExtractor):
_VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$'
_VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>\d+)'
_TESTS = [{
'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
'md5': '98b41285dcf7989d105a4ed0404054cf',
'md5': '895ac8505de349515f5ee8a4a3195c93',
'info_dict': {
'id': '1647338400',
'id': '62306451f4a48e58d0c4d6a8',
'title': 'Part One: Kissinger',
'ext': 'mp3',
'description': r're:^We begin our epic six part series on Henry Kissinger.+',
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1647338400,
'duration': 5001,
@@ -34,24 +46,25 @@ class PodbayFMIE(InfoExtractor):
episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id)
data = self._search_nextjs_data(webpage, episode_id)
return result_from_props(data['props']['pageProps']['episode'], episode_id)
return result_from_props(data['props']['pageProps']['episode'])
class PodbayFMChannelIE(InfoExtractor):
_VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$'
_VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:$|[?#])'
_TESTS = [{
'url': 'https://podbay.fm/p/behind-the-bastards',
'info_dict': {
'id': 'behind-the-bastards',
'title': 'Behind the Bastards',
},
'playlist_mincount': 21,
}]
_PAGE_SIZE = 10
def _fetch_page(self, channel_id, pagenum):
return self._download_json(
f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
channel_id)['podcast']
f'Downloading channel JSON page {pagenum + 1}', channel_id)['podcast']
@staticmethod
def _results_from_page(channel_id, page):

View File

@@ -5,6 +5,7 @@ from ..utils import (
ExtractorError,
try_get,
)
from ..utils.traversal import traverse_obj
class PokerGoBaseIE(InfoExtractor):
@@ -65,7 +66,7 @@ class PokerGoIE(PokerGoBaseIE):
'width': image.get('width'),
'height': image.get('height'),
} for image in data_json.get('images') or [] if image.get('url')]
series_json = next(dct for dct in data_json.get('show_tags') or [] if dct.get('video_id') == video_id) or {}
series_json = traverse_obj(data_json, ('show_tags', lambda _, v: v['video_id'] == video_id, any)) or {}
return {
'_type': 'url_transparent',

View File

@@ -1,9 +1,9 @@
import datetime as dt
import functools
import json
import urllib.parse
from .common import InfoExtractor
from ..compat import functools
from ..utils import (
ExtractorError,
float_or_none,

View File

@@ -7,6 +7,7 @@ from ..utils import (
determine_ext,
float_or_none,
int_or_none,
join_nonempty,
merge_dicts,
unified_strdate,
)
@@ -147,13 +148,13 @@ class ProSiebenSat1BaseIE(InfoExtractor):
'page_url': 'http://www.prosieben.de',
'tbr': tbr,
'ext': 'flv',
'format_id': 'rtmp{}'.format(f'-{tbr}' if tbr else ''),
'format_id': join_nonempty('rtmp', tbr),
})
else:
formats.append({
'url': source_url,
'tbr': tbr,
'format_id': 'http{}'.format(f'-{tbr}' if tbr else ''),
'format_id': join_nonempty('http', tbr),
})
return {

View File

@@ -1,48 +1,125 @@
import base64
import functools
import json
import random
import re
import time
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
clean_html,
int_or_none,
join_nonempty,
js_to_json,
str_or_none,
strip_jsonp,
traverse_obj,
unescapeHTML,
url_or_none,
urljoin,
)
class QQMusicIE(InfoExtractor):
class QQMusicBaseIE(InfoExtractor):
def _get_cookie(self, key, default=None):
return getattr(self._get_cookies('https://y.qq.com').get(key), 'value', default)
def _get_g_tk(self):
n = 5381
for c in self._get_cookie('qqmusic_key', ''):
n += (n << 5) + ord(c)
return n & 2147483647
def _get_uin(self):
return int_or_none(self._get_cookie('uin')) or 0
@property
def is_logged_in(self):
return bool(self._get_uin() and self._get_cookie('fqm_pvqid'))
# Reference: m_r_GetRUin() in top_player.js
# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
@staticmethod
def _m_r_get_ruin():
cur_ms = int(time.time() * 1000) % 1000
return int(round(random.random() * 2147483647) * cur_ms % 1E10)
def _download_init_data(self, url, mid, fatal=True):
webpage = self._download_webpage(url, mid, fatal=fatal)
return self._search_json(r'window\.__INITIAL_DATA__\s*=', webpage,
'init data', mid, transform_source=js_to_json, fatal=fatal)
def _make_fcu_req(self, req_dict, mid, headers={}, **kwargs):
return self._download_json(
'https://u.y.qq.com/cgi-bin/musicu.fcg', mid, data=json.dumps({
'comm': {
'cv': 0,
'ct': 24,
'format': 'json',
'uin': self._get_uin(),
},
**req_dict,
}, separators=(',', ':')).encode(), headers=headers, **kwargs)
class QQMusicIE(QQMusicBaseIE):
IE_NAME = 'qqmusic'
IE_DESC = 'QQ音乐'
_VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html'
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/songDetail/(?P<id>[0-9A-Za-z]+)'
_TESTS = [{
'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html',
'url': 'https://y.qq.com/n/ryqq/songDetail/004Ti8rT003TaZ',
'md5': 'd7adc5c438d12e2cb648cca81593fd47',
'info_dict': {
'id': '004Ti8rT003TaZ',
'ext': 'mp3',
'title': '永夜のパレード (永夜的游行)',
'album': '幻想遊園郷 -Fantastic Park-',
'release_date': '20111230',
'duration': 281,
'creators': ['ケーキ姫', 'JUMA'],
'genres': ['Pop'],
'description': 'md5:b5261f3d595657ae561e9e6aee7eb7d9',
'size': 4501244,
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
'subtitles': 'count:1',
},
}, {
'url': 'https://y.qq.com/n/ryqq/songDetail/004295Et37taLD',
'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
'info_dict': {
'id': '004295Et37taLD',
'ext': 'mp3',
'title': '可惜没如果',
'release_date': '20141227',
'creator': '林俊杰',
'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac',
'thumbnail': r're:^https?://.*\.jpg$',
'album': '新地球 - 人 (Special Edition)',
'release_date': '20150129',
'duration': 298,
'creators': ['林俊杰'],
'genres': ['Pop'],
'description': 'md5:f568421ff618d2066e74b65a04149c4e',
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
},
'skip': 'premium member only',
}, {
'note': 'There is no mp3-320 version of this song.',
'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html',
'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
'url': 'https://y.qq.com/n/ryqq/songDetail/004MsGEo3DdNxV',
'md5': '028aaef1ae13d8a9f4861a92614887f9',
'info_dict': {
'id': '004MsGEo3DdNxV',
'ext': 'mp3',
'title': '如果',
'album': '新传媒电视连续剧金曲系列II',
'release_date': '20050626',
'creator': '李季美',
'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 220,
'creators': ['李季美'],
'genres': [],
'description': 'md5:fc711212aa623b28534954dc4bd67385',
'size': 3535730,
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
},
}, {
'note': 'lyrics not in .lrc format',
'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
'url': 'https://y.qq.com/n/ryqq/songDetail/001JyApY11tIp6',
'info_dict': {
'id': '001JyApY11tIp6',
'ext': 'mp3',
@@ -50,185 +127,193 @@ class QQMusicIE(InfoExtractor):
'release_date': '19970225',
'creator': 'Dark Funeral',
'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True,
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
},
'params': {'skip_download': True},
'skip': 'no longer available',
}]
_FORMATS = {
'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10},
'F000': {'name': 'flac', 'prefix': 'F000', 'ext': 'flac', 'preference': 60},
'A000': {'name': 'ape', 'prefix': 'A000', 'ext': 'ape', 'preference': 50},
'M800': {'name': '320mp3', 'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
'M500': {'name': '128mp3', 'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
'C400': {'name': '96aac', 'prefix': 'C400', 'ext': 'm4a', 'preference': 20, 'abr': 96},
'C200': {'name': '48aac', 'prefix': 'C200', 'ext': 'm4a', 'preference': 20, 'abr': 48},
}
# Reference: m_r_GetRUin() in top_player.js
# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
@staticmethod
def m_r_get_ruin():
cur_ms = int(time.time() * 1000) % 1000
return int(round(random.random() * 2147483647) * cur_ms % 1E10)
def _real_extract(self, url):
mid = self._match_id(url)
detail_info_page = self._download_webpage(
f'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid={mid}&play=0',
mid, note='Download song detail info',
errnote='Unable to get song detail info', encoding='gbk')
init_data = self._download_init_data(url, mid, fatal=False)
info_data = self._make_fcu_req({'info': {
'module': 'music.pf_song_detail_svr',
'method': 'get_song_detail_yqq',
'param': {
'song_mid': mid,
'song_type': 0,
},
}}, mid, note='Downloading song info')['info']['data']['track_info']
song_name = self._html_search_regex(
r"songname:\s*'([^']+)'", detail_info_page, 'song name')
media_mid = info_data['file']['media_mid']
publish_time = self._html_search_regex(
r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
'publish time', default=None)
if publish_time:
publish_time = publish_time.replace('-', '')
singer = self._html_search_regex(
r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
lrc_content = self._html_search_regex(
r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
detail_info_page, 'LRC lyrics', default=None)
if lrc_content:
lrc_content = lrc_content.replace('\\n', '\n')
thumbnail_url = None
albummid = self._search_regex(
[r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
detail_info_page, 'album mid', default=None)
if albummid:
thumbnail_url = f'http://i.gtimg.cn/music/photo/mid_album_500/{albummid[-2:-1]}/{albummid[-1]}/{albummid}.jpg'
guid = self.m_r_get_ruin()
vkey = self._download_json(
f'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid={guid}',
mid, note='Retrieve vkey', errnote='Unable to get vkey',
transform_source=strip_jsonp)['key']
data = self._make_fcu_req({
'req_1': {
'module': 'vkey.GetVkeyServer',
'method': 'CgiGetVkey',
'param': {
'guid': str(self._m_r_get_ruin()),
'songmid': [mid] * len(self._FORMATS),
'songtype': [0] * len(self._FORMATS),
'uin': str(self._get_uin()),
'loginflag': 1,
'platform': '20',
'filename': [f'{f["prefix"]}{media_mid}.{f["ext"]}' for f in self._FORMATS.values()],
},
},
'req_2': {
'module': 'music.musichallSong.PlayLyricInfo',
'method': 'GetPlayLyricInfo',
'param': {'songMID': mid},
},
}, mid, note='Downloading formats and lyric', headers=self.geo_verification_headers())
code = traverse_obj(data, ('req_1', 'code', {int}))
if code != 0:
raise ExtractorError(f'Failed to download format info, error code {code or "unknown"}')
formats = []
for format_id, details in self._FORMATS.items():
for media_info in traverse_obj(data, (
'req_1', 'data', 'midurlinfo', lambda _, v: v['songmid'] == mid and v['purl']),
):
format_key = traverse_obj(media_info, ('filename', {str}, {lambda x: x[:4]}))
format_info = self._FORMATS.get(format_key) or {}
format_id = format_info.get('name')
formats.append({
'url': 'http://cc.stream.qqmusic.qq.com/{}{}.{}?vkey={}&guid={}&fromtag=0'.format(
details['prefix'], mid, details['ext'], vkey, guid),
'url': urljoin('https://dl.stream.qqmusic.qq.com', media_info['purl']),
'format': format_id,
'format_id': format_id,
'quality': details['preference'],
'abr': details.get('abr'),
'size': traverse_obj(info_data, ('file', f'size_{format_id}', {int_or_none})),
'quality': format_info.get('preference'),
'abr': format_info.get('abr'),
'ext': format_info.get('ext'),
'vcodec': 'none',
})
self._check_formats(formats, mid)
actual_lrc_lyrics = ''.join(
line + '\n' for line in re.findall(
r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content))
if not formats and not self.is_logged_in:
self.raise_login_required()
if traverse_obj(data, ('req_2', 'code')):
self.report_warning(f'Failed to download lyric, error {data["req_2"]["code"]!r}')
lrc_content = traverse_obj(data, ('req_2', 'data', 'lyric', {lambda x: base64.b64decode(x).decode('utf-8')}))
info_dict = {
'id': mid,
'formats': formats,
'title': song_name,
'release_date': publish_time,
'creator': singer,
'description': lrc_content,
'thumbnail': thumbnail_url,
**traverse_obj(info_data, {
'title': ('title', {str}),
'album': ('album', 'title', {str}, {lambda x: x or None}),
'release_date': ('time_public', {lambda x: x.replace('-', '') or None}),
'creators': ('singer', ..., 'name', {str}),
'alt_title': ('subtitle', {str}, {lambda x: x or None}),
'duration': ('interval', {int_or_none}),
}),
**traverse_obj(init_data, ('detail', {
'thumbnail': ('picurl', {url_or_none}),
'description': ('info', 'intro', 'content', ..., 'value', {str}),
'genres': ('info', 'genre', 'content', ..., 'value', {str}, all),
}), get_all=False),
}
if actual_lrc_lyrics:
info_dict['subtitles'] = {
'origin': [{
'ext': 'lrc',
'data': actual_lrc_lyrics,
}],
}
if lrc_content:
info_dict['subtitles'] = {'origin': [{'ext': 'lrc', 'data': lrc_content}]}
info_dict['description'] = join_nonempty(info_dict.get('description'), lrc_content, delim='\n')
return info_dict
class QQPlaylistBaseIE(InfoExtractor):
@staticmethod
def qq_static_url(category, mid):
return f'http://y.qq.com/y/static/{category}/{mid[-2]}/{mid[-1]}/{mid}.html'
def get_singer_all_songs(self, singmid, num):
return self._download_webpage(
r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
query={
'format': 'json',
'inCharset': 'utf8',
'outCharset': 'utf-8',
'platform': 'yqq',
'needNewCode': 0,
'singermid': singmid,
'order': 'listen',
'begin': 0,
'num': num,
'songstatus': 1,
})
def get_entries_from_page(self, singmid):
entries = []
default_num = 1
json_text = self.get_singer_all_songs(singmid, default_num)
json_obj_all_songs = self._parse_json(json_text, singmid)
if json_obj_all_songs['code'] == 0:
total = json_obj_all_songs['data']['total']
json_text = self.get_singer_all_songs(singmid, total)
json_obj_all_songs = self._parse_json(json_text, singmid)
for item in json_obj_all_songs['data']['list']:
if item['musicData'].get('songmid') is not None:
songmid = item['musicData']['songmid']
entries.append(self.url_result(
rf'https://y.qq.com/n/yqq/song/{songmid}.html', 'QQMusic', songmid))
return entries
class QQMusicSingerIE(QQPlaylistBaseIE):
class QQMusicSingerIE(QQMusicBaseIE):
IE_NAME = 'qqmusic:singer'
IE_DESC = 'QQ音乐 - 歌手'
_VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html'
_TEST = {
'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html',
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/singer/(?P<id>[0-9A-Za-z]+)'
_TESTS = [{
'url': 'https://y.qq.com/n/ryqq/singer/001BLpXF2DyJe2',
'info_dict': {
'id': '001BLpXF2DyJe2',
'title': '林俊杰',
'description': 'md5:870ec08f7d8547c29c93010899103751',
'description': 'md5:10624ce73b06fa400bc846f59b0305fa',
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
},
'playlist_mincount': 12,
}
'playlist_mincount': 100,
}, {
'url': 'https://y.qq.com/n/ryqq/singer/000Q00f213YzNV',
'info_dict': {
'id': '000Q00f213YzNV',
'title': '桃几OvO',
'description': '小破站小唱见~希望大家喜欢听我唱歌~',
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
},
'playlist_count': 12,
'playlist': [{
'info_dict': {
'id': '0016cvsy02mmCl',
'ext': 'mp3',
'title': '群青',
'album': '桃几2021年翻唱集',
'release_date': '20210913',
'duration': 248,
'creators': ['桃几OvO'],
'genres': ['Pop'],
'description': 'md5:4296005a04edcb5cdbe0889d5055a7ae',
'size': 3970822,
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
},
}],
}]
_PAGE_SIZE = 50
def _fetch_page(self, mid, page_size, page_num):
data = self._make_fcu_req({'req_1': {
'module': 'music.web_singer_info_svr',
'method': 'get_singer_detail_info',
'param': {
'sort': 5,
'singermid': mid,
'sin': page_num * page_size,
'num': page_size,
}}}, mid, note=f'Downloading page {page_num}')
yield from traverse_obj(data, ('req_1', 'data', 'songlist', ..., {lambda x: self.url_result(
f'https://y.qq.com/n/ryqq/songDetail/{x["mid"]}', QQMusicIE, x['mid'], x.get('title'))}))
def _real_extract(self, url):
mid = self._match_id(url)
init_data = self._download_init_data(url, mid, fatal=False)
entries = self.get_entries_from_page(mid)
singer_page = self._download_webpage(url, mid, 'Download singer page')
singer_name = self._html_search_regex(
r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None)
singer_desc = None
return self.playlist_result(
OnDemandPagedList(functools.partial(self._fetch_page, mid, self._PAGE_SIZE), self._PAGE_SIZE),
mid, **traverse_obj(init_data, ('singerDetail', {
'title': ('basic_info', 'name', {str}),
'description': ('ex_info', 'desc', {str}),
'thumbnail': ('pic', 'pic', {url_or_none}),
})))
if mid:
singer_desc_page = self._download_xml(
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
'Donwload singer description XML',
query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
singer_desc = singer_desc_page.find('./data/info/desc').text
return self.playlist_result(entries, mid, singer_name, singer_desc)
class QQPlaylistBaseIE(InfoExtractor):
def _extract_entries(self, info_json, path):
for song in traverse_obj(info_json, path):
song_mid = song.get('songmid')
if not song_mid:
continue
yield self.url_result(
f'https://y.qq.com/n/ryqq/songDetail/{song_mid}',
QQMusicIE, song_mid, song.get('songname'))
class QQMusicAlbumIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:album'
IE_DESC = 'QQ音乐 - 专辑'
_VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/albumDetail/(?P<id>[0-9A-Za-z]+)'
_TESTS = [{
'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
'url': 'https://y.qq.com/n/ryqq/albumDetail/000gXCTb2AhRR1',
'info_dict': {
'id': '000gXCTb2AhRR1',
'title': '我们都是这样长大的',
@@ -236,10 +321,10 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
},
'playlist_count': 4,
}, {
'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
'url': 'https://y.qq.com/n/ryqq/albumDetail/002Y5a3b3AlCu3',
'info_dict': {
'id': '002Y5a3b3AlCu3',
'title': '그리고...',
'title': '그리고',
'description': 'md5:a48823755615508a95080e81b51ba729',
},
'playlist_count': 8,
@@ -248,49 +333,45 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
def _real_extract(self, url):
mid = self._match_id(url)
album = self._download_json(
f'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid={mid}&format=json',
mid, 'Download album page')['data']
album_json = self._download_json(
'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg',
mid, 'Download album page',
query={'albummid': mid, 'format': 'json'})['data']
entries = [
self.url_result(
'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'],
) for song in album['list']
]
album_name = album.get('name')
album_detail = album.get('desc')
if album_detail is not None:
album_detail = album_detail.strip()
entries = self._extract_entries(album_json, ('list', ...))
return self.playlist_result(entries, mid, album_name, album_detail)
return self.playlist_result(entries, mid, **traverse_obj(album_json, {
'title': ('name', {str}),
'description': ('desc', {str.strip}),
}))
class QQMusicToplistIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:toplist'
IE_DESC = 'QQ音乐 - 排行榜'
_VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html'
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/toplist/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://y.qq.com/n/yqq/toplist/123.html',
'url': 'https://y.qq.com/n/ryqq/toplist/123',
'info_dict': {
'id': '123',
'title': '美国iTunes榜',
'description': 'md5:89db2335fdbb10678dee2d43fe9aba08',
'title': r're:美国热门音乐榜 \d{4}-\d{2}-\d{2}',
'description': '美国热门音乐榜,每周一更新。',
},
'playlist_count': 100,
'playlist_count': 95,
}, {
'url': 'https://y.qq.com/n/yqq/toplist/3.html',
'url': 'https://y.qq.com/n/ryqq/toplist/3',
'info_dict': {
'id': '3',
'title': '巅峰榜·欧美',
'description': 'md5:5a600d42c01696b26b71f8c4d43407da',
'title': r're:巅峰榜·欧美 \d{4}-\d{2}-\d{2}',
'description': 'md5:4def03b60d3644be4c9a36f21fd33857',
},
'playlist_count': 100,
}, {
'url': 'https://y.qq.com/n/yqq/toplist/106.html',
'url': 'https://y.qq.com/n/ryqq/toplist/106',
'info_dict': {
'id': '106',
'title': '韩国Mnet榜',
'title': r're:韩国Mnet榜 \d{4}-\d{2}-\d{2}',
'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
},
'playlist_count': 50,
@@ -304,33 +385,20 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
note='Download toplist page',
query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
entries = [self.url_result(
'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic',
song['data']['songmid'])
for song in toplist_json['songlist']]
topinfo = toplist_json.get('topinfo', {})
list_name = topinfo.get('ListName')
list_description = topinfo.get('info')
return self.playlist_result(entries, list_id, list_name, list_description)
return self.playlist_result(
self._extract_entries(toplist_json, ('songlist', ..., 'data')), list_id,
playlist_title=join_nonempty(*traverse_obj(
toplist_json, ((('topinfo', 'ListName'), 'update_time'), None)), delim=' '),
playlist_description=traverse_obj(toplist_json, ('topinfo', 'info')))
class QQMusicPlaylistIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:playlist'
IE_DESC = 'QQ音乐 - 歌单'
_VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html'
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/playlist/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html',
'info_dict': {
'id': '3462654915',
'title': '韩国5月新歌精选下旬',
'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
},
'playlist_count': 40,
'skip': 'playlist gone',
}, {
'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
'url': 'https://y.qq.com/n/ryqq/playlist/1374105607',
'info_dict': {
'id': '1374105607',
'title': '易入人心的华语民谣',
@@ -346,19 +414,83 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
list_id, 'Download list page',
query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
transform_source=strip_jsonp)
transform_source=strip_jsonp, headers={'Referer': url})
if not len(list_json.get('cdlist', [])):
if list_json.get('code'):
raise ExtractorError(
'QQ Music said: error %d in fetching playlist info' % list_json['code'],
expected=True)
raise ExtractorError('Unable to get playlist info')
raise ExtractorError(join_nonempty(
'Unable to get playlist info',
join_nonempty('code', 'subcode', from_dict=list_json),
list_json.get('msg'), delim=': '))
cdlist = list_json['cdlist'][0]
entries = [self.url_result(
'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'])
for song in cdlist['songlist']]
entries = self._extract_entries(list_json, ('cdlist', 0, 'songlist', ...))
list_name = cdlist.get('dissname')
list_description = clean_html(unescapeHTML(cdlist.get('desc')))
return self.playlist_result(entries, list_id, list_name, list_description)
return self.playlist_result(entries, list_id, **traverse_obj(list_json, ('cdlist', 0, {
'title': ('dissname', {str}),
'description': ('desc', {unescapeHTML}, {clean_html}),
})))
class QQMusicVideoIE(QQMusicBaseIE):
IE_NAME = 'qqmusic:mv'
IE_DESC = 'QQ音乐 - MV'
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/mv/(?P<id>[0-9A-Za-z]+)'
_TESTS = [{
'url': 'https://y.qq.com/n/ryqq/mv/002Vsarh3SVU8K',
'info_dict': {
'id': '002Vsarh3SVU8K',
'ext': 'mp4',
'title': 'The Chant (Extended Mix / Audio)',
'description': '',
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
'release_timestamp': 1688918400,
'release_date': '20230709',
'duration': 313,
'creators': ['Duke Dumont'],
'view_count': int,
},
}]
def _parse_url_formats(self, url_data):
return traverse_obj(url_data, ('mp4', lambda _, v: v['freeflow_url'], {
'url': ('freeflow_url', 0, {url_or_none}),
'filesize': ('fileSize', {int_or_none}),
'format_id': ('newFileType', {str_or_none}),
}))
def _real_extract(self, url):
video_id = self._match_id(url)
video_info = self._make_fcu_req({
'mvInfo': {
'module': 'music.video.VideoData',
'method': 'get_video_info_batch',
'param': {
'vidlist': [video_id],
'required': [
'vid', 'type', 'sid', 'cover_pic', 'duration', 'singers',
'video_pay', 'hint', 'code', 'msg', 'name', 'desc',
'playcnt', 'pubdate', 'play_forbid_reason'],
},
},
'mvUrl': {
'module': 'music.stream.MvUrlProxy',
'method': 'GetMvUrls',
'param': {'vids': [video_id]},
},
}, video_id, headers=self.geo_verification_headers())
if traverse_obj(video_info, ('mvInfo', 'data', video_id, 'play_forbid_reason')) == 3:
self.raise_geo_restricted()
return {
'id': video_id,
'formats': self._parse_url_formats(traverse_obj(video_info, ('mvUrl', 'data', video_id))),
**traverse_obj(video_info, ('mvInfo', 'data', video_id, {
'title': ('name', {str}),
'description': ('desc', {str}),
'thumbnail': ('cover_pic', {url_or_none}),
'release_timestamp': ('pubdate', {int_or_none}),
'duration': ('duration', {int_or_none}),
'creators': ('singers', ..., 'name', {str}),
'view_count': ('playcnt', {int_or_none}),
})),
}

View File

@@ -0,0 +1,198 @@
import base64
import urllib.parse
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
qualities,
remove_start,
smuggle_url,
unsmuggle_url,
update_url_query,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class SproutVideoIE(InfoExtractor):
_NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P<id>[\da-f]+)/[\da-f]+'
_VALID_URL = rf'https?:{_NO_SCHEME_RE}'
_EMBED_REGEX = [rf'<iframe [^>]*\bsrc=["\'](?P<url>(?:https?:)?{_NO_SCHEME_RE}[^"\']*)["\']']
_TESTS = [{
'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
'md5': '1343ce1a6cb39d67889bfa07c7b02b0e',
'info_dict': {
'id': '4c9dddb01910e3c9c4',
'ext': 'mp4',
'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
'duration': 576,
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
},
}, {
'url': 'https://videos.sproutvideo.com/embed/a79fdcb21f1be2c62e/93bf31e41e39ca27',
'md5': 'cebae5cf558cca83271917cf4ec03f26',
'info_dict': {
'id': 'a79fdcb21f1be2c62e',
'ext': 'mp4',
'title': 'HS_01_Live Stream 2023-01-14 10:00',
'duration': 703,
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
},
}, {
# http formats 'sd' and 'hd' are available
'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90',
'md5': 'f368c78df07e78a749508b221528672c',
'info_dict': {
'id': '119cd6bc1a18e6cd98',
'ext': 'mp4',
'title': '3. Updating your Partner details',
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
'duration': 60,
},
'params': {'format': 'hd'},
}, {
# subtitles
'url': 'https://videos.sproutvideo.com/embed/119dd8ba121ee0cc98/4ee50c88a343215d?type=hd',
'md5': '7f6798f037d7a3e3e07e67959de68fc6',
'info_dict': {
'id': '119dd8ba121ee0cc98',
'ext': 'mp4',
'title': 'Recipients Setup - Domestic Wire Only',
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
'duration': 77,
'subtitles': {'en': 'count:1'},
},
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs',
'info_dict': {
'id': '4c9dddb01910e3c9c4',
'ext': 'mp4',
'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
'duration': 576,
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
},
}]
_M3U8_URL_TMPL = 'https://{base}.videos.sproutvideo.com/{s3_user_hash}/{s3_video_hash}/video/index.m3u8'
_QUALITIES = ('hd', 'uhd', 'source') # Exclude 'sd' to prioritize hls formats above it
@staticmethod
def _policy_to_qs(policy, signature_key, as_string=False):
query = {}
for key, value in policy['signatures'][signature_key].items():
query[remove_start(key, 'CloudFront-')] = value
query['sessionID'] = policy['sessionID']
return urllib.parse.urlencode(query, doseq=True) if as_string else query
@classmethod
def _extract_embed_urls(cls, url, webpage):
for embed_url in super()._extract_embed_urls(url, webpage):
if embed_url.startswith('//'):
embed_url = f'https:{embed_url}'
yield smuggle_url(embed_url, {'referer': url})
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
webpage = self._download_webpage(
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
data = self._search_json(
r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+',
end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode())
formats, subtitles = [], {}
headers = {
'Accept': '*/*',
'Origin': 'https://videos.sproutvideo.com',
'Referer': url,
}
# HLS extraction is fatal; only attempt it if the JSON data says it's available
if traverse_obj(data, 'hls'):
manifest_query = self._policy_to_qs(data, 'm')
fragment_query = self._policy_to_qs(data, 't', as_string=True)
key_query = self._policy_to_qs(data, 'k', as_string=True)
formats.extend(self._extract_m3u8_formats(
self._M3U8_URL_TMPL.format(**data), video_id, 'mp4',
m3u8_id='hls', headers=headers, query=manifest_query))
for fmt in formats:
fmt.update({
'url': update_url_query(fmt['url'], manifest_query),
'extra_param_to_segment_url': fragment_query,
'extra_param_to_key_url': key_query,
})
if downloads := traverse_obj(data, ('downloads', {dict.items}, lambda _, v: url_or_none(v[1]))):
quality = qualities(self._QUALITIES)
acodec = 'none' if data.get('has_audio') is False else None
formats.extend([{
'format_id': str(format_id),
'url': format_url,
'ext': 'mp4',
'quality': quality(format_id),
'acodec': acodec,
} for format_id, format_url in downloads])
for sub_data in traverse_obj(data, ('subtitleData', lambda _, v: url_or_none(v['src']))):
subtitles.setdefault(sub_data.get('srclang', 'en'), []).append({
'url': sub_data['src'],
})
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
'http_headers': headers,
**traverse_obj(data, {
'title': ('title', {str}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('posterframe_url', {url_or_none}),
}),
}
class VidsIoIE(InfoExtractor):
IE_NAME = 'vids.io'
_VALID_URL = r'https?://[\w-]+\.vids\.io/videos/(?P<id>[\da-f]+)/(?P<display_id>[\w-]+)'
_TESTS = [{
'url': 'https://how-to-video.vids.io/videos/799cd8b11c10efc1f0/how-to-video-live-streaming',
'md5': '9bbbb2c0c0739eb163b80f87b8d77c9e',
'info_dict': {
'id': '799cd8b11c10efc1f0',
'ext': 'mp4',
'title': 'How to Video: Live Streaming',
'duration': 2787,
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
},
}]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=403)
if urlh.status == 403:
password = self.get_param('videopassword')
if not password:
raise ExtractorError(
'This video is password-protected; use the --video-password option', expected=True)
try:
webpage = self._download_webpage(
url, display_id, 'Submitting video password',
data=urlencode_postdata({
'password': password,
**self._hidden_inputs(webpage),
}))
# Requests with user's session cookie `_sproutvideo_session` are now authorized
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
raise ExtractorError('Incorrect password', expected=True)
raise
if embed_url := next(SproutVideoIE._extract_embed_urls(url, webpage), None):
return self.url_result(embed_url, SproutVideoIE, video_id)
raise ExtractorError('Unable to extract any SproutVideo embed url')

View File

@@ -1,55 +1,31 @@
from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none, traverse_obj
from .vidyard import VidyardBaseIE
from ..utils import ExtractorError, int_or_none, make_archive_id
class SwearnetEpisodeIE(InfoExtractor):
class SwearnetEpisodeIE(VidyardBaseIE):
_VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)'
_TESTS = [{
'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1',
'info_dict': {
'id': '232819',
'id': 'wicK2EOzjOdxkUXGDIgcPw',
'display_id': '232819',
'ext': 'mp4',
'episode_number': 1,
'episode': 'Episode 1',
'duration': 719,
'description': 'md5:c48ef71440ce466284c07085cd7bd761',
'description': r're:Are you drunk and high and craving a grilled cheese sandwich.+',
'season': 'Season 1',
'title': 'Episode 1 - Grilled Cheese Sammich',
'season_number': 1,
'thumbnail': 'https://cdn.vidyard.com/thumbnails/232819/_RX04IKIq60a2V6rIRqq_Q_small.jpg',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/custom/0dd74f9b-388a-452e-b570-b407fb64435b_small.jpg',
'tags': ['Getting Learnt with Ricky', 'drunk', 'grilled cheese', 'high'],
'_old_archive_ids': ['swearnetepisode 232819'],
},
}]
def _get_formats_and_subtitle(self, video_source, video_id):
video_source = video_source or {}
formats, subtitles = [], {}
for key, value in video_source.items():
if key == 'hls':
for video_hls in value:
fmts, subs = self._extract_m3u8_formats_and_subtitles(video_hls.get('url'), video_id)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.extend({
'url': video_mp4.get('url'),
'ext': 'mp4',
} for video_mp4 in value)
return formats, subtitles
def _get_direct_subtitle(self, caption_json):
subs = {}
for caption in caption_json:
subs.setdefault(caption.get('language') or 'und', []).append({
'url': caption.get('vttUrl'),
'name': caption.get('name'),
})
return subs
def _real_extract(self, url):
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
webpage = self._download_webpage(url, display_id)
slug, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
webpage = self._download_webpage(url, slug)
try:
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
@@ -58,22 +34,12 @@ class SwearnetEpisodeIE(InfoExtractor):
self.raise_login_required()
raise
json_data = self._download_json(
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
formats, subtitles = self._get_formats_and_subtitle(json_data['sources'], display_id)
self._merge_subtitles(self._get_direct_subtitle(json_data.get('captions')), target=subtitles)
info = self._process_video_json(self._fetch_video_json(external_id)['chapters'][0], external_id)
if info.get('display_id'):
info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])]
return {
'id': str(json_data['videoId']),
'title': json_data.get('name') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
'description': (json_data.get('description')
or self._html_search_meta(['og:description', 'twitter:description'], webpage)),
'duration': int_or_none(json_data.get('seconds')),
'formats': formats,
'subtitles': subtitles,
**info,
'season_number': int_or_none(season_number),
'episode_number': int_or_none(episode_number),
'thumbnails': [{'url': thumbnail_url}
for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))],
}

View File

@@ -30,6 +30,7 @@ from ..utils import (
try_call,
try_get,
url_or_none,
urlencode_postdata,
)
@@ -43,8 +44,8 @@ class TikTokBaseIE(InfoExtractor):
'iid': None,
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
'app_name': 'musical_ly',
'app_version': '34.1.2',
'manifest_app_version': '2023401020',
'app_version': '35.1.3',
'manifest_app_version': '2023501030',
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
'aid': '0',
}
@@ -114,7 +115,7 @@ class TikTokBaseIE(InfoExtractor):
'universal data', display_id, end_pattern=r'</script>', default={}),
('__DEFAULT_SCOPE__', {dict})) or {}
def _call_api_impl(self, ep, query, video_id, fatal=True,
def _call_api_impl(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
@@ -125,7 +126,8 @@ class TikTokBaseIE(InfoExtractor):
fatal=fatal, note=note, errnote=errnote, headers={
'User-Agent': self._APP_USER_AGENT,
'Accept': 'application/json',
}, query=query)
**(headers or {}),
}, query=query, data=data)
def _build_api_query(self, query):
return filter_dict({
@@ -174,7 +176,7 @@ class TikTokBaseIE(InfoExtractor):
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
})
def _call_api(self, ep, query, video_id, fatal=True,
def _call_api(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
if not self._APP_INFO and not self._get_next_app_info():
message = 'No working app info is available'
@@ -187,9 +189,11 @@ class TikTokBaseIE(InfoExtractor):
max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
for count in itertools.count(1):
self.write_debug(str(self._APP_INFO))
real_query = self._build_api_query(query)
real_query = self._build_api_query(query or {})
try:
return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote)
return self._call_api_impl(
ep, video_id, query=real_query, data=data, headers=headers,
fatal=fatal, note=note, errnote=errnote)
except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
message = str(e.cause or e.msg)
@@ -204,12 +208,13 @@ class TikTokBaseIE(InfoExtractor):
raise
def _extract_aweme_app(self, aweme_id):
feed_list = self._call_api(
'feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed',
errnote='Unable to download video feed').get('aweme_list') or []
aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
aweme_detail = traverse_obj(
self._call_api('multi/aweme/detail', aweme_id, data=urlencode_postdata({
'aweme_ids': f'[{aweme_id}]',
'request_source': '0',
}), headers={'X-Argus': ''}), ('aweme_details', 0, {dict}))
if not aweme_detail:
raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
raise ExtractorError('Unable to extract aweme detail info', video_id=aweme_id)
return self._parse_aweme_video_app(aweme_detail)
def _extract_web_data_and_status(self, url, video_id, fatal=True):
@@ -1037,7 +1042,8 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul
for retry in self.RetryManager():
try:
post_list = self._call_api(
self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}',
self._API_ENDPOINT, display_id, query=query,
note=f'Downloading video list page {page}',
errnote='Unable to download video list')
except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:

View File

@@ -28,35 +28,11 @@ class ToggleIE(InfoExtractor):
'skip_download': 'm3u8 download',
},
}, {
'note': 'DRM-protected video',
'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
'info_dict': {
'id': '341413',
'ext': 'wvm',
'title': 'Dug\'s Special Mission',
'description': 'md5:e86c6f4458214905c1772398fabc93e0',
'upload_date': '20150827',
'timestamp': 1440644006,
},
'params': {
'skip_download': 'DRM-protected wvm download',
},
'only_matching': True,
}, {
# this also tests correct video id extraction
'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
'info_dict': {
'id': '332861',
'ext': 'mp4',
'title': '28th SEA Games (5 Show) - Episode 11',
'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
'upload_date': '20150605',
'timestamp': 1433480166,
},
'params': {
'skip_download': 'DRM-protected wvm download',
},
'skip': 'm3u8 links are geo-restricted',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
'only_matching': True,

View File

@@ -21,7 +21,7 @@ class TubeTuGrazBaseIE(InfoExtractor):
if not urlh:
return
content, urlh = self._download_webpage_handle(
response = self._download_webpage_handle(
urlh.url, None, fatal=False, headers={'referer': urlh.url},
note='logging in', errnote='unable to log in',
data=urlencode_postdata({
@@ -30,7 +30,11 @@ class TubeTuGrazBaseIE(InfoExtractor):
'j_username': username,
'j_password': password,
}))
if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
if not response:
return
content, urlh = response
if urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
return
if not self._html_search_regex(
@@ -39,7 +43,7 @@ class TubeTuGrazBaseIE(InfoExtractor):
self.report_warning('unable to login: incorrect password')
return
content, urlh = self._download_webpage_handle(
urlh = self._request_webpage(
urlh.url, None, fatal=False, headers={'referer': urlh.url},
note='logging in with TFA', errnote='unable to log in with TFA',
data=urlencode_postdata({

View File

@@ -14,6 +14,7 @@ from ..utils import (
float_or_none,
format_field,
int_or_none,
join_nonempty,
make_archive_id,
remove_end,
str_or_none,
@@ -107,7 +108,7 @@ class TwitterBaseIE(InfoExtractor):
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
f = {
'url': variant_url,
'format_id': 'http' + (f'-{tbr}' if tbr else ''),
'format_id': join_nonempty('http', tbr),
'tbr': tbr,
}
self._search_dimensions_in_video_url(f, variant_url)

426
yt_dlp/extractor/vidyard.py Normal file
View File

@@ -0,0 +1,426 @@
import functools
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
float_or_none,
int_or_none,
join_nonempty,
mimetype2ext,
parse_resolution,
str_or_none,
unescapeHTML,
url_or_none,
)
from ..utils.traversal import traverse_obj
class VidyardBaseIE(InfoExtractor):
_HEADERS = {'Referer': 'https://play.vidyard.com/'}
def _get_formats_and_subtitles(self, sources, video_id):
formats, subtitles = [], {}
def add_hls_fmts_and_subs(m3u8_url):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, 'mp4', m3u8_id='hls', headers=self._HEADERS, fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
hls_list = isinstance(sources, dict) and sources.pop('hls', None)
if master_m3u8_url := traverse_obj(
hls_list, (lambda _, v: v['profile'] == 'auto', 'url', {url_or_none}, any)):
add_hls_fmts_and_subs(master_m3u8_url)
if not formats: # These are duplicate and unnecesary requests if we got 'auto' hls fmts
for variant_m3u8_url in traverse_obj(hls_list, (..., 'url', {url_or_none})):
add_hls_fmts_and_subs(variant_m3u8_url)
for source_type, source_list in traverse_obj(sources, ({dict.items}, ...)):
for source in traverse_obj(source_list, lambda _, v: url_or_none(v['url'])):
profile = source.get('profile')
formats.append({
'url': source['url'],
'ext': mimetype2ext(source.get('mimeType'), default=None),
'format_id': join_nonempty('http', source_type, profile),
**parse_resolution(profile),
})
self._remove_duplicate_formats(formats)
return formats, subtitles
def _get_direct_subtitles(self, caption_json):
subs = {}
for caption in traverse_obj(caption_json, lambda _, v: url_or_none(v['vttUrl'])):
subs.setdefault(caption.get('language') or 'und', []).append({
'url': caption['vttUrl'],
'name': caption.get('name'),
})
return subs
def _fetch_video_json(self, video_id):
return self._download_json(
f'https://play.vidyard.com/player/{video_id}.json', video_id)['payload']
def _process_video_json(self, json_data, video_id):
formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], video_id)
self._merge_subtitles(self._get_direct_subtitles(json_data.get('captions')), target=subtitles)
return {
**traverse_obj(json_data, {
'id': ('facadeUuid', {str}),
'display_id': ('videoId', {int}, {str_or_none}),
'title': ('name', {str}),
'description': ('description', {str}, {unescapeHTML}, {lambda x: x or None}),
'duration': ((
('milliseconds', {functools.partial(float_or_none, scale=1000)}),
('seconds', {int_or_none})), any),
'thumbnails': ('thumbnailUrls', ('small', 'normal'), {'url': {url_or_none}}),
'tags': ('tags', ..., 'name', {str}),
}),
'formats': formats,
'subtitles': subtitles,
'http_headers': self._HEADERS,
}
class VidyardIE(VidyardBaseIE):
_VALID_URL = [
r'https?://[\w-]+(?:\.hubs)?\.vidyard\.com/watch/(?P<id>[\w-]+)',
r'https?://(?:embed|share)\.vidyard\.com/share/(?P<id>[\w-]+)',
r'https?://play\.vidyard\.com/(?:player/)?(?P<id>[\w-]+)',
]
_EMBED_REGEX = [r'<iframe[^>]* src=["\'](?P<url>(?:https?:)?//play\.vidyard\.com/[\w-]+)']
_TESTS = [{
'url': 'https://vyexample03.hubs.vidyard.com/watch/oTDMPlUv--51Th455G5u7Q',
'info_dict': {
'id': 'oTDMPlUv--51Th455G5u7Q',
'display_id': '50347',
'ext': 'mp4',
'title': 'Homepage Video',
'description': 'Look I changed the description.',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg',
'duration': 99,
'tags': ['these', 'are', 'all', 'tags'],
},
}, {
'url': 'https://share.vidyard.com/watch/PaQzDAT1h8JqB8ivEu2j6Y?',
'info_dict': {
'id': 'PaQzDAT1h8JqB8ivEu2j6Y',
'display_id': '9281024',
'ext': 'mp4',
'title': 'Inline Embed',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif',
'duration': 41.186,
},
}, {
'url': 'https://embed.vidyard.com/share/oTDMPlUv--51Th455G5u7Q',
'info_dict': {
'id': 'oTDMPlUv--51Th455G5u7Q',
'display_id': '50347',
'ext': 'mp4',
'title': 'Homepage Video',
'description': 'Look I changed the description.',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg',
'duration': 99,
'tags': ['these', 'are', 'all', 'tags'],
},
}, {
# First video from playlist below
'url': 'https://embed.vidyard.com/share/SyStyHtYujcBHe5PkZc5DL',
'info_dict': {
'id': 'SyStyHtYujcBHe5PkZc5DL',
'display_id': '41974005',
'ext': 'mp4',
'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track',
'description': r're:In this video, you will learn how to prepare the frame.+',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png',
'duration': 258.666,
},
}, {
# Playlist
'url': 'https://thelink.hubs.vidyard.com/watch/pwu7pCYWSwAnPxs8nDoFrE',
'info_dict': {
'id': 'pwu7pCYWSwAnPxs8nDoFrE',
'title': 'PLAYLIST - Palm Beach Shutters- Bi-Fold Track System Installation',
'entries': [{
'id': 'SyStyHtYujcBHe5PkZc5DL',
'display_id': '41974005',
'ext': 'mp4',
'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png',
'duration': 258.666,
}, {
'id': '1Fw4B84jZTXLXWqkE71RiM',
'display_id': '5861113',
'ext': 'mp4',
'title': 'Palm Beach - Bi-Fold Track System "Frame Installation"',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861113/29CJ54s5g1_aP38zkKLHew_small.jpg',
'duration': 167.858,
}, {
'id': 'DqP3wBvLXSpxrcqpT5kEeo',
'display_id': '41976334',
'ext': 'mp4',
'title': 'Install the Track for Palm Beach Polysatin Shutters With BiFold Track',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861090/RwG2VaTylUa6KhSTED1r1Q_small.png',
'duration': 94.229,
}, {
'id': 'opfybfxpzQArxqtQYB6oBU',
'display_id': '41976364',
'ext': 'mp4',
'title': 'Install the Panel for Palm Beach Polysatin Shutters With BiFold Track',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860926/JIOaJR08dM4QgXi_iQ2zGA_small.png',
'duration': 191.467,
}, {
'id': 'rWrXvkbTNNaNqD6189HJya',
'display_id': '41976382',
'ext': 'mp4',
'title': 'Adjust the Panels for Palm Beach Polysatin Shutters With BiFold Track',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860687/CwHxBv4UudAhOh43FVB4tw_small.png',
'duration': 138.155,
}, {
'id': 'eYPTB521MZ9TPEArSethQ5',
'display_id': '41976409',
'ext': 'mp4',
'title': 'Assemble and Install the Valance for Palm Beach Polysatin Shutters With BiFold Track',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861425/0y68qlMU4O5VKU7bJ8i_AA_small.png',
'duration': 148.224,
}],
},
'playlist_count': 6,
}, {
# Non hubs.vidyard.com playlist
'url': 'https://salesforce.vidyard.com/watch/d4vqPjs7Q5EzVEis5QT3jd',
'info_dict': {
'id': 'd4vqPjs7Q5EzVEis5QT3jd',
'title': 'How To: Service Cloud: Import External Content in Lightning Knowledge',
'entries': [{
'id': 'mcjDpSZir2iSttbvFkx6Rv',
'display_id': '29479036',
'ext': 'mp4',
'title': 'Welcome to this Expert Coaching Series',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/ouyQi9WuwyiOupChUWNmjQ/7170d3485ba602e012df05_small.jpg',
'duration': 38.205,
}, {
'id': '84bPYwpg243G6xYEfJdYw9',
'display_id': '21820704',
'ext': 'mp4',
'title': 'Chapter 1 - Title + Agenda',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/HFPN0ZgQq4Ow8BghGcQSow/bfaa30123c8f6601e7d7f2_small.jpg',
'duration': 98.016,
}, {
'id': 'nP17fMuvA66buVHUrzqjTi',
'display_id': '21820707',
'ext': 'mp4',
'title': 'Chapter 2 - Import Options',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/rGRIF5nFjPI9OOA2qJ_Dbg/86a8d02bfec9a566845dd4_small.jpg',
'duration': 199.136,
}, {
'id': 'm54EcwXdpA5gDBH5rgCYoV',
'display_id': '21820710',
'ext': 'mp4',
'title': 'Chapter 3 - Importing Article Translations',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/IVX4XR8zpSsiNIHx45kz-A/1ccbf8a29a33856d06b3ed_small.jpg',
'duration': 184.352,
}, {
'id': 'j4nzS42oq4hE9oRV73w3eQ',
'display_id': '21820716',
'ext': 'mp4',
'title': 'Chapter 4 - Best Practices',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/BtrRrQpRDLbA4AT95YQyog/1f1e6b8e7fdc3fa95ec8d3_small.jpg',
'duration': 296.960,
}, {
'id': 'y28PYfW5pftvers9PXzisC',
'display_id': '21820727',
'ext': 'mp4',
'title': 'Chapter 5 - Migration Steps',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/K2CdQOXDfLcrVTF60r0bdw/a09239ada28b6ffce12b1f_small.jpg',
'duration': 620.640,
}, {
'id': 'YWU1eQxYvhj29SjYoPw5jH',
'display_id': '21820733',
'ext': 'mp4',
'title': 'Chapter 6 - Demo',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/rsmhP-cO8dAa8ilvFGCX0g/7911ef415167cd14032068_small.jpg',
'duration': 631.456,
}, {
'id': 'nmEvVqpwdJUgb74zKsLGxn',
'display_id': '29479037',
'ext': 'mp4',
'title': 'Schedule Your Follow-Up',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/Rtwc7X4PEkF4Ae5kHi-Jvw/174ebed3f34227b1ffa1d0_small.jpg',
'duration': 33.608,
}],
},
'playlist_count': 8,
}, {
# URL of iframe embed src
'url': 'https://play.vidyard.com/iDqTwWGrd36vaLuaCY3nTs.html',
'info_dict': {
'id': 'iDqTwWGrd36vaLuaCY3nTs',
'display_id': '9281009',
'ext': 'mp4',
'title': 'Lightbox Embed',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif',
'duration': 39.035,
},
}, {
# Player JSON URL
'url': 'https://play.vidyard.com/player/7GAApnNNbcZZ46k6JqJQSh.json?disable_analytics=0',
'info_dict': {
'id': '7GAApnNNbcZZ46k6JqJQSh',
'display_id': '820026',
'ext': 'mp4',
'title': 'The Art of Storytelling: How to Deliver Your Brand Story with Content & Social',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/MhbE-5sEFQu4x3fI6FkNlA/41eb5717c557cd19456910_small.jpg',
'duration': 2153.013,
'tags': ['Summit2017'],
},
}, {
'url': 'http://share.vidyard.com/share/diYeo6YR2yiGgL8odvS8Ri',
'only_matching': True,
}, {
'url': 'https://play.vidyard.com/FFlz3ZpxhIfKQ1fd9DAryA',
'only_matching': True,
}, {
'url': 'https://play.vidyard.com/qhMAu5A76GZVrFzOPgSf9A/type/standalone',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
# URL containing inline/lightbox embedded video
'url': 'https://resources.altium.com/p/2-the-extreme-importance-of-pc-board-stack-up',
'info_dict': {
'id': 'GDx1oXrFWj4XHbipfoXaMn',
'display_id': '3225198',
'ext': 'mp4',
'title': 'The Extreme Importance of PC Board Stack Up',
'thumbnail': 'https://cdn.vidyard.com/thumbnails/73_Q3_hBexWX7Og1sae6cg/9998fa4faec921439e2c04_small.jpg',
'duration': 3422.742,
},
}, {
# <script ... id="vidyard_embed_code_DXx2sW4WaLA6hTdGFz7ja8" src="//play.vidyard.com/DXx2sW4WaLA6hTdGFz7ja8.js?
'url': 'http://videos.vivint.com/watch/DXx2sW4WaLA6hTdGFz7ja8',
'info_dict': {
'id': 'DXx2sW4WaLA6hTdGFz7ja8',
'display_id': '2746529',
'ext': 'mp4',
'title': 'How To Powercycle the Smart Hub Panel',
'duration': 30.613,
'thumbnail': 'https://cdn.vidyard.com/thumbnails/_-6cw8xQUJ3qiCs_JENc_A/b21d7a5e47967f49399d30_small.jpg',
},
}, {
# <script id="vidyard_embed_code_MIBHhiLVTxga7wqLsuoDjQ" src="//embed.vidyard.com/embed/MIBHhiLVTxga7wqLsuoDjQ/inline?v=2.1">
'url': 'https://www.babypips.com/learn/forex/introduction-to-metatrader4',
'info_dict': {
'id': 'MIBHhiLVTxga7wqLsuoDjQ',
'display_id': '20291',
'ext': 'mp4',
'title': 'Lesson 1 - Opening an MT4 Account',
'description': 'Never heard of MetaTrader4? Here\'s the 411 on the popular trading platform!',
'duration': 168,
'thumbnail': 'https://cdn.vidyard.com/thumbnails/20291/IM-G2WXQR9VBLl2Cmzvftg_small.jpg',
},
}, {
# <iframe ... src="//play.vidyard.com/d61w8EQoZv1LDuPxDkQP2Q/type/background?preview=1"
'url': 'https://www.avaya.com/en/',
'info_dict': {
# These values come from the generic extractor and don't matter
'id': str,
'title': str,
'age_limit': 0,
'upload_date': str,
'description': str,
'thumbnail': str,
'timestamp': float,
},
'playlist': [{
'info_dict': {
'id': 'd61w8EQoZv1LDuPxDkQP2Q',
'display_id': '42456529',
'ext': 'mp4',
'title': 'GettyImages-1027',
'duration': 6.0,
'thumbnail': 'https://cdn.vidyard.com/thumbnails/42061563/p6bY08d2N4e4IDz-7J4_wkgsPq3-qgcx_small.jpg',
},
}, {
'info_dict': {
'id': 'VAsYDi7eiqZRbHodUA2meC',
'display_id': '42456569',
'ext': 'mp4',
'title': 'GettyImages-1325598833',
'duration': 6.083,
'thumbnail': 'https://cdn.vidyard.com/thumbnails/42052358/y3qrbDpn_2quWr_5XBi7yzS3UvEI__ZM_small.jpg',
},
}],
'playlist_count': 2,
}, {
# <div class="vidyard-player-embed" data-uuid="vpCWTVHw3qrciLtVY94YkS"
'url': 'https://www.gogoair.com/',
'info_dict': {
# These values come from the generic extractor and don't matter
'id': str,
'title': str,
'description': str,
'age_limit': 0,
},
'playlist': [{
'info_dict': {
'id': 'vpCWTVHw3qrciLtVY94YkS',
'display_id': '40780699',
'ext': 'mp4',
'title': 'Upgrade to AVANCE 100% worth it - Jason Talley, Owner and Pilot, Testimonial',
'description': 'md5:f609824839439a51990cef55ffc472aa',
'duration': 70.737,
'thumbnail': 'https://cdn.vidyard.com/thumbnails/40780699/KzjfYZz5MZl2gHF_e-4i2c6ib1cLDweQ_small.jpg',
},
}, {
'info_dict': {
'id': 'xAmV9AsLbnitCw35paLBD8',
'display_id': '31130867',
'ext': 'mp4',
'title': 'Brad Keselowski goes faster with Gogo AVANCE inflight Wi-Fi',
'duration': 132.565,
'thumbnail': 'https://cdn.vidyard.com/thumbnails/31130867/HknyDtLdm2Eih9JZ4A5XLjhfBX_6HRw5_small.jpg',
},
}, {
'info_dict': {
'id': 'RkkrFRNxfP79nwCQavecpF',
'display_id': '39009815',
'ext': 'mp4',
'title': 'Live Demo of Gogo Galileo',
'description': 'md5:e2df497236f4e12c3fef8b392b5f23e0',
'duration': 112.128,
'thumbnail': 'https://cdn.vidyard.com/thumbnails/38144873/CWLlxfUbJ4Gh0ThbUum89IsEM4yupzMb_small.jpg',
},
}],
'playlist_count': 3,
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
# Handle protocol-less embed URLs
for embed_url in super()._extract_embed_urls(url, webpage):
if embed_url.startswith('//'):
embed_url = f'https:{embed_url}'
yield embed_url
# Extract inline/lightbox embeds
for embed_element in re.findall(
r'(<(?:img|div)[^>]* class=(["\'])(?:[^>"\']* )?vidyard-player-embed(?: [^>"\']*)?\2[^>]+>)', webpage):
if video_id := extract_attributes(embed_element[0]).get('data-uuid'):
yield f'https://play.vidyard.com/{video_id}'
for embed_id in re.findall(r'<script[^>]* id=["\']vidyard_embed_code_([\w-]+)["\']', webpage):
yield f'https://play.vidyard.com/{embed_id}'
def _real_extract(self, url):
video_id = self._match_id(url)
video_json = self._fetch_video_json(video_id)
if len(video_json['chapters']) == 1:
return self._process_video_json(video_json['chapters'][0], video_id)
return self.playlist_result(
[self._process_video_json(chapter, video_id) for chapter in video_json['chapters']],
str(video_json['playerUuid']), video_json.get('name'))

View File

@@ -5,6 +5,7 @@ from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
parse_age_limit,
traverse_obj,
)
@@ -120,7 +121,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'height', default=None))
formats.append({
'url': video_asset_url,
'format_id': 'http{}'.format(f'-{bitrate}' if bitrate else ''),
'format_id': join_nonempty('http', bitrate),
'tbr': bitrate,
'height': height,
'vcodec': video_asset.get('codec'),

View File

@@ -1,6 +1,7 @@
import base64
import functools
import itertools
import json
import re
import urllib.parse
@@ -14,6 +15,7 @@ from ..utils import (
determine_ext,
get_element_by_class,
int_or_none,
join_nonempty,
js_to_json,
merge_dicts,
parse_filesize,
@@ -84,29 +86,23 @@ class VimeoBaseInfoExtractor(InfoExtractor):
expected=True)
return password
def _verify_video_password(self, url, video_id, password, token, vuid):
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
url + '/password', video_id, 'Verifying the password',
'Wrong password', data=urlencode_postdata({
'password': password,
'token': token,
}), headers={
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': url,
})
def _extract_xsrft_and_vuid(self, webpage):
xsrft = self._search_regex(
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
webpage, 'login token', group='xsrft')
vuid = self._search_regex(
r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
webpage, 'vuid', group='vuid')
return xsrft, vuid
def _verify_video_password(self, video_id, password, token):
url = f'https://vimeo.com/{video_id}'
try:
return self._download_webpage(
f'{url}/password', video_id,
'Submitting video password', data=json.dumps({
'password': password,
'token': token,
}, separators=(',', ':')).encode(), headers={
'Accept': '*/*',
'Content-Type': 'application/json',
'Referer': url,
}, impersonate=True)
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 418:
raise ExtractorError('Wrong password', expected=True)
raise
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
vimeo_config = self._search_regex(
@@ -745,21 +741,34 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise ExtractorError('Wrong video password', expected=True)
return checked
def _extract_from_api(self, video_id, unlisted_hash=None):
token = self._download_json(
'https://vimeo.com/_rv/jwt', video_id, headers={
'X-Requested-With': 'XMLHttpRequest',
})['token']
api_url = 'https://api.vimeo.com/videos/' + video_id
if unlisted_hash:
api_url += ':' + unlisted_hash
video = self._download_json(
api_url, video_id, headers={
'Authorization': 'jwt ' + token,
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None):
return self._download_json(
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
video_id, 'Downloading API JSON', headers={
'Authorization': f'jwt {jwt_token}',
'Accept': 'application/json',
}, query={
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
})
def _extract_from_api(self, video_id, unlisted_hash=None):
viewer = self._download_json(
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
for retry in (False, True):
try:
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
except ExtractorError as e:
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
and 'password' in traverse_obj(
e.cause.response.read(),
({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'),
)):
self._verify_video_password(
video_id, self._get_video_password(), viewer['xsrft'])
continue
raise
info = self._parse_config(self._download_json(
video['config_url'], video_id), video_id)
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
@@ -829,21 +838,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = 'https://vimeo.com/' + video_id
self._try_album_password(url)
is_secure = urllib.parse.urlparse(url).scheme == 'https'
try:
# Retrieve video webpage to extract further information
webpage, urlh = self._download_webpage_handle(
url, video_id, headers=headers)
url, video_id, headers=headers, impersonate=is_secure)
redirect_url = urlh.url
except ExtractorError as ee:
if isinstance(ee.cause, HTTPError) and ee.cause.status == 403:
errmsg = ee.cause.response.read()
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
raise ExtractorError(
'Cannot download embed-only video without embedding '
'URL. Please call yt-dlp with the URL of the page '
'that embeds this video.',
expected=True)
raise
except ExtractorError as error:
if not isinstance(error.cause, HTTPError) or error.cause.status not in (403, 429):
raise
errmsg = error.cause.response.read()
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
raise ExtractorError(
'Cannot download embed-only video without embedding URL. Please call yt-dlp '
'with the URL of the page that embeds this video.', expected=True)
# 403 == vimeo.com TLS fingerprint or DC IP block; 429 == player.vimeo.com TLS FP block
status = error.cause.status
dcip_msg = 'If you are using a data center IP or VPN/proxy, your IP may be blocked'
if target := error.cause.response.extensions.get('impersonate'):
raise ExtractorError(
f'Got HTTP Error {status} when using impersonate target "{target}". {dcip_msg}')
elif not is_secure:
raise ExtractorError(f'Got HTTP Error {status}. {dcip_msg}', expected=True)
raise ExtractorError(
'This request has been blocked due to its TLS fingerprint. Install a '
'required impersonation dependency if possible, or else if you are okay with '
f'{self._downloader._format_err("compromising your security/cookies", "light red")}, '
f'try replacing "https:" with "http:" in the input URL. {dcip_msg}.', expected=True)
if '://player.vimeo.com/video/' in url:
config = self._search_json(
@@ -853,12 +874,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
redirect_url, video_id, headers)
return self._parse_config(config, video_id)
if re.search(r'<form[^>]+?id="pw_form"', webpage):
video_password = self._get_video_password()
token, vuid = self._extract_xsrft_and_vuid(webpage)
webpage = self._verify_video_password(
redirect_url, video_id, video_password, token, vuid)
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config:
seed_status = vimeo_config.get('seed_status') or {}
@@ -1278,9 +1293,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
video_password = self._get_video_password()
viewer = self._download_json(
'https://vimeo.com/_rv/viewer', video_id)
webpage = self._verify_video_password(
'https://vimeo.com/' + video_id, video_id,
video_password, viewer['xsrft'], viewer['vuid'])
webpage = self._verify_video_password(video_id, video_password, viewer['xsrft'])
clip_page_config = self._parse_json(self._search_regex(
r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
webpage, 'clip page config'), video_id)

108
yt_dlp/extractor/vtv.py Normal file
View File

@@ -0,0 +1,108 @@
from .common import InfoExtractor
from ..utils import extract_attributes, get_element_html_by_class, remove_start
class VTVGoIE(InfoExtractor):
_VALID_URL = [
r'https?://(?:www\.)?vtvgo\.vn/(kho-video|tin-tuc)/[\w.-]*?(?P<id>\d+)(?:\.[a-z]+|/)?(?:$|[?#])',
r'https?://(?:www\.)?vtvgo\.vn/digital/detail\.php\?(?:[^#]+&)?content_id=(?P<id>\d+)',
]
_TESTS = [{
'url': 'https://vtvgo.vn/kho-video/bep-vtv-vit-chao-rieng-so-24-888456.html',
'info_dict': {
'id': '888456',
'ext': 'mp4',
'title': 'Bếp VTV | Vịt chao riềng | Số 24',
'description': 'md5:2b4e93ec2b954304170d32be288ce2c8',
'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20230201/VIT-CHAO-RIENG_VTV_638108894672812459.jpg',
},
}, {
'url': 'https://vtvgo.vn/tin-tuc/hot-search-1-zlife-khong-ngo-toi-phai-khong-862074',
'info_dict': {
'id': '862074',
'ext': 'mp4',
'title': 'Hot Search #1 | Zlife | Không ngờ tới phải không? ',
'description': 'md5:e967d0e2efbbebbee8814a55799b4d0f',
'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20220504/6b9a8552-e71c-46ce-bc9d-50c9bb506f9c.jpeg',
},
}, {
'url': 'https://vtvgo.vn/kho-video/918311.html',
'info_dict': {
'id': '918311',
'title': 'Cà phê sáng | 05/02/2024 | Tái hiện hình ảnh Hà Nội xưa tại ngôi nhà di sản',
'ext': 'mp4',
'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240205/0506_ca_phe_sang_638427226021318322.jpg',
'description': 'md5:b121c67948f1ce58e6a036042fc14c1b',
},
}, {
'url': 'https://vtvgo.vn/digital/detail.php?digital_id=168&content_id=918634',
'info_dict': {
'id': '918634',
'ext': 'mp4',
'title': 'Gặp nhau cuối năm | Táo quân 2024',
'description': 'md5:a1c221e78e5954d29d49b2a11c20513c',
'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240210/d0f73369-8f03-4108-9edd-83d4bc3997b2.png',
},
}, {
'url': 'https://vtvgo.vn/digital/detail.php?content_id=919358',
'info_dict': {
'id': '919358',
'ext': 'mp4',
'title': 'Chúng ta của 8 năm sau | Tập 45 | Dương có bằng chứng, nhân chứng vạch mặt ông Khiêm',
'description': 'md5:16ff5208cac6585137f554472a4677f3',
'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240221/550deff9-7736-4a0e-8b5d-33274d97cd7d.jpg',
},
}, {
'url': 'https://vtvgo.vn/kho-video/888456',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m3u8_url = self._search_regex(
r'(?:var\s+link\s*=\s*|addPlayer\()["\'](https://[^"\']+/index\.m3u8)["\']', webpage, 'm3u8 url')
return {
'id': video_id,
'title': self._og_search_title(webpage, default=None),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'),
}
class VTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vtv\.vn/video/[\w-]*?(?P<id>\d+)\.htm'
_TESTS = [{
'url': 'https://vtv.vn/video/thoi-su-20h-vtv1-12-6-2024-680411.htm',
'info_dict': {
'id': '680411',
'ext': 'mp4',
'title': 'Thời sự 20h VTV1 - 12/6/2024 - Video đã phát trên VTV1 | VTV.VN',
'thumbnail': 'https://cdn-images.vtv.vn/zoom/600_315/66349b6076cb4dee98746cf1/2024/06/12/thumb/1206-ts-20h-02929741475480320806760.mp4/thumb0.jpg',
},
}, {
'url': 'https://vtv.vn/video/zlife-1-khong-ngo-toi-phai-khong-vtv24-560248.htm',
'info_dict': {
'id': '560248',
'ext': 'mp4',
'title': 'ZLife #1: Không ngờ tới phải không? | VTV24 - Video đã phát trên VTV-NEWS | VTV.VN',
'description': 'Ai đứng sau vụ việc thay đổi ảnh đại diện trên các trang mạng xã hội của VTV Digital tối 2/5?',
'thumbnail': 'https://video-thumbs.mediacdn.vn/zoom/600_315/vtv/2022/5/13/t67s6btf3ji-16524555726231894427334.jpg',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data_vid = extract_attributes(get_element_html_by_class(
'VCSortableInPreviewMode', get_element_html_by_class(
'video-highlight-box', webpage)))['data-vid']
m3u8_url = f'https://cdn-videos.vtv.vn/{remove_start(data_vid, "vtv.mediacdn.vn/")}/master.m3u8'
return {
'id': video_id,
'title': self._og_search_title(webpage, default=None),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'),
}

View File

@@ -52,6 +52,7 @@ class WeiboBaseIE(InfoExtractor):
})
def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
# XXX: Always fatal; _download_webpage_handle only returns False (not a tuple) on error
webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
self._update_visitor_cookies(urlh.url, video_id)

View File

@@ -2,6 +2,7 @@ from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
join_nonempty,
unified_strdate,
)
@@ -76,7 +77,7 @@ class WSJIE(InfoExtractor):
tbr = int_or_none(v.get('bitrate'))
formats.append({
'url': mp4_url,
'format_id': 'http' + (f'-{tbr}' if tbr else ''),
'format_id': join_nonempty('http', tbr),
'tbr': tbr,
'width': int_or_none(v.get('width')),
'height': int_or_none(v.get('height')),

View File

@@ -8,6 +8,7 @@ from ..utils import (
ExtractorError,
clean_html,
int_or_none,
join_nonempty,
mimetype2ext,
parse_iso8601,
traverse_obj,
@@ -213,7 +214,7 @@ class YahooIE(InfoExtractor):
tbr = int_or_none(s.get('bitrate'))
formats.append({
'url': s_url,
'format_id': fmt + (f'-{tbr}' if tbr else ''),
'format_id': join_nonempty(fmt, tbr),
'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')),
'tbr': tbr,
@@ -371,12 +372,13 @@ class YahooJapanNewsIE(InfoExtractor):
url, content_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
bitrate = int_or_none(vid.get('bitrate'))
formats.append({
'url': url,
'format_id': f'http-{vid.get("bitrate")}',
'format_id': join_nonempty('http', bitrate),
'height': int_or_none(vid.get('height')),
'width': int_or_none(vid.get('width')),
'tbr': int_or_none(vid.get('bitrate')),
'tbr': bitrate,
})
self._remove_duplicate_formats(formats)

View File

@@ -5,6 +5,7 @@ from ..utils import (
determine_ext,
float_or_none,
int_or_none,
join_nonempty,
mimetype2ext,
try_get,
urljoin,
@@ -116,12 +117,9 @@ class YandexDiskIE(InfoExtractor):
else:
size = video.get('size') or {}
height = int_or_none(size.get('height'))
format_id = 'hls'
if height:
format_id += f'-{height}p'
formats.append({
'ext': 'mp4',
'format_id': format_id,
'format_id': join_nonempty('hls', height and f'{height}p'),
'height': height,
'protocol': 'm3u8_native',
'url': format_url,

View File

@@ -11,6 +11,7 @@ from ..utils import (
class YleAreenaIE(InfoExtractor):
_VALID_URL = r'https?://areena\.yle\.fi/(?P<id>[\d-]+)'
_GEO_COUNTRIES = ['FI']
_TESTS = [
{
'url': 'https://areena.yle.fi/1-4371942',
@@ -19,7 +20,7 @@ class YleAreenaIE(InfoExtractor):
'id': '0_a3tjk92c',
'ext': 'mp4',
'title': 'Pouchit',
'description': 'md5:d487309c3abbe5650265bbd1742d2f82',
'description': 'md5:01071d7056ceec375f63960f90c35366',
'series': 'Modernit miehet',
'season': 'Season 1',
'season_number': 1,
@@ -87,8 +88,8 @@ class YleAreenaIE(InfoExtractor):
})
# Example title: 'K1, J2: Pouchit | Modernit miehet'
series, season_number, episode_number, episode = self._search_regex(
r'K(?P<season_no>[\d]+),\s*J(?P<episode_no>[\d]+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
season_number, episode_number, episode, series = self._search_regex(
r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
default=(None, None, None, None))
description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
@@ -110,10 +111,12 @@ class YleAreenaIE(InfoExtractor):
'ie_key': KalturaIE.ie_key(),
}
else:
formats, subs = self._extract_m3u8_formats_and_subtitles(
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
self._merge_subtitles(subs, target=subtitles)
info_dict = {
'id': video_id,
'formats': self._extract_m3u8_formats(
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls'),
'formats': formats,
}
return {
@@ -129,6 +132,6 @@ class YleAreenaIE(InfoExtractor):
or int_or_none(episode_number)),
'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
'subtitles': subtitles,
'subtitles': subtitles or None,
'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)),
}

View File

@@ -4,6 +4,7 @@ import collections
import copy
import datetime as dt
import enum
import functools
import hashlib
import itertools
import json
@@ -20,7 +21,6 @@ import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper
from ..compat import functools
from ..jsinterp import JSInterpreter
from ..networking.exceptions import HTTPError, network_exceptions
from ..utils import (
@@ -468,7 +468,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
]
_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
_IGNORED_WARNINGS = {
'Unavailable videos will be hidden during playback',
'Unavailable videos are hidden',
}
_YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
_YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
@@ -3797,6 +3800,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
CHUNK_SIZE = 10 << 20
PREFERRED_LANG_VALUE = 10
original_language = None
itags, stream_ids = collections.defaultdict(set), []
itag_qualities, res_qualities = {}, {0: None}
q = qualities([
@@ -3845,6 +3850,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
itag_qualities[itag] = quality
if height:
res_qualities[height] = quality
is_default = audio_track.get('audioIsDefault')
is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
language_code = audio_track.get('id', '').split('.')[0]
if language_code and is_default:
original_language = language_code
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
# number of fragment that would subsequently requested with (`&sq=N`)
@@ -3870,7 +3882,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
query = parse_qs(fmt_url)
throttled = False
if query.get('n'):
try:
decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
@@ -3884,20 +3895,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
if player_url:
self.report_warning(
f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
self.write_debug(e, only_once=True)
else:
self.report_warning(
'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
'Cannot decrypt nsig without player_url: Some formats may be missing',
video_id=video_id, only_once=True)
throttled = True
continue
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
language_preference = (
10 if audio_track.get('audioIsDefault') and 10
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
else -1)
format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
@@ -3924,17 +3931,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'filesize': int_or_none(fmt.get('contentLength')),
'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
'format_note': join_nonempty(
join_nonempty(audio_track.get('displayName'),
language_preference > 0 and ' (default)', delim=''),
join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
is_damaged and 'DAMAGED', is_broken and 'BROKEN',
(self.get_param('verbose') or all_formats) and client_name,
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
+ (100 if 'Premium' in name else 0)),
'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
'audio_channels': fmt.get('audioChannels'),
'height': height,
@@ -3944,9 +3949,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'filesize_approx': filesize_from_tbr(tbr, format_duration),
'url': fmt_url,
'width': int_or_none(fmt.get('width')),
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else '') or None,
'language_preference': language_preference,
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
# Strictly de-prioritize broken, damaged and 3gp formats
'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
}
@@ -4007,6 +4011,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif itag:
f['format_id'] = itag
if original_language and f.get('language') == original_language:
f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
f['language_preference'] = PREFERRED_LANG_VALUE
if f.get('source_preference') is None:
f['source_preference'] = -1
@@ -4351,7 +4359,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
'live_status': live_status,
'release_timestamp': live_start_time,
'_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
'_format_sort_fields': ( # source_preference is lower for potentially damaged formats
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
}

View File

@@ -66,7 +66,9 @@ class ZaikoIE(ZaikoBaseIE):
stream_meta['stream-access']['video_source'], video_id,
'Downloading player page', headers={'referer': 'https://zaiko.io/'})
player_meta = self._parse_vue_element_attr('player', player_page, video_id)
status = traverse_obj(player_meta, ('initial_event_info', 'status', {str}))
initial_event_info = traverse_obj(player_meta, ('initial_event_info', {dict})) or {}
status = traverse_obj(initial_event_info, ('status', {str}))
live_status, msg, expected = {
'vod': ('was_live', 'No VOD stream URL was found', False),
'archiving': ('post_live', 'Event VOD is still being processed', True),
@@ -80,14 +82,20 @@ class ZaikoIE(ZaikoBaseIE):
'cancelled': ('not_live', 'Event has been cancelled', True),
}.get(status) or ('not_live', f'Unknown event status "{status}"', False)
stream_url = traverse_obj(player_meta, ('initial_event_info', 'endpoint', {url_or_none}))
if traverse_obj(initial_event_info, ('is_jwt_protected', {bool})):
stream_url = self._download_json(
initial_event_info['jwt_token_url'], video_id, 'Downloading JWT-protected stream URL',
'Failed to download JWT-protected stream URL')['playback_url']
else:
stream_url = traverse_obj(initial_event_info, ('endpoint', {url_or_none}))
formats = self._extract_m3u8_formats(
stream_url, video_id, live=True, fatal=False) if stream_url else []
if not formats:
self.raise_no_formats(msg, expected=expected)
thumbnail_urls = [
traverse_obj(player_meta, ('initial_event_info', 'poster_url')),
traverse_obj(initial_event_info, ('poster_url', {url_or_none})),
self._og_search_thumbnail(self._download_webpage(
f'https://zaiko.io/event/{video_id}', video_id, 'Downloading event page', fatal=False) or ''),
]
@@ -103,9 +111,7 @@ class ZaikoIE(ZaikoBaseIE):
'release_timestamp': ('stream', 'start', 'timestamp', {int_or_none}),
'categories': ('event', 'genres', ..., {lambda x: x or None}),
}),
**traverse_obj(player_meta, ('initial_event_info', {
'alt_title': ('title', {str}),
})),
'alt_title': traverse_obj(initial_event_info, ('title', {str})),
'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)],
}

View File

@@ -667,12 +667,12 @@ class JSInterpreter:
self.interpret_expression(v, local_vars, allow_recursion)
for v in self._separate(arg_str)]
if obj == str:
if obj is str:
if member == 'fromCharCode':
assertion(argvals, 'takes one or more arguments')
return ''.join(map(chr, argvals))
raise self.Exception(f'Unsupported String method {member}', expr)
elif obj == float:
elif obj is float:
if member == 'pow':
assertion(len(argvals) == 2, 'takes two arguments')
return argvals[0] ** argvals[1]

View File

@@ -230,9 +230,7 @@ class Urllib3LoggingFilter(logging.Filter):
def filter(self, record):
# Ignore HTTP request messages since HTTPConnection prints those
if record.msg == '%s://%s:%s "%s %s %s" %s %s':
return False
return True
return record.msg != '%s://%s:%s "%s %s %s" %s %s'
class Urllib3LoggingHandler(logging.Handler):

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import contextlib
import functools
import io
import logging
import ssl
@@ -22,7 +23,6 @@ from .exceptions import (
TransportError,
)
from .websocket import WebSocketRequestHandler, WebSocketResponse
from ..compat import functools
from ..dependencies import websockets
from ..socks import ProxyError as SocksProxyError
from ..utils import int_or_none

View File

@@ -474,10 +474,10 @@ def create_parser():
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
'prefer-legacy-http-handler', 'manifest-filesize-approx',
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext',
}, 'aliases': {
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext'],
'2021': ['2022', 'no-certifi', 'filename-sanitization'],
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
'2023': [],
@@ -646,7 +646,7 @@ def create_parser():
'You can also simply specify a field to match if the field is present, '
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
'the filter matches if atleast one of the conditions are met. E.g. --match-filter '
'the filter matches if at least one of the conditions is met. E.g. --match-filter '
'!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
'matches only videos that are not live OR those that have a like count more than 100 '
'(or the like field is not available) and also has a description '
@@ -1479,7 +1479,7 @@ def create_parser():
'Optionally, the KEYRING used for decrypting Chromium cookies on Linux, '
'the name/path of the PROFILE to load cookies from, '
'and the CONTAINER name (if Firefox) ("none" for no container) '
'can be given with their respective seperators. '
'can be given with their respective separators. '
'By default, all containers of the most recently accessed profile are used. '
f'Currently supported keyrings are: {", ".join(map(str.lower, sorted(SUPPORTED_KEYRINGS)))}'))
filesystem.add_option(
@@ -1781,7 +1781,7 @@ def create_parser():
'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), '
'"video" (after --format; before --print/--output), "before_dl" (before each video download), '
'"post_process" (after each video download; default), '
'"after_move" (after moving video file to it\'s final locations), '
'"after_move" (after moving video file to its final locations), '
'"after_video" (after downloading and processing all formats of a video), '
'or "playlist" (at end of playlist). '
'This option can be used multiple times to add different postprocessors'))

View File

@@ -119,15 +119,22 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
if not mutagen or prefer_atomicparsley:
success = False
else:
self._report_run('mutagen', filename)
f = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG}
try:
self._report_run('mutagen', filename)
with open(thumbnail_filename, 'rb') as thumbfile:
thumb_data = thumbfile.read()
type_ = imghdr.what(h=thumb_data)
if not type_:
raise ValueError('could not determine image type')
elif type_ not in f:
raise ValueError(f'incompatible image type: {type_}')
meta = MP4(filename)
# NOTE: the 'covr' atom is a non-standard MPEG-4 atom,
# Apple iTunes 'M4A' files include the 'moov.udta.meta.ilst' atom.
f = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG}[imghdr.what(thumbnail_filename)]
with open(thumbnail_filename, 'rb') as thumbfile:
thumb_data = thumbfile.read()
meta.tags['covr'] = [MP4Cover(data=thumb_data, imageformat=f)]
meta.tags['covr'] = [MP4Cover(data=thumb_data, imageformat=f[type_])]
meta.save()
temp_filename = filename
except Exception as err:
@@ -160,9 +167,10 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if returncode:
self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}')
success = False
# for formats that don't support thumbnails (like 3gp) AtomicParsley
# won't create to the temporary file
if 'No changes' in stdout:
elif 'No changes' in stdout:
self.report_warning('The file format doesn\'t support embedding a thumbnail')
success = False

View File

@@ -1,5 +1,6 @@
import collections
import contextvars
import functools
import itertools
import json
import os
@@ -8,7 +9,7 @@ import subprocess
import time
from .common import PostProcessor
from ..compat import functools, imghdr
from ..compat import imghdr
from ..utils import (
MEDIA_EXTENSIONS,
ISO639Utils,

View File

@@ -2085,19 +2085,20 @@ def parse_duration(s):
(days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
def prepend_extension(filename, ext, expected_real_ext=None):
def _change_extension(prepend, filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename)
return (
f'{name}.{ext}{real_ext}'
if not expected_real_ext or real_ext[1:] == expected_real_ext
else f'{filename}.{ext}')
if not expected_real_ext or real_ext[1:] == expected_real_ext:
filename = name
if prepend and real_ext:
_UnsafeExtensionError.sanitize_extension(ext, prepend=True)
return f'{filename}.{ext}{real_ext}'
return f'{filename}.{_UnsafeExtensionError.sanitize_extension(ext)}'
def replace_extension(filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename)
ext = ext if ext.startswith('.') else '.' + ext
return f'{name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename}{ext}'
prepend_extension = functools.partial(_change_extension, True)
replace_extension = functools.partial(_change_extension, False)
def check_executable(exe, args=[]):
@@ -5025,7 +5026,7 @@ MEDIA_EXTENSIONS = Namespace(
common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
thumbnails=('jpg', 'png', 'webp'),
storyboards=('mhtml', ),
subtitles=('srt', 'vtt', 'ass', 'lrc'),
@@ -5037,6 +5038,135 @@ MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
class _UnsafeExtensionError(Exception):
"""
Mitigation exception for uncommon/malicious file extensions
This should be caught in YoutubeDL.py alongside a warning
Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j
"""
ALLOWED_EXTENSIONS = frozenset([
# internal
'description',
'json',
'meta',
'orig',
'part',
'temp',
'uncut',
'unknown_video',
'ytdl',
# video
*MEDIA_EXTENSIONS.video,
'asx',
'ismv',
'm2t',
'm2ts',
'm2v',
'm4s',
'mng',
'mp2v',
'mp4v',
'mpe',
'mpeg',
'mpeg1',
'mpeg2',
'mpeg4',
'mxf',
'ogm',
'qt',
'rm',
'swf',
'ts',
'vob',
'vp9',
# audio
*MEDIA_EXTENSIONS.audio,
'3ga',
'ac3',
'adts',
'aif',
'au',
'dts',
'isma',
'it',
'mid',
'mod',
'mpga',
'mp1',
'mp2',
'mp4a',
'mpa',
'ra',
'shn',
'xm',
# image
*MEDIA_EXTENSIONS.thumbnails,
'avif',
'bmp',
'gif',
'heic',
'ico',
'jng',
'jpeg',
'jxl',
'svg',
'tif',
'tiff',
'wbmp',
# subtitle
*MEDIA_EXTENSIONS.subtitles,
'dfxp',
'fs',
'ismt',
'json3',
'sami',
'scc',
'srv1',
'srv2',
'srv3',
'ssa',
'tt',
'ttml',
'xml',
# others
*MEDIA_EXTENSIONS.manifests,
*MEDIA_EXTENSIONS.storyboards,
'desktop',
'ism',
'm3u',
'sbv',
'url',
'webloc',
])
def __init__(self, extension, /):
super().__init__(f'unsafe file extension: {extension!r}')
self.extension = extension
@classmethod
def sanitize_extension(cls, extension, /, *, prepend=False):
if extension is None:
return None
if '/' in extension or '\\' in extension:
raise cls(extension)
if not prepend:
_, _, last = extension.rpartition('.')
if last == 'bin':
extension = last = 'unknown_video'
if last.lower() not in cls.ALLOWED_EXTENSIONS:
raise cls(extension)
return extension
class RetryManager:
"""Usage:
for retry in RetryManager(...):

View File

@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
__version__ = '2024.05.27'
__version__ = '2024.07.07'
RELEASE_GIT_HEAD = '12b248ce60be1aa1362edd839d915bba70dbee4b'
RELEASE_GIT_HEAD = 'b337d2989ce0614651d363383f6f743d977248ef'
VARIANT = None
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
ORIGIN = 'yt-dlp/yt-dlp'
_pkg_version = '2024.05.27'
_pkg_version = '2024.07.07'