mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-01-30 18:51:51 +00:00
Merge branch 'master' into GoogleDriveFolderFix
This may fix the failing ci
This commit is contained in:
@@ -235,6 +235,11 @@ def validate_options(opts):
|
||||
validate_regex('format sorting', f, FormatSorter.regex)
|
||||
|
||||
# Postprocessor formats
|
||||
if opts.convertsubtitles == 'none':
|
||||
opts.convertsubtitles = None
|
||||
if opts.convertthumbnails == 'none':
|
||||
opts.convertthumbnails = None
|
||||
|
||||
validate_regex('merge output format', opts.merge_output_format,
|
||||
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
|
||||
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
|
||||
|
||||
@@ -1053,8 +1053,9 @@ def _decrypt_windows_dpapi(ciphertext, logger):
|
||||
ctypes.byref(blob_out), # pDataOut
|
||||
)
|
||||
if not ret:
|
||||
logger.warning('failed to decrypt with DPAPI', only_once=True)
|
||||
return None
|
||||
message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
|
||||
logger.error(message)
|
||||
raise DownloadError(message) # force exit
|
||||
|
||||
result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
|
||||
ctypes.windll.kernel32.LocalFree(blob_out.pbData)
|
||||
|
||||
@@ -508,7 +508,7 @@ class FFmpegFD(ExternalFD):
|
||||
env = None
|
||||
proxy = self.params.get('proxy')
|
||||
if proxy:
|
||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||
if not re.match(r'[\da-zA-Z]+://', proxy):
|
||||
proxy = f'http://{proxy}'
|
||||
|
||||
if proxy.startswith('socks'):
|
||||
@@ -559,7 +559,7 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
selected_formats = info_dict.get('requested_formats') or [info_dict]
|
||||
for i, fmt in enumerate(selected_formats):
|
||||
is_http = re.match(r'^https?://', fmt['url'])
|
||||
is_http = re.match(r'https?://', fmt['url'])
|
||||
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
|
||||
if cookies:
|
||||
args.extend(['-cookies', ''.join(
|
||||
|
||||
@@ -217,6 +217,7 @@ from .bbc import (
|
||||
BBCCoUkIPlayerGroupIE,
|
||||
BBCCoUkPlaylistIE,
|
||||
)
|
||||
from .beacon import BeaconTvIE
|
||||
from .beatbump import (
|
||||
BeatBumpPlaylistIE,
|
||||
BeatBumpVideoIE,
|
||||
@@ -729,6 +730,7 @@ from .genius import (
|
||||
GeniusIE,
|
||||
GeniusLyricsIE,
|
||||
)
|
||||
from .germanupa import GermanupaIE
|
||||
from .getcourseru import (
|
||||
GetCourseRuIE,
|
||||
GetCourseRuPlayerIE,
|
||||
@@ -822,7 +824,10 @@ from .hungama import (
|
||||
HungamaIE,
|
||||
HungamaSongIE,
|
||||
)
|
||||
from .huya import HuyaLiveIE
|
||||
from .huya import (
|
||||
HuyaLiveIE,
|
||||
HuyaVideoIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .hypergryph import MonsterSirenHypergryphMusicIE
|
||||
from .hytale import HytaleIE
|
||||
@@ -939,11 +944,13 @@ from .khanacademy import (
|
||||
KhanAcademyUnitIE,
|
||||
)
|
||||
from .kick import (
|
||||
KickClipIE,
|
||||
KickIE,
|
||||
KickVODIE,
|
||||
)
|
||||
from .kicker import KickerIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kika import KikaIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .kommunetv import KommunetvIE
|
||||
@@ -986,6 +993,7 @@ from .lcp import (
|
||||
LcpIE,
|
||||
LcpPlayIE,
|
||||
)
|
||||
from .learningonscreen import LearningOnScreenIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lecturio import (
|
||||
LecturioCourseIE,
|
||||
@@ -1034,10 +1042,7 @@ from .livestream import (
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .livestreamfails import LivestreamfailsIE
|
||||
from .lnkgo import (
|
||||
LnkGoIE,
|
||||
LnkIE,
|
||||
)
|
||||
from .lnk import LnkIE
|
||||
from .loom import (
|
||||
LoomFolderIE,
|
||||
LoomIE,
|
||||
@@ -1162,6 +1167,7 @@ from .mlb import (
|
||||
)
|
||||
from .mlssoccer import MLSSoccerIE
|
||||
from .mocha import MochaVideoIE
|
||||
from .mojevideo import MojevideoIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .motherless import (
|
||||
@@ -1808,6 +1814,7 @@ from .screen9 import Screen9IE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastify import ScreencastifyIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenrec import ScreenRecIE
|
||||
from .scrippsnetworks import (
|
||||
ScrippsNetworksIE,
|
||||
ScrippsNetworksWatchIE,
|
||||
@@ -1818,6 +1825,7 @@ from .scte import (
|
||||
SCTECourseIE,
|
||||
)
|
||||
from .sejmpl import SejmIE
|
||||
from .sen import SenIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import (
|
||||
SenateGovIE,
|
||||
@@ -1873,6 +1881,7 @@ from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotrim import SmotrimIE
|
||||
from .snapchat import SnapchatSpotlightIE
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import (
|
||||
SohuIE,
|
||||
@@ -2169,10 +2178,7 @@ from .tv5unis import (
|
||||
TV5UnisVideoIE,
|
||||
)
|
||||
from .tv24ua import TV24UAVideoIE
|
||||
from .tva import (
|
||||
TVAIE,
|
||||
QubIE,
|
||||
)
|
||||
from .tva import TVAIE
|
||||
from .tvanouvelles import (
|
||||
TVANouvellesArticleIE,
|
||||
TVANouvellesIE,
|
||||
@@ -2312,6 +2318,7 @@ from .videomore import (
|
||||
VideomoreVideoIE,
|
||||
)
|
||||
from .videopress import VideoPressIE
|
||||
from .vidflex import VidflexIE
|
||||
from .vidio import (
|
||||
VidioIE,
|
||||
VidioLiveIE,
|
||||
|
||||
@@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
'skip': 'This program is not currently available in ABC iview',
|
||||
}, {
|
||||
'url': 'https://iview.abc.net.au/show/inbestigators',
|
||||
'info_dict': {
|
||||
'id': '175343-1',
|
||||
'title': 'Series 1',
|
||||
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
|
||||
'series': 'The Inbestigators',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
webpage_data = self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
|
||||
webpage, 'initial state')
|
||||
video_data = self._parse_json(
|
||||
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
video_data = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
|
||||
transform_source=lambda x: x.encode().decode('unicode_escape'),
|
||||
end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
|
||||
|
||||
@@ -9,12 +9,12 @@ import re
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..networking import RequestHandler, Response
|
||||
from ..networking.exceptions import TransportError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@@ -26,37 +26,36 @@ from ..utils import (
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
"""Add a handler for opening URLs, like _download_webpage"""
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
rh = ydl._request_director.handlers['Urllib']
|
||||
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
||||
return
|
||||
headers = ydl.params['http_headers'].copy()
|
||||
proxies = ydl.proxies.copy()
|
||||
clean_proxies(proxies, headers)
|
||||
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
|
||||
assert isinstance(opener, urllib.request.OpenerDirector)
|
||||
opener.add_handler(handler)
|
||||
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
||||
class AbemaLicenseRH(RequestHandler):
|
||||
_SUPPORTED_URL_SCHEMES = ('abematv-license',)
|
||||
_SUPPORTED_PROXY_SCHEMES = None
|
||||
_SUPPORTED_FEATURES = None
|
||||
RH_NAME = 'abematv_license'
|
||||
|
||||
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
handler_order = 499
|
||||
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
def __init__(self, ie: 'AbemaTVIE'):
|
||||
# the protocol that this should really handle is 'abematv-license://'
|
||||
# abematv_license_open is just a placeholder for development purposes
|
||||
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
|
||||
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None))
|
||||
def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
    """Create the handler and remember the extractor it works for.

    `ie` is keyword-only and is stored on the instance as `self.ie`;
    every other keyword argument is forwarded untouched to the parent
    class initializer.
    """
    super().__init__(**kwargs)
    self.ie = ie
|
||||
|
||||
def _send(self, request):
    """Answer a license request with the video key for its ticket.

    The ticket is read from the network-location part of `request.url`
    and handed to `_get_videokey_from_ticket`. Failures while obtaining
    the key are re-raised as `TransportError`; on success the key bytes
    are wrapped in a `Response` carrying a `Content-Length` header.
    """
    license_url = request.url
    # Parsed outside the try block on purpose: only errors raised while
    # fetching the key are translated into TransportError.
    license_ticket = urllib.parse.urlparse(license_url).netloc

    try:
        video_key = self._get_videokey_from_ticket(license_ticket)
    except ExtractorError as err:
        raise TransportError(cause=err.cause) from err
    except (IndexError, KeyError, TypeError) as err:
        raise TransportError(cause=repr(err)) from err

    response_headers = {'Content-Length': str(len(video_key))}
    return Response(io.BytesIO(video_key), license_url, headers=response_headers)
|
||||
|
||||
def _get_videokey_from_ticket(self, ticket):
|
||||
to_show = self.ie.get_param('verbose', False)
|
||||
media_token = self.ie._get_media_token(to_show=to_show)
|
||||
@@ -72,25 +71,17 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
res = decode_base_n(license_response['k'], table=self.STRTABLE)
|
||||
res = decode_base_n(license_response['k'], table=self._STRTABLE)
|
||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self.HKEY),
|
||||
binascii.unhexlify(self._HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
def abematv_license_open(self, url):
|
||||
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
|
||||
'Content-Length': str(len(response_data)),
|
||||
}, url=url, code=200)
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
@@ -139,7 +130,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||
@@ -386,8 +377,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
|
||||
note='Checking playability',
|
||||
headers=headers)
|
||||
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
|
||||
if 3 not in ondemand_types:
|
||||
if not traverse_obj(api_response, ('label', 'free', {bool})):
|
||||
# cannot acquire decryption key for these streams
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
|
||||
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
_TEST = {
|
||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||
|
||||
@@ -49,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
'info_dict': {
|
||||
'id': '9841',
|
||||
@@ -71,10 +71,7 @@ class ADNIE(ADNBaseIE):
|
||||
},
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1',
|
||||
'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
@@ -167,7 +164,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
'username': username,
|
||||
})) or {}).get('accessToken')
|
||||
if access_token:
|
||||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
self._HEADERS['Authorization'] = f'Bearer {access_token}'
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
@@ -178,6 +175,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
@@ -218,7 +216,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization,
|
||||
'X-Target-Distribution': lang or 'fr',
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
@@ -257,6 +254,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
load_balancer_data = self._download_json(
|
||||
load_balancer_url, video_id,
|
||||
f'Downloading {format_id} {quality} JSON metadata',
|
||||
headers=self._HEADERS,
|
||||
fatal=False) or {}
|
||||
m3u8_url = load_balancer_data.get('location')
|
||||
if not m3u8_url:
|
||||
@@ -277,7 +275,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + f'video/{video_id}', video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
return {
|
||||
@@ -299,9 +297,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
|
||||
class ADNSeasonIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>[^/?#]+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new',
|
||||
'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': '911',
|
||||
@@ -312,16 +310,14 @@ class ADNSeasonIE(ADNBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
show = self._download_json(
|
||||
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
|
||||
'Downloading show JSON metadata', headers=self._HEADERS)['show']
|
||||
show_id = str(show['id'])
|
||||
episodes = self._download_json(
|
||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||
'Downloading episode list', headers={
|
||||
'X-Target-Distribution': lang or 'fr',
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'Downloading episode list', headers=self._HEADERS, query={
|
||||
'order': 'asc',
|
||||
'limit': '-1',
|
||||
})
|
||||
|
||||
@@ -1,27 +1,42 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
|
||||
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
|
||||
'info_dict': {
|
||||
'id': '1000665010654',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
|
||||
'upload_date': '20240812',
|
||||
'timestamp': 1723449600,
|
||||
'duration': 3596,
|
||||
'series': 'Ferreck Dawn - To The Break of Dawn',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'episode': '207 - Whitney Webb Returns',
|
||||
'episode_number': 207,
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593932400,
|
||||
'duration': 6454,
|
||||
'duration': 5369,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
@@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
server_data = self._search_json(
|
||||
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
|
||||
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
|
||||
model_data = traverse_obj(server_data, (
|
||||
'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
|
||||
'model', {dict}, any))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': episode.get('name'),
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
'description': description.get('standard') or description.get('short'),
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
**self._json_ld(
|
||||
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
|
||||
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
|
||||
**traverse_obj(model_data, {
|
||||
'title': ('title', {str}),
|
||||
'url': ('streamUrl', {clean_podcast_url}),
|
||||
'timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
||||
@@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARDMediathek'
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
@@ -470,7 +470,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDMediathekCollectionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/?#]+/)?
|
||||
(?P<playlist>sendung|serie|sammlung)/
|
||||
|
||||
@@ -101,9 +101,10 @@ class AsobiStageIE(InfoExtractor):
|
||||
self._HEADERS['Authorization'] = f'Bearer {token}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
|
||||
webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
|
||||
video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
|
||||
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
event_data = traverse_obj(
|
||||
self._search_nextjs_data(webpage, video_id, default={}),
|
||||
('props', 'pageProps', 'eventCMSData', {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import functools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -6,7 +8,9 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
@@ -17,6 +21,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
@@ -459,7 +464,7 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://coldworldofficial.bandcamp.com/music',
|
||||
'playlist_mincount': 10,
|
||||
'playlist_mincount': 7,
|
||||
'info_dict': {
|
||||
'id': 'coldworldofficial',
|
||||
'title': 'Discography of coldworldofficial',
|
||||
@@ -473,12 +478,19 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _yield_items(self, webpage):
    """Yield the item links found in a discography page.

    Two sources are consulted, in this order:
      1. anchors inside `<li data-item-id=...>` elements (links whose
         first path segment leads to `/merch` are excluded by the
         pattern); if none match, the quoted value following a
         `trackTitle` attribute on `<div>` elements is used instead;
      2. the `page_url` strings of the JSON stored in the
         `data-client-items` attribute of the element whose id is
         `music-grid`.
    """
    html_links = re.findall(
        r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
    if not html_links:
        html_links = re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage)
    yield from html_links

    # Traversal: locate the grid element, read its attributes, decode the
    # JSON attribute, then branch over every entry and keep string URLs.
    client_items_path = (
        {functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
        'data-client-items', {json.loads}, ..., 'page_url', {str})
    yield from traverse_obj(webpage, client_items_path)
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader = self._match_id(url)
|
||||
webpage = self._download_webpage(url, uploader)
|
||||
|
||||
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
return self.playlist_from_matches(
|
||||
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
|
||||
self._yield_items(webpage), uploader, f'Discography of {uploader}',
|
||||
getter=functools.partial(urljoin, url))
|
||||
|
||||
68
yt_dlp/extractor/beacon.py
Normal file
68
yt_dlp/extractor/beacon.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
# Extractor for beacon.tv content pages; media is described by a JSON-encoded
# JW Player configuration embedded in the page's Next.js data.
class BeaconTvIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?beacon\.tv/content/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://beacon.tv/content/welcome-to-beacon',
        'md5': 'b3f5932d437f288e662f10f3bfc5bd04',
        'info_dict': {
            'id': 'welcome-to-beacon',
            'ext': 'mp4',
            'upload_date': '20240509',
            'description': 'md5:ea2bd32e71acf3f9fca6937412cc3563',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/I4CkkEvN/poster.jpg?width=720',
            'title': 'Your home for Critical Role!',
            'timestamp': 1715227200,
            'duration': 105.494,
        },
    }, {
        'url': 'https://beacon.tv/content/re-slayers-take-trailer',
        'md5': 'd879b091485dbed2245094c8152afd89',
        'info_dict': {
            'id': 're-slayers-take-trailer',
            'ext': 'mp4',
            'title': 'The Re-Slayer’s Take | Official Trailer',
            'timestamp': 1715189040,
            'upload_date': '20240508',
            'duration': 53.249,
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/PW5ApIw3/poster.jpg?width=720',
        },
    }]

    def _raise_for_missing_media(self, content):
        """Raise the most specific error for a content entry without player data.

        Checked in order: the entry is not a video/podcast at all, the
        entry belongs to a tier other than the one hard-coded below
        (reported as login required), and finally a generic failure.
        """
        if content.get('contentType') not in ('videoPodcast', 'video', 'podcast'):
            raise ExtractorError('Content is not a video/podcast', expected=True)
        tier_ref = traverse_obj(content, ('contentTier', '__ref'))
        # NOTE(review): this id is presumably the free/public tier -- confirm
        if tier_ref != 'MemberTier:65b258d178f89be87b4dc0a4':
            self.raise_login_required('This video/podcast is for members only')
        raise ExtractorError('Failed to extract content')

    def _real_extract(self, url):
        """Build the info dict for the content page at `url`.

        The content entry is looked up in the page's `__APOLLO_STATE__`
        by its `Content:` key prefix and matching slug. Its JSON-encoded
        player data is parsed with `_parse_jwplayer_data`, then title,
        description and timestamp from the entry are layered on top.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(webpage, video_id)

        content = traverse_obj(nextjs_data, (
            'props', 'pageProps', '__APOLLO_STATE__',
            lambda key, entry: key.startswith('Content:') and entry['slug'] == video_id, any))
        if not content:
            raise ExtractorError('Failed to extract content data')

        # Either branch may hold the player configuration as a JSON string;
        # the first one that decodes to a dict is used.
        media_paths = (
            ('contentVideo', 'video', 'videoData'),
            ('contentPodcast', 'podcast', 'audioData'),
        )
        player_config = traverse_obj(content, (media_paths, {json.loads}, {dict}, any))
        if not player_config:
            self._raise_for_missing_media(content)

        metadata_fields = {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('publishedAt', {parse_iso8601}),
        }
        return {
            **self._parse_jwplayer_data(player_config, video_id),
            **traverse_obj(content, metadata_fields),
        }
|
||||
@@ -46,6 +46,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
|
||||
_wbi_key_cache = {}
|
||||
@@ -192,7 +193,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
video_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}')
|
||||
note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
|
||||
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||
self.report_warning(
|
||||
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
|
||||
@@ -207,7 +208,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
def _get_chapters(self, aid, cid):
|
||||
chapters = aid and cid and self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
note='Extracting chapters', fatal=False)
|
||||
note='Extracting chapters', fatal=False, headers=self._HEADERS)
|
||||
return traverse_obj(chapters, ('data', 'view_points', ..., {
|
||||
'title': 'content',
|
||||
'start_time': 'from',
|
||||
@@ -298,7 +299,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
@@ -622,6 +623,10 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'skip': 'geo-restricted',
|
||||
}, {
|
||||
'note': 'has - in the last path segment of the url',
|
||||
'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -1017,8 +1022,6 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
|
||||
|
||||
class BilibiliCheeseBaseIE(BilibiliBaseIE):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _extract_episode(self, season_info, ep_id):
|
||||
episode_info = traverse_obj(season_info, (
|
||||
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
|
||||
@@ -1848,7 +1851,7 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.tv/'}
|
||||
|
||||
def _call_api(self, endpoint, *args, **kwargs):
|
||||
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
|
||||
|
||||
@@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
|
||||
|
||||
|
||||
class CallinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
|
||||
'info_dict': {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import base64
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
@@ -6,17 +7,24 @@ import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
replace_extension,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
update_url,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -149,6 +157,7 @@ class CBCIE(InfoExtractor):
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:player'
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
||||
@@ -172,21 +181,20 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
|
||||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
||||
'tags': 'count:8',
|
||||
'categories': ['All in a Weekend Montreal'],
|
||||
'tags': 'count:11',
|
||||
'location': 'Quebec',
|
||||
'series': 'All in a Weekend Montreal',
|
||||
'season': 'Season 2015',
|
||||
'season_number': 2015,
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['Other'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
|
||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
||||
'info_dict': {
|
||||
'id': '2164402062',
|
||||
'ext': 'mp4',
|
||||
@@ -194,107 +202,168 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
'series': 'CBC News: Windsor at 6:00',
|
||||
'categories': ['News/Canada/Windsor'],
|
||||
'categories': ['Windsor'],
|
||||
'location': 'Windsor',
|
||||
'tags': ['cancer'],
|
||||
'creators': ['Allison Johnson'],
|
||||
'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['News'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'https://www.cbc.ca/player/play/1.2985700',
|
||||
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
||||
'info_dict': {
|
||||
'id': '2657631896',
|
||||
'id': '1.2985700',
|
||||
'ext': 'mp3',
|
||||
'title': 'CBC Montreal is organizing its first ever community hackathon!',
|
||||
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
|
||||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
||||
'tags': 'count:8',
|
||||
'categories': ['All in a Weekend Montreal'],
|
||||
'tags': 'count:11',
|
||||
'location': 'Quebec',
|
||||
'series': 'All in a Weekend Montreal',
|
||||
'season': 'Season 2015',
|
||||
'season_number': 2015,
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['Other'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/1.1711287',
|
||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
||||
'info_dict': {
|
||||
'id': '2164402062',
|
||||
'id': '1.1711287',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cancer survivor four times over',
|
||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
'series': 'CBC News: Windsor at 6:00',
|
||||
'categories': ['News/Canada/Windsor'],
|
||||
'categories': ['Windsor'],
|
||||
'location': 'Windsor',
|
||||
'tags': ['cancer'],
|
||||
'creators': ['Allison Johnson'],
|
||||
'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['News'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Has subtitles
|
||||
# These broadcasts expire after ~1 month, can find new test URL here:
|
||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||
'url': 'https://www.cbc.ca/player/play/1.7159484',
|
||||
'md5': '6ed6cd0fc2ef568d2297ba68a763d455',
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6424403',
|
||||
'md5': '8025909eaffcf0adf59922904def9a5e',
|
||||
'info_dict': {
|
||||
'id': '2324213316001',
|
||||
'id': '9.6424403',
|
||||
'ext': 'mp4',
|
||||
'title': 'The National | School boards sue social media giants',
|
||||
'description': 'md5:4b4db69322fa32186c3ce426da07402c',
|
||||
'timestamp': 1711681200,
|
||||
'duration': 2743.400,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'title': 'The National | N.W.T. wildfire emergency',
|
||||
'description': 'md5:ada33d36d1df69347ed575905bfd496c',
|
||||
'timestamp': 1718589600,
|
||||
'duration': 2692.833,
|
||||
'subtitles': {
|
||||
'en-US': [{
|
||||
'name': 'English Captions',
|
||||
'url': 'https://cbchls.akamaized.net/delivery/news-shows/2024/06/17/NAT_JUN16-00-55-00/NAT_JUN16_cc.vtt',
|
||||
}],
|
||||
},
|
||||
'thumbnail': 'https://i.cbc.ca/ais/6272b5c6-5e78-4c05-915d-0e36672e33d1,1714756287822/full/max/0/default.jpg',
|
||||
'chapters': 'count:5',
|
||||
'upload_date': '20240329',
|
||||
'categories': 'count:4',
|
||||
'upload_date': '20240617',
|
||||
'categories': ['News', 'The National', 'The National Latest Broadcasts'],
|
||||
'series': 'The National - Full Show',
|
||||
'tags': 'count:1',
|
||||
'creators': ['News'],
|
||||
'tags': ['The National'],
|
||||
'location': 'Canada',
|
||||
'media_type': 'Full Program',
|
||||
'genres': ['News'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
|
||||
'md5': '188b96cf6bdcb2540e178a6caa957128',
|
||||
'info_dict': {
|
||||
'id': '2334524995812',
|
||||
'id': '1.7194274',
|
||||
'ext': 'mp4',
|
||||
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
|
||||
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
|
||||
'timestamp': 1714788791,
|
||||
'duration': 77.678,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'chapters': 'count:0',
|
||||
'upload_date': '20240504',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'categories': 'count:3',
|
||||
'series': 'The National',
|
||||
'tags': 'count:15',
|
||||
'creators': ['encoder'],
|
||||
'tags': 'count:17',
|
||||
'location': 'Canada',
|
||||
'media_type': 'Excerpt',
|
||||
'upload_date': '20240504',
|
||||
'genres': ['News'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6427282',
|
||||
'info_dict': {
|
||||
'id': '9.6427282',
|
||||
'ext': 'mp4',
|
||||
'title': 'Men\'s Soccer - Argentina vs Morocco',
|
||||
'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.',
|
||||
'series': 'CBC Sports',
|
||||
'media_type': 'Event Coverage',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg',
|
||||
'timestamp': 1721825400.0,
|
||||
'upload_date': '20240724',
|
||||
'duration': 10568.0,
|
||||
'chapters': [],
|
||||
'genres': [],
|
||||
'tags': ['2024 Paris Olympic Games'],
|
||||
'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'],
|
||||
'location': 'Canada',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6459530',
|
||||
'md5': '6c1bb76693ab321a2e99c347a1d5ecbc',
|
||||
'info_dict': {
|
||||
'id': '9.6459530',
|
||||
'ext': 'mp4',
|
||||
'title': 'Parts of Jasper incinerated as wildfire rages',
|
||||
'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962',
|
||||
'series': 'The National',
|
||||
'media_type': 'Excerpt',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg',
|
||||
'timestamp': 1721964091.012,
|
||||
'upload_date': '20240726',
|
||||
'duration': 952.285,
|
||||
'chapters': [],
|
||||
'genres': [],
|
||||
'tags': 'count:23',
|
||||
'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6420651',
|
||||
'md5': '71a850c2c6ee5e912de169f5311bb533',
|
||||
'info_dict': {
|
||||
'id': '9.6420651',
|
||||
'ext': 'mp4',
|
||||
'title': 'Is it a breath of fresh air? Measuring air quality in Edmonton',
|
||||
'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3',
|
||||
'series': 'CBC News Edmonton',
|
||||
'media_type': 'Excerpt',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg',
|
||||
'timestamp': 1718220065.768,
|
||||
'upload_date': '20240612',
|
||||
'duration': 286.086,
|
||||
'chapters': [],
|
||||
'genres': ['News'],
|
||||
'categories': ['News', 'Edmonton'],
|
||||
'tags': 'count:7',
|
||||
'location': 'Edmonton',
|
||||
},
|
||||
}, {
|
||||
'url': 'cbcplayer:1.7159484',
|
||||
@@ -307,23 +376,113 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_param(self, asset_data, name):
|
||||
return traverse_obj(asset_data, ('params', lambda _, v: v['name'] == name, 'value', {str}, any))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
if '.' in video_id:
|
||||
webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
|
||||
video_id = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage,
|
||||
'initial state', video_id)['video']['currentClip']['mediaId']
|
||||
webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
|
||||
data = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)['video']['currentClip']
|
||||
assets = traverse_obj(
|
||||
data, ('media', 'assets', lambda _, v: url_or_none(v['key']) and v['type']))
|
||||
|
||||
if not assets and (media_id := traverse_obj(data, ('mediaId', {str}))):
|
||||
# XXX: Deprecated; CBC is migrating off of ThePlatform
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', {
|
||||
'force_smil_url': True,
|
||||
}),
|
||||
'id': media_id,
|
||||
'_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
|
||||
}
|
||||
|
||||
is_live = traverse_obj(data, ('media', 'streamType', {str})) == 'Live'
|
||||
formats, subtitles = [], {}
|
||||
|
||||
for sub in traverse_obj(data, ('media', 'textTracks', lambda _, v: url_or_none(v['src']))):
|
||||
subtitles.setdefault(sub.get('language') or 'und', []).append({
|
||||
'url': sub['src'],
|
||||
'name': sub.get('label'),
|
||||
})
|
||||
|
||||
for asset in assets:
|
||||
asset_key = asset['key']
|
||||
asset_type = asset['type']
|
||||
if asset_type != 'medianet':
|
||||
self.report_warning(f'Skipping unsupported asset type "{asset_type}": {asset_key}')
|
||||
continue
|
||||
asset_data = self._download_json(asset_key, video_id, f'Downloading {asset_type} JSON')
|
||||
ext = mimetype2ext(self._parse_param(asset_data, 'contentType'))
|
||||
if ext == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
asset_data['url'], video_id, 'mp4', m3u8_id='hls', live=is_live)
|
||||
formats.extend(fmts)
|
||||
# Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available
|
||||
if not subtitles:
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
if is_live or not fmts:
|
||||
continue
|
||||
# Check for direct https mp4 format
|
||||
best_video_fmt = traverse_obj(fmts, (
|
||||
lambda _, v: v.get('vcodec') != 'none' and v['tbr'], all,
|
||||
{functools.partial(sorted, key=lambda x: x['tbr'])}, -1, {dict})) or {}
|
||||
base_url = self._search_regex(
|
||||
r'(https?://[^?#]+?/)hdntl=', best_video_fmt.get('url'), 'base url', default=None)
|
||||
if not base_url or '/live/' in base_url:
|
||||
continue
|
||||
mp4_url = base_url + replace_extension(url_basename(best_video_fmt['url']), 'mp4')
|
||||
if self._request_webpage(
|
||||
HEADRequest(mp4_url), video_id, 'Checking for https format',
|
||||
errnote=False, fatal=False):
|
||||
formats.append({
|
||||
**best_video_fmt,
|
||||
'url': mp4_url,
|
||||
'format_id': 'https-mp4',
|
||||
'protocol': 'https',
|
||||
'manifest_url': None,
|
||||
'acodec': None,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': asset_data['url'],
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if self._parse_param(asset_data, 'mediaType') == 'audio' else None,
|
||||
})
|
||||
|
||||
chapters = traverse_obj(data, (
|
||||
'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
|
||||
'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}),
|
||||
'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}),
|
||||
'title': ('name', {str}),
|
||||
}))
|
||||
# Filter out pointless single chapters with start_time==0 and no end_time
|
||||
if len(chapters) == 1 and not (chapters[0].get('start_time') or chapters[0].get('end_time')):
|
||||
chapters = []
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{video_id}?mbr=true&formats=MPEG4,FLV,MP3', {
|
||||
'force_smil_url': True,
|
||||
}),
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str.strip}),
|
||||
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
|
||||
'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}),
|
||||
'media_type': ('media', 'clipType', {str}),
|
||||
'series': ('showName', {str}),
|
||||
'season_number': ('media', 'season', {int_or_none}),
|
||||
'duration': ('media', 'duration', {float_or_none}, {lambda x: None if is_live else x}),
|
||||
'location': ('media', 'region', {str}),
|
||||
'tags': ('tags', ..., 'name', {str}),
|
||||
'genres': ('media', 'genre', all),
|
||||
'categories': ('categories', ..., 'name', {str}),
|
||||
}),
|
||||
'id': video_id,
|
||||
'_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': chapters,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
@@ -647,11 +806,11 @@ class CBCGemLiveIE(InfoExtractor):
|
||||
'title': 'Ottawa',
|
||||
'description': 'The live TV channel and local programming from Ottawa',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
'id': 'AyqZwxRqh8EH',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1492106160,
|
||||
'upload_date': '20170413',
|
||||
'release_timestamp': 1492106160,
|
||||
'release_date': '20170413',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Live might have ended',
|
||||
@@ -680,49 +839,84 @@ class CBCGemLiveIE(InfoExtractor):
|
||||
'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
|
||||
'timestamp': 1679706000,
|
||||
'upload_date': '20230325',
|
||||
'release_timestamp': 1679706000,
|
||||
'release_date': '20230325',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Live might have ended',
|
||||
},
|
||||
{ # event replay (medianetlive)
|
||||
'url': 'https://gem.cbc.ca/live-event/42314',
|
||||
'md5': '297a9600f554f2258aed01514226a697',
|
||||
'info_dict': {
|
||||
'id': '42314',
|
||||
'ext': 'mp4',
|
||||
'live_status': 'was_live',
|
||||
'title': 'Women\'s Soccer - Canada vs New Zealand',
|
||||
'description': 'md5:36200e5f1a70982277b5a6ecea86155d',
|
||||
'thumbnail': r're:https://.+default\.jpg',
|
||||
'release_timestamp': 1721917200,
|
||||
'release_date': '20240725',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Replay might no longer be available',
|
||||
},
|
||||
{ # event replay (medianetlive)
|
||||
'url': 'https://gem.cbc.ca/live-event/43273',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']
|
||||
|
||||
# Two types of metadata JSON
|
||||
# Three types of video_info JSON: info in root, freeTv stream/item, event replay
|
||||
if not video_info.get('formattedIdMedia'):
|
||||
video_info = traverse_obj(
|
||||
video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}),
|
||||
get_all=False, default={})
|
||||
if traverse_obj(video_info, ('event', 'key')) == video_id:
|
||||
video_info = video_info['event']
|
||||
else:
|
||||
video_info = traverse_obj(video_info, (
|
||||
('freeTv', ('streams', ...)), 'items',
|
||||
lambda _, v: v['key'].partition('-')[0] == video_id, any)) or {}
|
||||
|
||||
video_stream_id = video_info.get('formattedIdMedia')
|
||||
if not video_stream_id:
|
||||
raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
|
||||
raise ExtractorError(
|
||||
'Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
|
||||
|
||||
stream_data = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
||||
'appCode': 'mpx',
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'idMedia': video_stream_id,
|
||||
'multibitrate': 'true',
|
||||
'output': 'json',
|
||||
'tech': 'hls',
|
||||
'manifestType': 'desktop',
|
||||
})
|
||||
live_status = 'was_live' if video_info.get('isVodEnabled') else 'is_live'
|
||||
release_timestamp = traverse_obj(video_info, ('airDate', {parse_iso8601}))
|
||||
|
||||
if live_status == 'is_live' and release_timestamp and release_timestamp > time.time():
|
||||
formats = []
|
||||
live_status = 'is_upcoming'
|
||||
self.raise_no_formats('This livestream has not yet started', expected=True)
|
||||
else:
|
||||
stream_data = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
||||
'appCode': 'medianetlive',
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'idMedia': video_stream_id,
|
||||
'multibitrate': 'true',
|
||||
'output': 'json',
|
||||
'tech': 'hls',
|
||||
'manifestType': 'desktop',
|
||||
})
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream_data['url'], video_id, 'mp4', live=live_status == 'is_live')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'live_status': live_status,
|
||||
'release_timestamp': release_timestamp,
|
||||
**traverse_obj(video_info, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('images', 'card', 'url'),
|
||||
'timestamp': ('airDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
TransportError,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
@@ -965,6 +966,9 @@ class InfoExtractor:
|
||||
return False
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data)
|
||||
if content is False:
|
||||
assert not fatal
|
||||
return False
|
||||
return (content, urlh)
|
||||
|
||||
@staticmethod
|
||||
@@ -1039,7 +1043,15 @@ class InfoExtractor:
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||
prefix=None, encoding=None, data=None):
|
||||
webpage_bytes = urlh.read()
|
||||
try:
|
||||
webpage_bytes = urlh.read()
|
||||
except TransportError as err:
|
||||
errmsg = f'{video_id}: Error reading response: {err.msg}'
|
||||
if fatal:
|
||||
raise ExtractorError(errmsg, cause=err)
|
||||
self.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
@@ -1698,7 +1710,7 @@ class InfoExtractor:
|
||||
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
|
||||
if rating is not None:
|
||||
info['average_rating'] = rating
|
||||
if is_type(e, 'TVEpisode', 'Episode'):
|
||||
if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
|
||||
episode_name = unescapeHTML(e.get('name'))
|
||||
info.update({
|
||||
'episode': episode_name,
|
||||
@@ -2065,7 +2077,7 @@ class InfoExtractor:
|
||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
|
||||
if self.get_param('hls_split_discontinuity', False):
|
||||
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||
@@ -2800,11 +2812,11 @@ class InfoExtractor:
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if try_call(lambda: base_url_e.text) is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
if re.match(r'https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and base_url.startswith('/'):
|
||||
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
|
||||
elif mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
elif mpd_base_url and not re.match(r'https?://', base_url):
|
||||
if not mpd_base_url.endswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
@@ -2894,7 +2906,7 @@ class InfoExtractor:
|
||||
}
|
||||
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
return 'url' if re.match(r'https?://', location) else 'path'
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
@@ -3150,7 +3162,7 @@ class InfoExtractor:
|
||||
})
|
||||
return formats, subtitles
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None):
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None, _headers=None):
|
||||
def absolute_url(item_url):
|
||||
return urljoin(base_url, item_url)
|
||||
|
||||
@@ -3174,11 +3186,11 @@ class InfoExtractor:
|
||||
formats = self._extract_m3u8_formats(
|
||||
full_url, video_id, ext='mp4',
|
||||
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
|
||||
preference=preference, quality=quality, fatal=False)
|
||||
preference=preference, quality=quality, fatal=False, headers=_headers)
|
||||
elif ext == 'mpd':
|
||||
is_plain_url = False
|
||||
formats = self._extract_mpd_formats(
|
||||
full_url, video_id, mpd_id=mpd_id, fatal=False)
|
||||
full_url, video_id, mpd_id=mpd_id, fatal=False, headers=_headers)
|
||||
else:
|
||||
is_plain_url = True
|
||||
formats = [{
|
||||
@@ -3272,6 +3284,8 @@ class InfoExtractor:
|
||||
})
|
||||
for f in media_info['formats']:
|
||||
f.setdefault('http_headers', {})['Referer'] = base_url
|
||||
if _headers:
|
||||
f['http_headers'].update(_headers)
|
||||
if media_info['formats'] or media_info['subtitles']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
@@ -3487,7 +3501,7 @@ class InfoExtractor:
|
||||
continue
|
||||
urls.add(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
ext = determine_ext(source_url, default_ext=mimetype2ext(source_type))
|
||||
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
|
||||
@@ -319,32 +319,6 @@ class DPlayIE(DPlayBaseIE):
|
||||
url, display_id, host, 'dplay' + country, country, domain)
|
||||
|
||||
|
||||
class HGTVDeIE(DPlayBaseIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
|
||||
'info_dict': {
|
||||
'id': '151205',
|
||||
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wer braucht schon eine Toilette',
|
||||
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
|
||||
'duration': 1177.024,
|
||||
'timestamp': 1595705400,
|
||||
'upload_date': '20200725',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Tiny House - klein, aber oho',
|
||||
'season_number': 3,
|
||||
'episode_number': 3,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||
|
||||
|
||||
class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
|
||||
|
||||
@@ -373,6 +347,45 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
|
||||
|
||||
|
||||
class HGTVDeIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
|
||||
'info_dict': {
|
||||
'id': '7332936',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
|
||||
'title': 'Vom Landleben ins Loft',
|
||||
'description': 'md5:e5f72c02c853970796dd3818f2e25745',
|
||||
'episode': 'Episode 7',
|
||||
'episode_number': 7,
|
||||
'season': 'Season 7',
|
||||
'season_number': 7,
|
||||
'series': 'Mein Kleinstadt-Traumhaus',
|
||||
'duration': 2645.0,
|
||||
'timestamp': 1725998100,
|
||||
'upload_date': '20240910',
|
||||
'creators': ['HGTV'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/08/09/82a386b9-c688-32c7-b9ff-0b13865f0bae.jpeg',
|
||||
},
|
||||
}]
|
||||
|
||||
_PRODUCT = 'hgtv'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'eu1-prod.disco-api.com',
|
||||
'realm': 'hgtv',
|
||||
'country': 'de',
|
||||
}
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
|
||||
class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
@@ -934,7 +947,7 @@ class TLCIE(DiscoveryPlusBaseIE):
|
||||
|
||||
|
||||
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport)?' + DPlayBaseIE._PATH_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||
'info_dict': {
|
||||
@@ -958,6 +971,9 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/gb/video/olympics/dplus-sport-dplus-sport-sport/rugby-sevens-australia-samoa',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = None
|
||||
@@ -1144,13 +1160,19 @@ class DiscoveryPlusShowBaseIE(DPlayBaseIE):
|
||||
|
||||
|
||||
class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video' + DPlayBaseIE._PATH_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/it/video/olympics/dplus-sport-dplus-sport-sport/water-polo-greece-italy',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/it/video/sport/dplus-sport-dplus-sport-sport/lisa-vittozzi-allinferno-e-ritorno',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'dplus_it'
|
||||
|
||||
@@ -6,8 +6,10 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,43 +38,58 @@ class DropboxIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
def _yield_decoded_parts(self, webpage):
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
yield base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
fn = urllib.parse.unquote(url_basename(url))
|
||||
title = os.path.splitext(fn)[0]
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if (self._og_search_title(webpage) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if '/sm/password' in part:
|
||||
webpage = self._download_webpage(
|
||||
update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
|
||||
break
|
||||
|
||||
if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
if password:
|
||||
content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
|
||||
payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(),
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
|
||||
data=urlencode_postdata({
|
||||
'is_xhr': 'true',
|
||||
't': self._get_cookies('https://www.dropbox.com')['t'].value,
|
||||
'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
|
||||
'password': password,
|
||||
'url': url,
|
||||
}))
|
||||
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Authentication failed!', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
elif self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
else:
|
||||
raise ExtractorError('Invalid password', expected=True)
|
||||
elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats, subtitles, has_anonymous_download = [], {}, False
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
formats, subtitles = [], {}
|
||||
has_anonymous_download = False
|
||||
thumbnail = None
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if not has_anonymous_download:
|
||||
has_anonymous_download = self._search_regex(
|
||||
r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
|
||||
r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
|
||||
transcode_url = self._search_regex(
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
|
||||
if not transcode_url:
|
||||
continue
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
|
||||
thumbnail = self._search_regex(
|
||||
r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
|
||||
break
|
||||
|
||||
# downloads enabled we can get the original file
|
||||
@@ -89,4 +106,5 @@ class DropboxIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ERTFlixBaseIE(InfoExtractor):
|
||||
@@ -74,29 +75,28 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
|
||||
|
||||
def _extract_formats_and_subs(self, video_id):
    """Collect formats and subtitles for *video_id*.

    Media info is fetched through ``self._call_api`` using *video_id* as the
    codename. Only ``MediaFiles`` entries whose ``RoleCodename`` is ``'main'``
    are considered, and only ``Formats`` entries carrying a valid ``Url``.

    Returns a ``(formats, subtitles)`` tuple.
    """
    media_info = self._call_api(video_id, codename=video_id)
    formats, subtitles = [], {}
    for media in traverse_obj(media_info, (
            'MediaFiles', lambda _, v: v['RoleCodename'] == 'main',
            'Formats', lambda _, v: url_or_none(v['Url']))):
        fmt_url = media['Url']
        ext = determine_ext(fmt_url)
        if ext == 'm3u8':
            # Manifest extraction is non-fatal so one broken manifest
            # does not discard the remaining formats.
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
        elif ext == 'mpd':
            fmts, subs = self._extract_mpd_formats_and_subtitles(
                fmt_url, video_id, mpd_id='dash', fatal=False)
        else:
            # Anything that is not a manifest is treated as a direct media URL.
            formats.append({
                'url': fmt_url,
                'format_id': str_or_none(media.get('Id')),
            })
            continue
        formats.extend(fmts)
        self._merge_subtitles(subs, target=subtitles)

    return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -294,37 +294,37 @@ class ESPNCricInfoIE(InfoExtractor):
|
||||
class WatchESPNIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'info_dict': {
|
||||
'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'id': '11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'ext': 'mp4',
|
||||
'title': 'Huddersfield vs. Burnley',
|
||||
'duration': 7500,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
'title': 'Abilene Chrstn vs. Texas Tech',
|
||||
'duration': 14166,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/11ce417a-6ac9-42b6-8a15-46aeb9ad5710/16x9.jpg?timestamp=202407252343&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'info_dict': {
|
||||
'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'id': '90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
|
||||
'duration': 8335,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'UC Davis vs. California',
|
||||
'duration': 9547,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'info_dict': {
|
||||
'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'id': 'c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Wheel - Episode 10',
|
||||
'duration': 3352,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'The College Football Show',
|
||||
'duration': 3639,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/c4313bbe-95b5-4bb8-b251-ac143ea0fc54/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -353,6 +353,13 @@ class WatchESPNIE(AdobePassIE):
|
||||
if not cookie:
|
||||
self.raise_login_required(method='cookies')
|
||||
|
||||
jwt = self._search_regex(r'=([^|]+)\|', cookie.value, 'cookie jwt')
|
||||
id_token = self._download_json(
|
||||
'https://registerdisney.go.com/jgc/v6/client/ESPN-ONESITE.WEB-PROD/guest/refresh-auth',
|
||||
None, 'Refreshing token', headers={'Content-Type': 'application/json'}, data=json.dumps({
|
||||
'refreshToken': json.loads(base64.urlsafe_b64decode(f'{jwt}==='))['refresh_token'],
|
||||
}).encode())['data']['token']['id_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'devices', video_id,
|
||||
headers={'Content-Type': 'application/json; charset=UTF-8'},
|
||||
@@ -371,7 +378,7 @@ class WatchESPNIE(AdobePassIE):
|
||||
})['access_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
|
||||
'accounts/grant', video_id, payload={'id_token': id_token},
|
||||
headers={
|
||||
'Authorization': token,
|
||||
'Content-Type': 'application/json; charset=UTF-8',
|
||||
|
||||
@@ -3,7 +3,12 @@ from ..utils import traverse_obj
|
||||
|
||||
|
||||
class EurosportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:
|
||||
(?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
|
||||
eurosport\.tvn24\.pl
|
||||
)/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
|
||||
'info_dict': {
|
||||
@@ -70,6 +75,42 @@ class EurosportIE(InfoExtractor):
|
||||
'duration': 105.0,
|
||||
'upload_date': '20230518',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TOKEN = None
|
||||
@@ -77,6 +118,7 @@ class EurosportIE(InfoExtractor):
|
||||
# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
|
||||
# but this method require to get sha256 hash
|
||||
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_initialize(self):
|
||||
if EurosportIE._TOKEN is None:
|
||||
@@ -98,13 +140,13 @@ class EurosportIE(InfoExtractor):
|
||||
for stream_type in json_data['attributes']['streaming']:
|
||||
if stream_type == 'hls':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
|
||||
elif stream_type == 'dash':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
|
||||
elif stream_type == 'mss':
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
@@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1692346159,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': '100063551323670',
|
||||
'duration': 3132.184,
|
||||
'duration': 3133.583,
|
||||
'view_count': int,
|
||||
'concurrent_view_count': 0,
|
||||
},
|
||||
@@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
|
||||
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
|
||||
'duration': 131.03,
|
||||
'concurrent_view_count': int,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
@@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': 'ca63897a90c9452efee5f8c40d080e25',
|
||||
'md5': '1659aa21fb3dd1585874f668e81a72c8',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'uploader_id': '100059479812265',
|
||||
'concurrent_view_count': int,
|
||||
'duration': 44.478,
|
||||
'duration': 44.181,
|
||||
},
|
||||
}, {
|
||||
# FIXME: unable to extract uploader, no formats found
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||
@@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1511548260,
|
||||
'upload_date': '20171124',
|
||||
'uploader': 'Vickie Gentry',
|
||||
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
|
||||
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 148.435,
|
||||
'duration': 148.224,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
@@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
|
||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Lela Evans',
|
||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
||||
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
|
||||
'upload_date': '20231228',
|
||||
'timestamp': 1703804085,
|
||||
'duration': 394.347,
|
||||
@@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20180523',
|
||||
'uploader': 'ESL One Dota 2',
|
||||
'uploader_id': '100066514874195',
|
||||
'duration': 4524.212,
|
||||
'duration': 4524.001,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
@@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
|
||||
'title': 'Josef',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
|
||||
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
|
||||
'timestamp': 1549275572,
|
||||
'duration': 3.413,
|
||||
'duration': 3.283,
|
||||
'uploader': 'Josef Novak',
|
||||
'description': '',
|
||||
'upload_date': '20190204',
|
||||
@@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# FIXME: Cannot parse data error
|
||||
# data.event.cover_media_renderer.cover_video
|
||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||
'info_dict': {
|
||||
@@ -498,7 +501,8 @@ class FacebookIE(InfoExtractor):
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||
or get_first(post, ('event', 'event_creator', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
|
||||
uploader = uploader_data.get('name') or (
|
||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
or self._search_regex(
|
||||
@@ -524,6 +528,11 @@ class FacebookIE(InfoExtractor):
|
||||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
|
||||
'like_count': ('likers', 'count', {int}),
|
||||
'comment_count': ('total_comment_count', {int}),
|
||||
'repost_count': ('share_count_reduced', {parse_count}),
|
||||
}), get_all=False),
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
@@ -571,16 +580,21 @@ class FacebookIE(InfoExtractor):
|
||||
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
||||
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
rf'data-sjs>({{.*?{_filter}.*?}})</script>',
|
||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||
def yield_all_relay_data(_filter):
|
||||
for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})</script>', webpage):
|
||||
yield self._parse_json(relay_data, video_id, fatal=False) or {}
|
||||
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
return traverse_obj(extract_relay_data(_filter), (
|
||||
'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||
def extract_relay_data(_filter):
|
||||
return next(filter(None, yield_all_relay_data(_filter)), {})
|
||||
|
||||
def extract_relay_prefetched_data(_filter, target_keys=None):
|
||||
path = 'data'
|
||||
if target_keys is not None:
|
||||
path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys))
|
||||
return traverse_obj(yield_all_relay_data(_filter), (
|
||||
..., 'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
|
||||
..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||
..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
@@ -591,7 +605,8 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
if not video_data:
|
||||
data = extract_relay_prefetched_data(
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)',
|
||||
target_keys=('video', 'event', 'nodes', 'node', 'mediaset'))
|
||||
if data:
|
||||
entries = []
|
||||
|
||||
@@ -926,18 +941,21 @@ class FacebookReelIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||
'md5': 'f13dd37f2633595982db5ed8765474d3',
|
||||
'md5': 'a53256d10fc2105441fe0c4212ed8cea',
|
||||
'info_dict': {
|
||||
'id': '1195289147628387',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
|
||||
'description': 'md5:22f03309b216ac84720183961441d8db',
|
||||
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
|
||||
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'uploader': 'Beast Camp Training',
|
||||
'uploader_id': '100040874179269',
|
||||
'duration': 9.579,
|
||||
'timestamp': 1637502609,
|
||||
'upload_date': '20211121',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -957,6 +975,7 @@ class FacebookAdsIE(InfoExtractor):
|
||||
'id': '899206155126718',
|
||||
'ext': 'mp4',
|
||||
'title': 'video by Kandao',
|
||||
'description': 'md5:0822724069e3aca97cbed5dabbab282e',
|
||||
'uploader': 'Kandao',
|
||||
'uploader_id': '774114102743284',
|
||||
'uploader_url': r're:^https?://.*',
|
||||
@@ -965,6 +984,22 @@ class FacebookAdsIE(InfoExtractor):
|
||||
'upload_date': '20231214',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# key 'watermarked_video_sd_url' missing
|
||||
'url': 'https://www.facebook.com/ads/library/?id=501152689226254',
|
||||
'info_dict': {
|
||||
'id': '501152689226254',
|
||||
'ext': 'mp4',
|
||||
'title': 'video by mat.nawrocki',
|
||||
'description': 'md5:02a446ace7ff8c3c37a2892922492490',
|
||||
'uploader': 'mat.nawrocki',
|
||||
'uploader_id': '148586968341456',
|
||||
'uploader_url': r're:^https?://.*',
|
||||
'timestamp': 1723452305,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'upload_date': '20240812',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
|
||||
'info_dict': {
|
||||
@@ -1011,34 +1046,42 @@ class FacebookAdsIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
post_data = [self._parse_json(j, video_id, fatal=False)
|
||||
for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
|
||||
data = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
|
||||
post_data = traverse_obj(
|
||||
re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads}))
|
||||
data = get_first(post_data, (
|
||||
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ...,
|
||||
'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict}))
|
||||
if not data:
|
||||
raise ExtractorError('Unable to extract ad data')
|
||||
|
||||
title = data.get('title')
|
||||
if not title or title == '{{product.name}}':
|
||||
title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)
|
||||
markup_id = traverse_obj(data, ('body', '__m', {str}))
|
||||
markup = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'markup', lambda _, v: v[0].startswith(markup_id),
|
||||
..., '__html', {clean_html}, {lambda x: not x.startswith('{{product.') and x}, any))
|
||||
|
||||
info_dict = traverse_obj(data, {
|
||||
'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
|
||||
info_dict = merge_dicts({
|
||||
'title': title,
|
||||
'description': markup or None,
|
||||
}, traverse_obj(data, {
|
||||
'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}),
|
||||
'uploader': ('page_name', {str}),
|
||||
'uploader_id': ('page_id', {str_or_none}),
|
||||
'uploader_url': ('page_profile_uri', {url_or_none}),
|
||||
'timestamp': ('creation_time', {int_or_none}),
|
||||
'like_count': ('page_like_count', {int_or_none}),
|
||||
})
|
||||
}))
|
||||
|
||||
entries = []
|
||||
for idx, entry in enumerate(traverse_obj(
|
||||
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v[f]) for f in self._FORMATS_MAP))), 1,
|
||||
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1,
|
||||
):
|
||||
entries.append({
|
||||
'id': f'{video_id}_{idx}',
|
||||
'title': entry.get('title') or title,
|
||||
'description': entry.get('link_description') or info_dict.get('description'),
|
||||
'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'),
|
||||
'thumbnail': url_or_none(entry.get('video_preview_image_url')),
|
||||
'formats': self._extract_formats(entry),
|
||||
})
|
||||
|
||||
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -2340,7 +2340,7 @@ class GenericIE(InfoExtractor):
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
if re.match(r'[^\s/]+\.[^\s/]+/', url):
|
||||
self.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
@@ -2400,7 +2400,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = full_response.headers.get('Content-Type', '').lower()
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
self.report_detected('direct video link')
|
||||
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
||||
|
||||
91
yt_dlp/extractor/germanupa.py
Normal file
91
yt_dlp/extractor/germanupa.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GermanupaIE(InfoExtractor):
|
||||
IE_DESC = 'germanupa.de'
|
||||
_VALID_URL = r'https?://germanupa\.de/mediathek/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://germanupa.de/mediathek/4-figma-beratung-deine-sprechstunde-fuer-figma-fragen',
|
||||
'info_dict': {
|
||||
'id': '909179246',
|
||||
'title': 'Tutorial: #4 Figma Beratung - Deine Sprechstunde für Figma-Fragen',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'German UPA',
|
||||
'uploader_id': 'germanupa',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1792564420-7415283ccef8bf8702dab8c6b7515555ceeb7a1c11371ffcc133b8e887dbf70e-d_1280',
|
||||
'uploader_url': 'https://vimeo.com/germanupa',
|
||||
'duration': 3987,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'audio, uses GenericIE',
|
||||
'url': 'https://germanupa.de/mediathek/live-vom-ux-festival-neuigkeiten-von-figma-jobmarkt-agenturszene-interview-zu-sustainable',
|
||||
'info_dict': {
|
||||
'id': '1867346676',
|
||||
'title': 'Live vom UX Festival: Neuigkeiten von Figma, Jobmarkt, Agenturszene & Interview zu Sustainable UX',
|
||||
'ext': 'opus',
|
||||
'timestamp': 1720545088,
|
||||
'upload_date': '20240709',
|
||||
'duration': 3910.557,
|
||||
'like_count': int,
|
||||
'description': 'md5:db2aed5ff131e177a7b33901e9a8db05',
|
||||
'uploader': 'German UPA',
|
||||
'repost_count': int,
|
||||
'genres': ['Science'],
|
||||
'license': 'all-rights-reserved',
|
||||
'uploader_url': 'https://soundcloud.com/user-80097677',
|
||||
'uploader_id': '471579486',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-oCti2e9GhaZFWBqY-48ybGw-original.jpg',
|
||||
},
|
||||
}, {
|
||||
'note': 'Nur für Mitglieder/Just for members',
|
||||
'url': 'https://germanupa.de/mediathek/ux-festival-2024-usability-tests-und-ai',
|
||||
'info_dict': {
|
||||
'id': '986994430',
|
||||
'title': 'UX Festival 2024 "Usability Tests und AI" von Lennart Weber',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20240719',
|
||||
'uploader_url': 'https://vimeo.com/germanupa',
|
||||
'timestamp': 1721373980,
|
||||
'license': 'by-sa',
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1904187064-2a672630c30f9ad787bd390bff3f51d7506a3e8416763ba6dbf465732b165c5c-d_1280',
|
||||
'duration': 2146,
|
||||
'release_timestamp': 1721373980,
|
||||
'uploader': 'German UPA',
|
||||
'uploader_id': 'germanupa',
|
||||
'upload_date': '20240719',
|
||||
'comment_count': int,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Resolve a germanupa.de Mediathek page to the media it embeds.

    The page is searched for an iframe whose ``data-src`` points at the
    site's oEmbed endpoint. When one is found, the target URL is taken from
    that endpoint's ``url`` query parameter and handed to the Vimeo
    extractor. Otherwise the page is either members-only (a login wrapper
    is present) or is passed on to the generic extractor.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    # The embedded media URL is carried in the `url` query parameter of the oEmbed iframe source
    oembed_url = self._search_regex(
        r'<iframe[^>]+data-src\s*?=\s*?([\'"])(?P<url>https://germanupa\.de/media/oembed\?url=(?:(?!\1).)+)\1',
        webpage, 'embedded video', default=None, group='url')
    param_url = traverse_obj(oembed_url, ({parse_qs}, 'url', 0, {url_or_none}))

    if param_url:
        # Rewrite the Vimeo watch URL into its player form and smuggle this page as the referrer
        real_url = param_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/')
        return self.url_result(VimeoIE._smuggle_referrer(real_url, url), VimeoIE, video_id)

    # No oEmbed iframe: a login wrapper on the page indicates members-only content
    login_wrapper = self._search_regex(
        r'<div[^>]+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1',
        webpage, 'login wrapper', default=None)
    if login_wrapper:
        self.raise_login_required('This video is only available for members')

    return self.url_result(url, 'Generic')  # Fall back to generic to extract audio
|
||||
@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
|
||||
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
|
||||
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://academymel.online/3video_1',
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GolemIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_TEST = {
|
||||
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class HRFernsehenIE(InfoExtractor):
|
||||
IE_NAME = 'hrfernsehen'
|
||||
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
|
||||
'md5': '5c4e0ba94677c516a2f65a84110fc536',
|
||||
|
||||
@@ -8,15 +8,19 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class HuyaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
IE_NAME = 'huya:live'
|
||||
IE_DESC = 'huya.com'
|
||||
TESTS = [{
|
||||
@@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '572329',
|
||||
'title': str,
|
||||
'ext': 'flv',
|
||||
'description': str,
|
||||
'is_live': True,
|
||||
'view_count': int,
|
||||
@@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor):
|
||||
fm = base64.b64decode(params['fm']).decode().split('_', 1)[0]
|
||||
ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
|
||||
return fm, ss
|
||||
|
||||
|
||||
class HuyaVideoIE(InfoExtractor):
    """Extractor for huya.com video pages (``/video/play/<id>.html``)."""

    _VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P<id>\d+)\.html'
    IE_NAME = 'huya:video'
    IE_DESC = '虎牙视频'

    _TESTS = [{
        'url': 'https://www.huya.com/video/play/1002412640.html',
        'info_dict': {
            'id': '1002412640',
            'ext': 'mp4',
            'title': '8月3日',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 14,
            'uploader': '虎牙-ATS欧卡车队青木',
            'uploader_id': '1564376151',
            'upload_date': '20240803',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }, {
        'url': 'https://www.huya.com/video/play/556054543.html',
        'info_dict': {
            'id': '556054543',
            'ext': 'mp4',
            'title': '我不挑事 也不怕事',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 1864,
            'uploader': '卡尔',
            'uploader_id': '367138632',
            'upload_date': '20210811',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }]

    def _real_extract(self, url: str):
        """Fetch the moment metadata for the video and build its info dict.

        The ``data.moment.videoInfo`` object of the API response is required;
        a response without it raises from the key lookup.
        """
        video_id = self._match_id(url)
        response = self._download_json(
            'https://liveapi.huya.com/moment/getMomentContent', video_id,
            query={'videoId': video_id})
        video_data = response['data']['moment']['videoInfo']

        # Only definitions carrying a usable URL become formats
        formats = [{
            'url': definition['url'],
            **traverse_obj(definition, {
                'format_id': ('defName', {str}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': ('size', {int_or_none}),
            }),
        } for definition in traverse_obj(
            video_data, ('definitions', lambda _, v: url_or_none(v['url'])))]

        metadata = traverse_obj(video_data, {
            'title': ('videoTitle', {str}),
            'thumbnail': ('videoCover', {url_or_none}),
            'duration': ('videoDuration', {parse_duration}),
            'uploader': ('nickName', {str}),
            'uploader_id': ('uid', {str_or_none}),
            'upload_date': ('videoUploadTime', {unified_strdate}),
            'view_count': ('videoPlayNum', {int_or_none}),
            'comment_count': ('videoCommentNum', {int_or_none}),
            'like_count': ('favorCount', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }
|
||||
|
||||
@@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor):
|
||||
'id': 'p51388',
|
||||
'ext': 'mp4',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
|
||||
'upload_date': '20201103',
|
||||
'timestamp': 1604437480,
|
||||
'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
|
||||
'episode': 'Partička (92)',
|
||||
'season': 'Partička',
|
||||
'series': 'Prima Partička',
|
||||
'episode_number': 92,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
|
||||
'info_dict': {
|
||||
'id': 'p1412199',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 3,
|
||||
'episode': 'Tenerife: V říši ohně',
|
||||
'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
|
||||
'duration': 3111.0,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
|
||||
'title': 'Tenerife: V říši ohně',
|
||||
'timestamp': 1711825800,
|
||||
'upload_date': '20240330',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
@@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor):
|
||||
video_id = self._search_regex((
|
||||
r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'let\s+videos\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
), webpage, 'real id', group='id', default=None)
|
||||
|
||||
if not video_id:
|
||||
@@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor):
|
||||
final_result = self._search_json_ld(webpage, video_id, default={})
|
||||
final_result.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': final_result.get('title') or title,
|
||||
'thumbnail': self._html_search_meta(
|
||||
['thumbnail', 'og:image', 'twitter:image'],
|
||||
webpage, 'thumbnail', default=None),
|
||||
|
||||
@@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
|
||||
|
||||
|
||||
class SangiinInstructionIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
_VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
IE_DESC = False # this shouldn't be listed as a supported site
|
||||
|
||||
def _real_extract(self, url):
|
||||
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
|
||||
raise ExtractorError(
|
||||
'Copy the link from the button below the video description/player '
|
||||
'and use that link to download. If there is no button in the frame, '
|
||||
'get the URL of the frame showing the video.', expected=True)
|
||||
|
||||
|
||||
class SangiinIE(InfoExtractor):
|
||||
|
||||
@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
|
||||
https?://
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
|
||||
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
class KhanAcademyBaseIE(InfoExtractor):
|
||||
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||
|
||||
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70'
|
||||
_PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4'
|
||||
|
||||
def _parse_video(self, video):
|
||||
return {
|
||||
@@ -39,7 +39,7 @@ class KhanAcademyBaseIE(InfoExtractor):
|
||||
query={
|
||||
'fastly_cacheable': 'persist_until_publish',
|
||||
'pcv': self._PUBLISHED_CONTENT_VERSION,
|
||||
'hash': '1242644265',
|
||||
'hash': '3712657851',
|
||||
'variables': json.dumps({
|
||||
'path': display_id,
|
||||
'countryCode': 'US',
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
@@ -25,104 +30,212 @@ class KickBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
|
||||
return self._download_json(
|
||||
f'https://kick.com/api/v1/{path}', display_id, note=note,
|
||||
f'https://kick.com/api/{path}', display_id, note=note,
|
||||
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
|
||||
|
||||
|
||||
class KickIE(KickBaseIE):
|
||||
IE_NAME = 'kick:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/yuppy',
|
||||
'url': 'https://kick.com/buddha',
|
||||
'info_dict': {
|
||||
'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21',
|
||||
'id': '92722911-nopixel-40',
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'description': str,
|
||||
'channel': 'yuppy',
|
||||
'channel_id': '33538',
|
||||
'uploader': 'Yuppy',
|
||||
'uploader_id': '33793',
|
||||
'upload_date': str,
|
||||
'live_status': 'is_live',
|
||||
'timestamp': int,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'categories': list,
|
||||
'upload_date': str,
|
||||
'channel': 'buddha',
|
||||
'channel_id': '32807',
|
||||
'uploader': 'Buddha',
|
||||
'uploader_id': '33057',
|
||||
'live_status': 'is_live',
|
||||
'concurrent_view_count': int,
|
||||
'release_timestamp': int,
|
||||
'age_limit': 18,
|
||||
'release_date': str,
|
||||
},
|
||||
'skip': 'livestream',
|
||||
'params': {'skip_download': 'livestream'},
|
||||
# 'skip': 'livestream',
|
||||
}, {
|
||||
'url': 'https://kick.com/kmack710',
|
||||
'url': 'https://kick.com/xqc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel = self._match_id(url)
|
||||
response = self._call_api(f'channels/{channel}', channel)
|
||||
response = self._call_api(f'v2/channels/{channel}', channel)
|
||||
if not traverse_obj(response, 'livestream', expected_type=dict):
|
||||
raise UserNotLive(video_id=channel)
|
||||
|
||||
return {
|
||||
'id': str(traverse_obj(
|
||||
response, ('livestream', ('slug', 'id')), get_all=False, default=channel)),
|
||||
'formats': self._extract_m3u8_formats(
|
||||
response['playback_url'], channel, 'mp4', live=True),
|
||||
'title': traverse_obj(
|
||||
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
|
||||
'description': traverse_obj(response, ('user', 'bio')),
|
||||
'channel': channel,
|
||||
'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
|
||||
'uploader': traverse_obj(response, 'name', ('user', 'username')),
|
||||
'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
|
||||
'is_live': True,
|
||||
'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))),
|
||||
'thumbnail': traverse_obj(
|
||||
response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none),
|
||||
'categories': traverse_obj(response, ('recent_categories', ..., 'name')),
|
||||
'formats': self._extract_m3u8_formats(response['playback_url'], channel, 'mp4', live=True),
|
||||
**traverse_obj(response, {
|
||||
'id': ('livestream', 'slug', {str}),
|
||||
'title': ('livestream', 'session_title', {str}),
|
||||
'description': ('user', 'bio', {str}),
|
||||
'channel_id': (('id', ('livestream', 'channel_id')), {int}, {str_or_none}, any),
|
||||
'uploader': (('name', ('user', 'username')), {str}, any),
|
||||
'uploader_id': (('user_id', ('user', 'id')), {int}, {str_or_none}, any),
|
||||
'timestamp': ('livestream', 'created_at', {unified_timestamp}),
|
||||
'release_timestamp': ('livestream', 'start_time', {unified_timestamp}),
|
||||
'thumbnail': ('livestream', 'thumbnail', 'url', {url_or_none}),
|
||||
'categories': ('recent_categories', ..., 'name', {str}),
|
||||
'concurrent_view_count': ('livestream', 'viewer_count', {int_or_none}),
|
||||
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class KickVODIE(KickBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
IE_NAME = 'kick:vod'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3',
|
||||
'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'md5': '3870f94153e40e7121a6e46c068b70cb',
|
||||
'info_dict': {
|
||||
'id': '58bac65b-e641-4476-a7ba-3707a35e60e3',
|
||||
'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'ext': 'mp4',
|
||||
'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠',
|
||||
'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d',
|
||||
'channel': 'jaredfps',
|
||||
'channel_id': '26608',
|
||||
'uploader': 'JaredFPS',
|
||||
'uploader_id': '26799',
|
||||
'upload_date': '20240402',
|
||||
'timestamp': 1712097108,
|
||||
'duration': 33859.0,
|
||||
'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
|
||||
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
|
||||
'channel': 'xqc',
|
||||
'channel_id': '668',
|
||||
'uploader': 'xQc',
|
||||
'uploader_id': '676',
|
||||
'upload_date': '20240909',
|
||||
'timestamp': 1725919141,
|
||||
'duration': 10155.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'categories': ['Call of Duty: Warzone'],
|
||||
'view_count': int,
|
||||
'categories': ['Just Chatting'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'expected_warnings': [r'impersonation'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
response = self._call_api(f'video/{video_id}', video_id)
|
||||
response = self._call_api(f'v1/video/{video_id}', video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'),
|
||||
'title': traverse_obj(
|
||||
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
|
||||
'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')),
|
||||
'channel': traverse_obj(response, ('livestream', 'channel', 'slug')),
|
||||
'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))),
|
||||
'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')),
|
||||
'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))),
|
||||
'timestamp': unified_timestamp(response.get('created_at')),
|
||||
'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000),
|
||||
'thumbnail': traverse_obj(
|
||||
response, ('livestream', 'thumbnail'), expected_type=url_or_none),
|
||||
'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')),
|
||||
**traverse_obj(response, {
|
||||
'title': ('livestream', ('session_title', 'slug'), {str}, any),
|
||||
'description': ('livestream', 'channel', 'user', 'bio', {str}),
|
||||
'channel': ('livestream', 'channel', 'slug', {str}),
|
||||
'channel_id': ('livestream', 'channel', 'id', {int}, {str_or_none}),
|
||||
'uploader': ('livestream', 'channel', 'user', 'username', {str}),
|
||||
'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}),
|
||||
'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
|
||||
'categories': ('livestream', 'categories', ..., 'name', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class KickClipIE(KickBaseIE):
    """Extractor for kick.com clips (``/<channel>/clips/<id>`` or ``?clip=<id>``)."""

    IE_NAME = 'kick:clips'
    _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?P<id>clip_[\w-]+)'
    _TESTS = [{
        'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
        'info_dict': {
            'id': 'clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
            'ext': 'mp4',
            'title': 'Maddy detains Abd D:',
            'channel': 'mxddy',
            'channel_id': '133789',
            'uploader': 'AbdCreates',
            'uploader_id': '3309077',
            'thumbnail': r're:^https?://.*\.jpeg',
            'duration': 35,
            'timestamp': 1682481453,
            'upload_date': '20230426',
            'view_count': int,
            'like_count': int,
            'categories': ['VALORANT'],
            'age_limit': 18,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://kick.com/destiny?clip=clip_01H9SKET879NE7N9RJRRDS98J3',
        'info_dict': {
            'id': 'clip_01H9SKET879NE7N9RJRRDS98J3',
            'title': 'W jews',
            'ext': 'mp4',
            'channel': 'destiny',
            'channel_id': '1772249',
            'uploader': 'punished_furry',
            'uploader_id': '2027722',
            'duration': 49.0,
            'upload_date': '20230908',
            'timestamp': 1694150180,
            'thumbnail': 'https://clips.kick.com/clips/j3/clip_01H9SKET879NE7N9RJRRDS98J3/thumbnail.png',
            'view_count': int,
            'like_count': int,
            'categories': ['Just Chatting'],
            'age_limit': 0,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5',
        'info_dict': {
            'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5',
            'ext': 'mp4',
            'title': 'KLJASLDJKLJKASDLJKDAS',
            'channel': 'spreen',
            'channel_id': '5312671',
            'uploader': 'AnormalBarraBaja',
            'uploader_id': '26518262',
            'duration': 43.0,
            'upload_date': '20240927',
            'timestamp': 1727399987,
            'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp',
            'view_count': int,
            'like_count': int,
            'categories': ['Minecraft'],
            'age_limit': 0,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Look up the clip through the ``v2/clips/<id>/play`` API and build its info dict.

        The response's ``clip`` object and its ``clip_url`` are required;
        all other fields are optional metadata.
        """
        clip_id = self._match_id(url)
        clip = self._call_api(f'v2/clips/{clip_id}/play', clip_id)['clip']
        clip_url = clip['clip_url']

        # A clip URL is either an HLS manifest or a direct media file
        if determine_ext(clip_url) != 'm3u8':
            formats = [{'url': clip_url}]
        else:
            formats = self._extract_m3u8_formats(clip_url, clip_id, 'mp4')

        metadata = traverse_obj(clip, {
            'title': ('title', {str}),
            'channel': ('channel', 'slug', {str}),
            'channel_id': ('channel', 'id', {int}, {str_or_none}),
            'uploader': ('creator', 'username', {str}),
            'uploader_id': ('creator', 'id', {int}, {str_or_none}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'duration': ('duration', {float_or_none}),
            'categories': ('category', 'name', {str}, all),
            'timestamp': ('created_at', {parse_iso8601}),
            'view_count': ('views', {int_or_none}),
            'like_count': ('likes', {int_or_none}),
            'age_limit': ('is_mature', {bool}, {lambda x: 18 if x else 0}),
        })

        return {
            'id': clip_id,
            'formats': formats,
            **metadata,
        }
|
||||
|
||||
126
yt_dlp/extractor/kika.py
Normal file
126
yt_dlp/extractor/kika.py
Normal file
@@ -0,0 +1,126 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class KikaIE(InfoExtractor):
    """Extractor for kika.de video pages (``/.../videos/<id>``)."""

    IE_DESC = 'KiKA.de'
    _VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P<id>[a-z-]+\d+)'
    _GEO_COUNTRIES = ['DE']

    _TESTS = [{
        'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
        'md5': 'fbfc8da483719ef06f396e5e5b938c69',
        'info_dict': {
            'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
            'ext': 'mp4',
            'upload_date': '20240831',
            'timestamp': 1725126600,
            'season_number': 2024,
            'modified_date': '20240831',
            'episode': 'Episode 476',
            'episode_number': 476,
            'season': 'Season 2024',
            'duration': 634,
            'title': 'logo! vom Samstag, 31. August 2024',
            'modified_timestamp': 1725129983,
        },
    }, {
        'url': 'https://www.kika.de/kaltstart/videos/video92498',
        'md5': '710ece827e5055094afeb474beacb7aa',
        'info_dict': {
            'id': 'video92498',
            'ext': 'mp4',
            'title': '7. Wo ist Leo?',
            'description': 'md5:fb48396a5b75068bcac1df74f1524920',
            'duration': 436,
            'timestamp': 1702926876,
            'upload_date': '20231218',
            'episode_number': 7,
            'modified_date': '20240319',
            'modified_timestamp': 1710880610,
            'episode': 'Episode 7',
            'season_number': 1,
            'season': 'Season 1',
        },
    }, {
        'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
        'md5': 'ffd1b700d7de0a6616a1d08544c77294',
        'info_dict': {
            'id': 'video90088',
            'ext': 'mp4',
            'upload_date': '20221102',
            'timestamp': 1667390580,
            'duration': 197,
            'modified_timestamp': 1711093771,
            'episode_number': 8,
            'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
            'modified_date': '20240322',
            'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
            'season_number': 1,
            'episode': 'Episode 8',
            'season': 'Season 1',
        },
    }]

    def _real_extract(self, url):
        """Download the video document and its asset listing, then build the info dict.

        The document's ``assets.url`` is required; it points at a second JSON
        resource from which formats and subtitles are read.
        """
        video_id = self._match_id(url)

        doc = self._download_json(f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id)
        video_assets = self._download_json(doc['assets']['url'], video_id)

        # Both subtitle flavours are filed under 'de', TTML first and WebVTT second
        subtitles = {}
        for asset_key, sub_ext in (('videoSubtitle', 'ttml'), ('webvttUrl', 'vtt')):
            if sub_url := url_or_none(video_assets.get(asset_key)):
                subtitles.setdefault('de', []).append({
                    'url': sub_url,
                    'ext': sub_ext,
                })

        formats = list(self._extract_formats(video_assets, video_id))
        metadata = traverse_obj(doc, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('date', {parse_iso8601}),
            'modified_timestamp': ('modificationDate', {parse_iso8601}),
            'duration': ((
                ('durationInSeconds', {int_or_none}),
                ('duration', {parse_duration})), any),
            'episode_number': ('episodeNumber', {int_or_none}),
            'season_number': ('season', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }

    def _extract_formats(self, media_info, video_id):
        """Yield format dicts for every asset of ``media_info`` that has a valid URL.

        HLS manifests are expanded non-fatally into their variants; any other
        asset is yielded as a single format keyed by its file extension.
        """
        for media in traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url']))):
            stream_url = media['url']
            ext = determine_ext(stream_url)
            if ext != 'm3u8':
                yield {
                    'url': stream_url,
                    'format_id': ext,
                    **traverse_obj(media, {
                        'width': ('frameWidth', {int_or_none}),
                        'height': ('frameHeight', {int_or_none}),
                        # NB: filesize is 0 if unknown, bitrate is -1 if unknown
                        'filesize': ('fileSize', {int_or_none}, {lambda x: x or None}),
                        'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
                        'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
                    }),
                }
                continue

            yield from self._extract_m3u8_formats(
                stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
78
yt_dlp/extractor/learningonscreen.py
Normal file
78
yt_dlp/extractor/learningonscreen.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LearningOnScreenIE(InfoExtractor):
    """Extractor for learningonscreen.ac.uk on-demand programme pages."""

    _VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013',
        'info_dict': {
            'id': '005D81B2',
            'ext': 'mp4',
            'title': 'Planet Earth',
            'duration': 3600.0,
            'timestamp': 1164567600.0,
            'upload_date': '20061126',
            'thumbnail': 'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg',
        },
    }]

    def _real_initialize(self):
        """Require the site's session cookie before any extraction is attempted."""
        if self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
            return

        self.raise_login_required(
            'Use --cookies for authentication. See '
            ' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
            'for how to manually pass cookies', method=None)

    def _real_extract(self, url):
        """Scrape programme details and HTML5 media entries from the page.

        A page with one media entry yields a single video; a page with
        several yields a playlist whose entries are numbered from 1.

        Raises ExtractorError when the page has no media entries.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # All three fields are read from inside the 'programme-details' element
        details = traverse_obj(webpage, (
            {functools.partial(get_element_html_by_id, 'programme-details')}, {
                'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}),
                'timestamp': (
                    {functools.partial(get_element_by_class, 'broadcast-date')},
                    {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
                'duration': (
                    {functools.partial(get_element_by_class, 'prog-running-time')},
                    {clean_html}, {parse_duration}),
            }))

        # The title is removed from `details` so it is not merged into entries below
        title = details.pop('title', None)
        if not title:
            # Fall back to the record title attribute of the playlist widget
            title = traverse_obj(webpage, (
                {functools.partial(get_element_html_by_id, 'add-to-existing-playlist')},
                {extract_attributes}, 'data-record-title', {clean_html}))

        entries = self._parse_html5_media_entries(
            'https://stream.learningonscreen.ac.uk', webpage, video_id, m3u8_id='hls', mpd_id='dash',
            _headers={'Origin': 'https://learningonscreen.ac.uk', 'Referer': 'https://learningonscreen.ac.uk/'})
        if not entries:
            raise ExtractorError('No video found')

        if len(entries) == 1:
            return {
                **entries[0],
                **details,
                'id': video_id,
                'title': title,
            }

        # Multiple entries: the scraped duration is attached to the playlist, not to each entry
        duration = details.pop('duration', None)
        for number, entry in enumerate(entries, start=1):
            entry.update(details)
            entry['id'] = join_nonempty(video_id, number)
            entry['title'] = join_nonempty(title, number)
        return self.playlist_result(entries, video_id, title, duration=duration)
|
||||
@@ -1,86 +1,11 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
    """Extractor for video pages on lnk.lt / lnkgo.lt (including the lnkgo.alfa.lt host).

    The page slug (and optional numeric episode id) from the URL is used to query
    the lnk.lt video-page API; the returned ``videoInfo`` object supplies both the
    metadata and the pieces needed to build the HLS manifest URL.
    """

    _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
    _TESTS = [{
        'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
        'info_dict': {
            'id': '10809',
            'ext': 'mp4',
            'title': "Put'ka: Trys Klausimai",
            'upload_date': '20161216',
            'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
            'age_limit': 18,
            'duration': 117,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1481904000,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }, {
        'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
        'info_dict': {
            'id': '10467',
            'ext': 'mp4',
            'title': 'Nėrdas: Kompiuterio Valymas',
            'upload_date': '20150113',
            'description': 'md5:7352d113a242a808676ff17e69db6a69',
            'age_limit': 18,
            'duration': 346,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1421164800,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }, {
        'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
        'only_matching': True,
    }]
    # Maps the API's ``pgRating`` value to a numeric age limit; unknown ratings fall back to 0
    _AGE_LIMITS = {
        'N-7': 7,
        'N-14': 14,
        'S': 18,
    }
    # Filled with (stream prefix, videoUrl, secure token query string)
    _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'

    def _real_extract(self, url):
        """Fetch the video-page API entry for ``url`` and return its info dict."""
        slug, episode_id = self._match_valid_url(url).group('id', 'episode_id')

        # The API expects an episode segment; '0' is sent when the URL carries none
        page_api_url = f'https://lnk.lt/api/main/video-page/{slug}/{episode_id or "0"}/false'
        info = self._download_json(page_api_url, slug)['videoConfig']['videoInfo']

        video_id = str(info['id'])
        title = info['title']

        if info.get('isQualityChangeAvailable'):
            stream_prefix = 'smil'
        else:
            stream_prefix = 'mp4'
        manifest_url = self._M3U8_TEMPL % (
            stream_prefix, info['videoUrl'], info.get('secureTokenParams') or '')
        formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4', 'm3u8_native')

        return {
            'id': video_id,
            'display_id': slug,
            'title': title,
            'formats': formats,
            'thumbnail': format_field(info, 'posterImage', 'https://lnk.lt/all-images/%s'),
            'duration': int_or_none(info.get('duration')),
            'description': clean_html(info.get('htmlDescription')),
            'age_limit': self._AGE_LIMITS.get(info.get('pgRating'), 0),
            'timestamp': parse_iso8601(info.get('airDate')),
            'view_count': int_or_none(info.get('viewsCount')),
        }
|
||||
|
||||
|
||||
class LnkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'
|
||||
|
||||
@@ -92,9 +92,9 @@ class LoomIE(InfoExtractor):
|
||||
},
|
||||
'params': {'videopassword': 'seniorinfants2'},
|
||||
}, {
|
||||
# embed, transcoded-url endpoint sends empty JSON response
|
||||
# embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
|
||||
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'md5': '8488817242a0db1cb2ad0ea522553cf6',
|
||||
'md5': 'b321d261656848c184a94e3b93eae28d',
|
||||
'info_dict': {
|
||||
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'ext': 'mp4',
|
||||
@@ -104,6 +104,7 @@ class LoomIE(InfoExtractor):
|
||||
'timestamp': 1657216459,
|
||||
'duration': 181,
|
||||
},
|
||||
'params': {'format': 'bestvideo'}, # Test video-only fixup
|
||||
'expected_warnings': ['Failed to parse JSON'],
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -293,7 +294,11 @@ class LoomIE(InfoExtractor):
|
||||
format_url = format_url.replace('-split.m3u8', '.m3u8')
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
|
||||
# Sometimes only split video/audio formats are available, need to fixup video-only formats
|
||||
is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec'))
|
||||
for fmt in m3u8_formats:
|
||||
if is_not_premerged and fmt.get('vcodec') != 'none':
|
||||
fmt['acodec'] = 'none'
|
||||
yield {
|
||||
**fmt,
|
||||
'url': update_url(fmt['url'], query=query),
|
||||
|
||||
@@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
|
||||
video_data = None
|
||||
|
||||
# fix meta_url if missing the host address
|
||||
if re.match(r'^\/\+\/', meta_url):
|
||||
if re.match(r'\/\+\/', meta_url):
|
||||
meta_url = urljoin('https://my.mail.ru', meta_url)
|
||||
|
||||
if meta_url:
|
||||
|
||||
@@ -13,8 +13,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
IE_DESC = 'MDR.DE'
|
||||
_VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
@@ -34,30 +34,6 @@ class MDRIE(InfoExtractor):
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
|
||||
'md5': '4930515e36b06c111213e80d1e4aad0e',
|
||||
'info_dict': {
|
||||
'id': '19636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baumhaus vom 30. Oktober 2015',
|
||||
'duration': 134,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
|
||||
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
|
||||
'info_dict': {
|
||||
'id': '8182',
|
||||
'ext': 'mp4',
|
||||
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
||||
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
||||
'timestamp': 1482541200,
|
||||
'upload_date': '20161224',
|
||||
'duration': 4628,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
}, {
|
||||
# audio with alternative playerURL pattern
|
||||
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
|
||||
@@ -68,28 +44,7 @@ class MDRIE(InfoExtractor):
|
||||
'duration': 3239,
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
}, {
|
||||
# empty bitrateVideo and bitrateAudio
|
||||
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
|
||||
'info_dict': {
|
||||
'id': '128372',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der kleine Wichtel kehrt zurück',
|
||||
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
|
||||
'duration': 4876,
|
||||
'timestamp': 1607823300,
|
||||
'upload_date': '20201213',
|
||||
'uploader': 'ZDF',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
|
||||
'only_matching': True,
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -16,6 +16,15 @@ class MediaKlikkIE(InfoExtractor):
|
||||
(?P<id>[^/#?_]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/',
|
||||
'info_dict': {
|
||||
'id': '668177',
|
||||
'title': 'Az ajtó',
|
||||
'display_id': 'az-ajto',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg',
|
||||
},
|
||||
}, {
|
||||
# (old) mediaklikk. date in html.
|
||||
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
|
||||
'info_dict': {
|
||||
@@ -37,6 +46,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230903',
|
||||
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) m4sport
|
||||
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
|
||||
@@ -59,6 +69,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230908',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# m4sport with *video/ url and no date
|
||||
'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
|
||||
@@ -69,6 +80,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) hirado
|
||||
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
|
||||
@@ -90,6 +102,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230911',
|
||||
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}, {
|
||||
# (old) petofilive
|
||||
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
|
||||
@@ -112,6 +125,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230909',
|
||||
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -133,7 +147,9 @@ class MediaKlikkIE(InfoExtractor):
|
||||
r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
|
||||
|
||||
player_data['video'] = player_data.pop('token')
|
||||
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
|
||||
player_page = self._download_webpage(
|
||||
'https://player.mediaklikk.hu/playernew/player.php', video_id,
|
||||
query=player_data, headers={'Referer': url})
|
||||
player_json = self._search_json(
|
||||
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
|
||||
playlist_url = traverse_obj(
|
||||
@@ -141,14 +157,14 @@ class MediaKlikkIE(InfoExtractor):
|
||||
if not playlist_url:
|
||||
raise ExtractorError('Unable to extract playlist url')
|
||||
|
||||
formats = self._extract_wowza_formats(
|
||||
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
IE_NAME = 'MangoTV'
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ class TechTVMITIE(InfoExtractor):
|
||||
|
||||
class OCWMITIE(InfoExtractor):
|
||||
IE_NAME = 'ocw.mit.edu'
|
||||
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_BASE_URL = 'http://ocw.mit.edu/'
|
||||
|
||||
_TESTS = [
|
||||
|
||||
@@ -1,16 +1,21 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
@@ -276,81 +281,225 @@ class MLBVideoIE(MLBBaseIE):
|
||||
class MLBTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
|
||||
_NETRC_MACHINE = 'mlb'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
|
||||
'info_dict': {
|
||||
'id': '661581',
|
||||
'ext': 'mp4',
|
||||
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
|
||||
'release_date': '20220702',
|
||||
'release_timestamp': 1656792300,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# makeup game: has multiple dates, need to avoid games with 'rescheduleDate'
|
||||
'url': 'https://www.mlb.com/tv/g747039/vd22541c4-5a29-45f7-822b-635ec041cf5e',
|
||||
'info_dict': {
|
||||
'id': '747039',
|
||||
'ext': 'mp4',
|
||||
'title': '2024-07-29 - Toronto Blue Jays @ Baltimore Orioles',
|
||||
'release_date': '20240729',
|
||||
'release_timestamp': 1722280200,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
_GRAPHQL_INIT_QUERY = '''\
|
||||
mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) {
|
||||
initSession(device: $device, clientType: $clientType, experience: $experience) {
|
||||
deviceId
|
||||
sessionId
|
||||
entitlements {
|
||||
code
|
||||
}
|
||||
location {
|
||||
countryCode
|
||||
regionName
|
||||
zipCode
|
||||
latitude
|
||||
longitude
|
||||
}
|
||||
clientExperience
|
||||
features
|
||||
}
|
||||
}'''
|
||||
_GRAPHQL_PLAYBACK_QUERY = '''\
|
||||
mutation initPlaybackSession(
|
||||
$adCapabilities: [AdExperienceType]
|
||||
$mediaId: String!
|
||||
$deviceId: String!
|
||||
$sessionId: String!
|
||||
$quality: PlaybackQuality
|
||||
) {
|
||||
initPlaybackSession(
|
||||
adCapabilities: $adCapabilities
|
||||
mediaId: $mediaId
|
||||
deviceId: $deviceId
|
||||
sessionId: $sessionId
|
||||
quality: $quality
|
||||
) {
|
||||
playbackSessionId
|
||||
playback {
|
||||
url
|
||||
token
|
||||
expiration
|
||||
cdn
|
||||
}
|
||||
}
|
||||
}'''
|
||||
_APP_VERSION = '7.8.2'
|
||||
_device_id = None
|
||||
_session_id = None
|
||||
_access_token = None
|
||||
_token_expiry = 0
|
||||
|
||||
@property
def _api_headers(self):
    """Authorization header for API calls, logging in again first when the token is stale.

    The access token is treated as expired 120 seconds before ``_token_expiry``.
    """
    refresh_deadline = self._token_expiry - 120
    if time.time() >= refresh_deadline:
        self.write_debug('Access token has expired; re-logging in')
        username, password = self._get_login_info()
        self._perform_login(username, password)
    bearer = f'Bearer {self._access_token}'
    return {'Authorization': bearer}
|
||||
|
||||
def _real_initialize(self):
    """Require a login: report that credentials are needed when no access token is held."""
    if self._access_token:
        return
    self.raise_login_required(
        'All videos are only available to registered users', method='password')
|
||||
|
||||
def _set_device_id(self, username):
    """Ensure ``self._device_id`` is set for ``username``.

    An ID already held on the instance is kept. Otherwise the ID cached for
    ``username`` is reused, and only if none exists is a fresh UUID generated
    and written to the cache.
    """
    if self._device_id:
        return
    device_ids = self.cache.load(self._NETRC_MACHINE, 'device_ids', default={})
    self._device_id = device_ids.get(username)
    if self._device_id:
        return
    self._device_id = str(uuid.uuid4())
    # Store the merged mapping: writing only {username: id} would discard
    # the device IDs already cached for other usernames
    device_ids[username] = self._device_id
    self.cache.store(self._NETRC_MACHINE, 'device_ids', device_ids)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
|
||||
access_token = self._download_json(
|
||||
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
||||
headers={
|
||||
'User-Agent': 'okhttp/3.12.1',
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=data.encode())['access_token']
|
||||
try:
|
||||
self._access_token = self._download_json(
|
||||
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
||||
'Logging in', 'Unable to log in', headers={
|
||||
'User-Agent': 'okhttp/3.12.1',
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=urlencode_postdata({
|
||||
'grant_type': 'password',
|
||||
'username': username,
|
||||
'password': password,
|
||||
'scope': 'openid offline_access',
|
||||
'client_id': '0oa3e1nutA1HLzAKG356',
|
||||
}))['access_token']
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 400:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
raise
|
||||
|
||||
entitlement = self._download_webpage(
|
||||
f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None,
|
||||
headers={
|
||||
'User-Agent': 'okhttp/3.12.1',
|
||||
'Authorization': f'Bearer {access_token}',
|
||||
})
|
||||
self._token_expiry = traverse_obj(self._access_token, ({jwt_decode_hs256}, 'exp', {int})) or 0
|
||||
self._set_device_id(username)
|
||||
|
||||
data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
|
||||
self._access_token = self._download_json(
|
||||
'https://us.edge.bamgrid.com/token', None,
|
||||
self._session_id = self._call_api({
|
||||
'operationName': 'initSession',
|
||||
'query': self._GRAPHQL_INIT_QUERY,
|
||||
'variables': {
|
||||
'device': {
|
||||
'appVersion': self._APP_VERSION,
|
||||
'deviceFamily': 'desktop',
|
||||
'knownDeviceId': self._device_id,
|
||||
'languagePreference': 'ENGLISH',
|
||||
'manufacturer': '',
|
||||
'model': '',
|
||||
'os': '',
|
||||
'osVersion': '',
|
||||
},
|
||||
'clientType': 'WEB',
|
||||
},
|
||||
}, None, 'session ID')['data']['initSession']['sessionId']
|
||||
|
||||
def _call_api(self, data, video_id, description='GraphQL JSON', fatal=True):
|
||||
return self._download_json(
|
||||
'https://media-gateway.mlb.com/graphql', video_id,
|
||||
f'Downloading {description}', f'Unable to download {description}', fatal=fatal,
|
||||
headers={
|
||||
**self._api_headers,
|
||||
'Accept': 'application/json',
|
||||
'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=data.encode())['access_token']
|
||||
'Content-Type': 'application/json',
|
||||
'x-client-name': 'WEB',
|
||||
'x-client-version': self._APP_VERSION,
|
||||
}, data=json.dumps(data, separators=(',', ':')).encode())
|
||||
|
||||
def _extract_formats_and_subtitles(self, broadcast, video_id):
    """Start a playback session for one broadcast and return ``(formats, subtitles)``.

    Returns ``([], {})`` after a warning when the API yields no playable URL/token pair.
    Raises ExtractorError (expected) when the API errors contain 'not entitled'.
    """
    feed = traverse_obj(broadcast, ('homeAway', {str.title}))
    medium = traverse_obj(broadcast, ('type', {str}))
    language = traverse_obj(broadcast, ('language', {str.lower}))
    format_id = join_nonempty(feed, medium, language)

    playback_request = {
        'operationName': 'initPlaybackSession',
        'query': self._GRAPHQL_PLAYBACK_QUERY,
        'variables': {
            'adCapabilities': ['GOOGLE_STANDALONE_AD_PODS'],
            'deviceId': self._device_id,
            'mediaId': broadcast['mediaId'],
            'quality': 'PLACEHOLDER',
            'sessionId': self._session_id,
        },
    }
    response = self._call_api(
        playback_request, video_id, f'{format_id} broadcast JSON', fatal=False)

    playback = traverse_obj(response, ('data', 'initPlaybackSession', 'playback', {dict}))
    m3u8_url = traverse_obj(playback, ('url', {url_or_none}))
    token = traverse_obj(playback, ('token', {str}))

    if not m3u8_url or not token:
        errors = '; '.join(traverse_obj(response, ('errors', ..., 'message', {str})))
        if 'not entitled' in errors:
            raise ExtractorError(errors, expected=True)
        if errors:
            # Only warn when 'blacked out' since radio formats are available
            self.report_warning(f'API returned errors for {format_id}: {errors}')
        else:
            self.report_warning(f'No formats available for {format_id} broadcast; skipping')
        return [], {}

    # The token is stripped from the URL path and sent as a header instead
    cdn_headers = {'x-cdn-token': token}
    tokenless_url = m3u8_url.replace(f'/{token}/', '/')
    fmts, subs = self._extract_m3u8_formats_and_subtitles(
        tokenless_url, video_id, 'mp4',
        m3u8_id=format_id, fatal=False, headers=cdn_headers)

    note = join_nonempty(feed, medium, delim=' ')
    for fmt in fmts:
        fmt['http_headers'] = cdn_headers
        fmt.setdefault('format_note', note)
        fmt.setdefault('language', language)
        is_audio_only = fmt.get('vcodec') == 'none'
        if is_audio_only and fmt['language'] == 'en':
            fmt['source_preference'] = 10

    return fmts, subs
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
airings = self._download_json(
|
||||
f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
|
||||
video_id)['data']['Airings']
|
||||
data = self._download_json(
|
||||
'https://statsapi.mlb.com/api/v1/schedule', video_id, query={
|
||||
'gamePk': video_id,
|
||||
'hydrate': 'broadcasts(all),statusFlags',
|
||||
})
|
||||
metadata = traverse_obj(data, (
|
||||
'dates', ..., 'games',
|
||||
lambda _, v: str(v['gamePk']) == video_id and not v.get('rescheduleDate'), any))
|
||||
|
||||
broadcasts = traverse_obj(metadata, (
|
||||
'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF'))
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']):
|
||||
format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing)
|
||||
m3u8_url = traverse_obj(self._download_json(
|
||||
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
|
||||
note=f'Downloading {format_id} stream info JSON',
|
||||
errnote=f'Failed to download {format_id} stream info, skipping',
|
||||
fatal=False, headers={
|
||||
'Authorization': self._access_token,
|
||||
'Accept': 'application/vnd.media-service+json; version=2',
|
||||
}), ('stream', 'complete', {url_or_none}))
|
||||
if not m3u8_url:
|
||||
continue
|
||||
f, s = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
|
||||
formats.extend(f)
|
||||
self._merge_subtitles(s, target=subtitles)
|
||||
for broadcast in broadcasts:
|
||||
fmts, subs = self._extract_formats_and_subtitles(broadcast, video_id)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
|
||||
'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE',
|
||||
'title': join_nonempty(
|
||||
traverse_obj(metadata, ('officialDate', {str})),
|
||||
traverse_obj(metadata, ('teams', ('away', 'home'), 'team', 'name', {str}, all, {' @ '.join})),
|
||||
delim=' - '),
|
||||
'is_live': traverse_obj(broadcasts, (..., 'mediaState', 'mediaStateCode', {str}, any)) == 'MEDIA_ON',
|
||||
'release_timestamp': traverse_obj(metadata, ('gameDate', {parse_iso8601})),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
|
||||
}
|
||||
|
||||
|
||||
|
||||
121
yt_dlp/extractor/mojevideo.py
Normal file
121
yt_dlp/extractor/mojevideo.py
Normal file
@@ -0,0 +1,121 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json, remove_end, update_url_query
|
||||
|
||||
|
||||
class MojevideoIE(InfoExtractor):
    """Extractor for mojevideo.sk video pages.

    Format URLs are assembled from three values scraped out of the page script:
    the decimal video id (``vId``), an expiry value (``vEx``) and a list of
    hashes (``vHash``) that is paired positionally with the known quality variants.
    """

    IE_DESC = 'mojevideo.sk'
    _VALID_URL = r'https?://(?:www\.)?mojevideo\.sk/video/(?P<id>\w+)/(?P<display_id>[\w()]+?)\.html'

    _TESTS = [{
        'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html',
        'md5': '384a4628bd2bbd261c5206cf77c38c17',
        'info_dict': {
            'id': '3d17c',
            'ext': 'mp4',
            'title': 'Chlapci dobetónovali sme, máme hotovo!',
            'display_id': 'chlapci_dobetonovali_sme_mame_hotovo',
            'description': 'md5:a0822126044050d304a9ef58c92ddb34',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/250236.jpg',
            'duration': 21.0,
            'upload_date': '20230919',
            'timestamp': 1695129706,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/14677/den_blbec.html',
        'md5': '517c3e111c53a67d10b429c1f344ba2f',
        'info_dict': {
            'id': '14677',
            'ext': 'mp4',
            'title': 'Deň blbec?',
            'display_id': 'den_blbec',
            'description': 'I maličkosť vám môže zmeniť celý deň. Nikdy nezahadzujte žuvačky na zem!',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/83575.jpg',
            'duration': 100.0,
            'upload_date': '20120515',
            'timestamp': 1337076481,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/2feb2/band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd).html',
        'md5': '64599a23d3ac31cf2fe069e4353d8162',
        'info_dict': {
            'id': '2feb2',
            'ext': 'mp4',
            'title': 'BAND-MAID - onset (Instrumental) Live - Zepp Tokyo (Full HD)',
            'display_id': 'band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd)',
            'description': 'Výborná inštrumentálna skladba od skupiny BAND-MAID.',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/196274.jpg',
            'duration': 240.0,
            'upload_date': '20190708',
            'timestamp': 1562576592,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/358c8/dva_nissany_skyline_strielaju_v_londyne.html',
        'only_matching': True,
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/2455d/gopro_hero4_session_nova_sportova_vodotesna_kamera.html',
        'only_matching': True,
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/352ee/amd_rx_6800_xt_vs_nvidia_rtx_3080_(test_v_9_hrach).html',
        'only_matching': True,
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/2cbeb/trailer_z_avengers_infinity_war.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page and build one direct MP4 format per available hash."""
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
        webpage = self._download_webpage(url, video_id)

        numeric_id = self._search_regex(
            r'\bvId\s*=\s*(\d+)', webpage, 'video id', fatal=False)
        if not numeric_id:
            # Fall back to reading the URL id as a hexadecimal number
            numeric_id = str(int(video_id, 16))
        expiry = self._search_regex(r'\bvEx\s*=\s*["\'](\d+)', webpage, 'video expiry')
        hashes = self._search_json(
            r'\bvHash\s*=', webpage, 'video hashes', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)

        # (file suffix, quality rank, note); zip() stops at the shorter sequence,
        # so only as many variants as there are hashes produce a format
        variants = (
            ('', 1, 'normálna kvalita'),
            ('_lq', 0, 'nízka kvalita'),
            ('_hd', 2, 'HD-720p'),
            ('_fhd', 3, 'FULL HD-1080p'),
            ('_2k', 4, '2K-1440p'),
        )
        formats = [{
            'format_id': f'mp4-{quality}',
            'quality': quality,
            'format_note': note,
            'url': update_url_query(
                f'https://cache01.mojevideo.sk/securevideos69/{numeric_id}{suffix}.mp4',
                {'md5': digest, 'expires': expiry}),
        } for digest, (suffix, quality, note) in zip(hashes, variants)]

        title = self._og_search_title(webpage, default=None)
        if not title:
            title = remove_end(self._html_extract_title(webpage, 'title'), ' - Mojevideo')
        description = self._og_search_description(webpage)

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'description': description,
            # JSON-LD fields are unpacked last, so they override the keys above on collision
            **self._search_json_ld(webpage, video_id, default={}),
        }
|
||||
@@ -40,7 +40,6 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||
'info_dict': {
|
||||
'id': 'sm22312215',
|
||||
'ext': 'mp4',
|
||||
@@ -56,8 +55,8 @@ class NiconicoIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'genres': ['未設定'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# File downloaded with and without credentials are different, so omit
|
||||
# the md5 field
|
||||
@@ -77,8 +76,8 @@ class NiconicoIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'genres': ['音楽・サウンド'],
|
||||
'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# 'video exists but is marked as "deleted"
|
||||
# md5 is unstable
|
||||
@@ -112,7 +111,6 @@ class NiconicoIE(InfoExtractor):
|
||||
}, {
|
||||
# video not available via `getflv`; "old" HTML5 video
|
||||
'url': 'http://www.nicovideo.jp/watch/sm1151009',
|
||||
'md5': 'f95a3d259172667b293530cc2e41ebda',
|
||||
'info_dict': {
|
||||
'id': 'sm1151009',
|
||||
'ext': 'mp4',
|
||||
@@ -128,11 +126,10 @@ class NiconicoIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'genres': ['ゲーム'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# "New" HTML5 video
|
||||
# md5 is unstable
|
||||
'url': 'http://www.nicovideo.jp/watch/sm31464864',
|
||||
'info_dict': {
|
||||
'id': 'sm31464864',
|
||||
@@ -149,12 +146,11 @@ class NiconicoIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'genres': ['アニメ'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Video without owner
|
||||
'url': 'http://www.nicovideo.jp/watch/sm18238488',
|
||||
'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
|
||||
'info_dict': {
|
||||
'id': 'sm18238488',
|
||||
'ext': 'mp4',
|
||||
@@ -168,8 +164,8 @@ class NiconicoIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'genres': ['エンターテイメント'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||
'only_matching': True,
|
||||
@@ -424,7 +420,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'x-request-with': 'https://www.nicovideo.jp',
|
||||
})['data']['contentUrl']
|
||||
# Getting all audio formats results in duplicate video formats which we filter out later
|
||||
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)
|
||||
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')
|
||||
|
||||
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
|
||||
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
|
||||
@@ -436,7 +432,6 @@ class NiconicoIE(InfoExtractor):
|
||||
'asr': ('samplingRate', {int_or_none}),
|
||||
}), get_all=False),
|
||||
'acodec': 'aac',
|
||||
'ext': 'm4a',
|
||||
}
|
||||
|
||||
# Sort before removing dupes to keep the format dicts with the lowest tbr
|
||||
@@ -458,9 +453,11 @@ class NiconicoIE(InfoExtractor):
|
||||
if video_id.startswith('so'):
|
||||
video_id = self._match_id(handle.url)
|
||||
|
||||
api_data = self._parse_json(self._html_search_regex(
|
||||
'data-api-data="([^"]+)"', webpage,
|
||||
'API data', default='{}'), video_id)
|
||||
api_data = traverse_obj(
|
||||
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
|
||||
('data', 'response', {dict}))
|
||||
if not api_data:
|
||||
raise ExtractorError('Server response data not found')
|
||||
except ExtractorError as e:
|
||||
try:
|
||||
api_data = self._download_json(
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NZOnScreenIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
|
||||
'info_dict': {
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
|
||||
class NZZIE(InfoExtractor):
|
||||
@@ -22,19 +19,14 @@ class NZZIE(InfoExtractor):
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
|
||||
def _entries(self, webpage, page_id):
    """Yield one parsed JW Player result per matching ``<script data-hid="jw-video-jw...">`` block.

    Scripts whose ``settings`` object yields a falsy parse result are skipped.
    """
    script_re = r'(?s)<script[^>]* data-hid="jw-video-jw[^>]+>(.+?)</script>'
    for match in re.finditer(script_re, webpage):
        settings = self._search_json(
            r'var\s+settings\s*=[^{]*', match.group(1), 'settings', page_id, fatal=False)
        entry = self._parse_jwplayer_data(settings, page_id)
        if not entry:
            continue
        yield entry
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
entries = []
|
||||
for player_element in re.findall(
|
||||
r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
|
||||
player_params = extract_attributes(player_element)
|
||||
if player_params.get('data-type') not in ('kaltura_singleArticle',):
|
||||
self.report_warning('Unsupported player type')
|
||||
continue
|
||||
entry_id = player_params['data-id']
|
||||
entries.append(self.url_result(
|
||||
'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
|
||||
|
||||
return self.playlist_result(entries, page_id)
|
||||
return self.playlist_result(self._entries(webpage, page_id), page_id)
|
||||
|
||||
@@ -1,9 +1,19 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, try_get
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
try_get,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class OlympicsReplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P<id>[^/#&?]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?olympics\.com/[a-z]{2}/(?:paris-2024/)?(?:replay|videos?|original-series/episode)/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays',
|
||||
'info_dict': {
|
||||
@@ -11,26 +21,105 @@ class OlympicsReplayIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020',
|
||||
'upload_date': '20210801',
|
||||
'timestamp': 1627783200,
|
||||
'timestamp': 1627797600,
|
||||
'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3',
|
||||
'uploader': 'International Olympic Committee',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/nua4o7zwyaznoaejpbk2',
|
||||
'duration': 7017.0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp',
|
||||
'only_matching': True,
|
||||
'url': 'https://olympics.com/en/original-series/episode/b-boys-and-b-girls-take-the-spotlight-breaking-life-road-to-paris-2024',
|
||||
'info_dict': {
|
||||
'id': '32633650-c5ee-4280-8b94-fb6defb6a9b5',
|
||||
'ext': 'mp4',
|
||||
'title': 'B-girl Nicka - Breaking Life, Road to Paris 2024 | Episode 1',
|
||||
'upload_date': '20240517',
|
||||
'timestamp': 1715948200,
|
||||
'description': 'md5:f63d728a41270ec628f6ac33ce471bb1',
|
||||
'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/a3j96l7j6so3vyfijby1',
|
||||
'duration': 1321.0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://olympics.com/en/paris-2024/videos/men-s-preliminaries-gbr-esp-ned-rsa-hockey-olympic-games-paris-2024',
|
||||
'info_dict': {
|
||||
'id': '3d96db23-8eee-4b7c-8ef5-488a0361026c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Men\'s Preliminaries GBR-ESP & NED-RSA | Hockey | Olympic Games Paris 2024',
|
||||
'upload_date': '20240727',
|
||||
'timestamp': 1722066600,
|
||||
},
|
||||
'skip': 'Geo-restricted to RU, BR, BT, NP, TM, BD, TL',
|
||||
}, {
|
||||
'url': 'https://olympics.com/en/paris-2024/videos/dnp-suni-lee-i-have-goals-and-i-have-expectations-for-myself-but-i-also-am-trying-to-give-myself-grace',
|
||||
'info_dict': {
|
||||
'id': 'a42f37ab-8a74-41d0-a7d9-af27b7b02a90',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:c7cfbc9918636a98e66400a812e4d407',
|
||||
'upload_date': '20240729',
|
||||
'timestamp': 1722288600,
|
||||
},
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _extract_from_nextjs_data(self, webpage, video_id):
    """Build an info dict from the Next.js data embedded in *webpage*.

    Returns None when no 'currentVideo' dict is found under a
    'videoPlaylist' page item, so the caller can try another method.
    Geo-restriction is reported through self.raise_geo_restricted, either
    when the data flags the video as restricted or when fetching the
    manifest fails with an HTTPError whose message mentions 'georestricted'.
    """
    nextjs_data = self._search_nextjs_data(webpage, video_id, default={})
    video = traverse_obj(nextjs_data, (
        'props', 'pageProps', 'page', 'items',
        lambda _, v: v['name'] == 'videoPlaylist', 'data', 'currentVideo', {dict}, any))
    if not video:
        return None

    allowed_countries = traverse_obj(video, ('countries', ..., {str}))
    flagged_restricted = traverse_obj(video, ('geoRestrictedVideo', {bool}))
    if flagged_restricted:
        self.raise_geo_restricted(countries=allowed_countries)

    stream_status = traverse_obj(video, ('streamingStatus', {str}))
    is_live = stream_status == 'LIVE'

    # Prefer 'videoUrl' when it is a valid URL; otherwise 'streamUrl' is required
    manifest_url = traverse_obj(video, ('videoUrl', {url_or_none}))
    if not manifest_url:
        manifest_url = video['streamUrl']
    signed_url = self._tokenize_url(manifest_url, video['jwtToken'], is_live, video_id)

    try:
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            signed_url, video_id, 'mp4', m3u8_id='hls')
    except ExtractorError as error:
        cause = error.cause
        if isinstance(cause, HTTPError) and 'georestricted' in cause.msg:
            self.raise_geo_restricted(countries=allowed_countries)
        raise

    metadata = traverse_obj(video, {
        'id': ('videoID', {str}),
        'title': ('title', {str}),
        'timestamp': ('contentDate', {parse_iso8601}),
    })
    return {
        'formats': formats,
        'subtitles': subtitles,
        'is_live': is_live,
        **metadata,
    }
|
||||
|
||||
def _tokenize_url(self, url, token, is_live, video_id):
    """Ask the metering service for a tokenized version of the m3u8 *url*.

    The original query parameters of *url* are forwarded as individual
    request parameters, while the URL itself is sent with its query removed.
    Returns the value found at ['data']['url'] in the JSON response.
    """
    service_id = 'live' if is_live else 'vod'
    request_query = {
        **parse_qs(url),
        'url': update_url(url, query=None),
        'service-id': service_id,
        'user-auth': token,
    }
    response = self._download_json(
        'https://metering.olympics.com/tokengenerator', video_id,
        'Downloading tokenized m3u8 url', query=request_query)
    return response['data']['url']
|
||||
|
||||
def _legacy_tokenize_url(self, url, video_id):
    """Return the JSON response of the legacy token generator for *url*."""
    request_query = {'url': url}
    tokenized = self._download_json(
        'https://olympics.com/tokenGenerator', video_id,
        'Downloading legacy tokenized m3u8 url', query=request_query)
    return tokenized
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if info := self._extract_from_nextjs_data(webpage, video_id):
|
||||
return info
|
||||
|
||||
title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage)
|
||||
uuid = self._html_search_meta('episode_uid', webpage)
|
||||
video_uuid = self._html_search_meta('episode_uid', webpage)
|
||||
m3u8_url = self._html_search_meta('video_url', webpage)
|
||||
json_ld = self._search_json_ld(webpage, uuid)
|
||||
json_ld = self._search_json_ld(webpage, video_uuid)
|
||||
thumbnails_list = json_ld.get('image')
|
||||
if not thumbnails_list:
|
||||
thumbnails_list = self._html_search_regex(
|
||||
@@ -48,12 +137,12 @@ class OlympicsReplayIE(InfoExtractor):
|
||||
'width': width,
|
||||
'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)),
|
||||
})
|
||||
m3u8_url = self._download_json(
|
||||
f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url')
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls')
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
self._legacy_tokenize_url(m3u8_url, video_uuid), video_uuid, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': uuid,
|
||||
'id': video_uuid,
|
||||
'title': title,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
|
||||
@@ -420,7 +420,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
|
||||
class PatreonCampaignIE(PatreonBaseIE):
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m/(?P<campaign_id>\d+))|(?P<vanity>[-\w]+))'
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m|api/campaigns)/(?P<campaign_id>\d+)|(?P<vanity>[-\w]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.patreon.com/dissonancepod/',
|
||||
'info_dict': {
|
||||
@@ -442,25 +442,44 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
'url': 'https://www.patreon.com/m/4767637/posts',
|
||||
'info_dict': {
|
||||
'title': 'Not Just Bikes',
|
||||
'channel_follower_count': int,
|
||||
'id': '4767637',
|
||||
'channel_id': '4767637',
|
||||
'channel_url': 'https://www.patreon.com/notjustbikes',
|
||||
'description': 'md5:595c6e7dca76ae615b1d38c298a287a1',
|
||||
'description': 'md5:9f4b70051216c4d5c58afe580ffc8d0f',
|
||||
'age_limit': 0,
|
||||
'channel': 'Not Just Bikes',
|
||||
'uploader_url': 'https://www.patreon.com/notjustbikes',
|
||||
'uploader': 'Not Just Bikes',
|
||||
'uploader': 'Jason',
|
||||
'uploader_id': '37306634',
|
||||
'thumbnail': r're:^https?://.*$',
|
||||
},
|
||||
'playlist_mincount': 71,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/api/campaigns/4243769/posts',
|
||||
'info_dict': {
|
||||
'title': 'Second Thought',
|
||||
'channel_follower_count': int,
|
||||
'id': '4243769',
|
||||
'channel_id': '4243769',
|
||||
'channel_url': 'https://www.patreon.com/secondthought',
|
||||
'description': 'md5:69c89a3aba43efdb76e85eb023e8de8b',
|
||||
'age_limit': 0,
|
||||
'channel': 'Second Thought',
|
||||
'uploader_url': 'https://www.patreon.com/secondthought',
|
||||
'uploader': 'JT Chapman',
|
||||
'uploader_id': '32718287',
|
||||
'thumbnail': r're:^https?://.*$',
|
||||
},
|
||||
'playlist_mincount': 201,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/dissonancepod/posts',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/m/5932659',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/api/campaigns/4243769',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -109,7 +109,7 @@ class PinterestBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class PinterestIE(PinterestBaseIE):
|
||||
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?P<id>\d+)'
|
||||
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?:[\w-]+--)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# formats found in data['videos']
|
||||
'url': 'https://www.pinterest.com/pin/664281013778109217/',
|
||||
@@ -174,6 +174,25 @@ class PinterestIE(PinterestBaseIE):
|
||||
}, {
|
||||
'url': 'https://co.pinterest.com/pin/824721750502199491/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://pinterest.com/pin/dive-into-serenity-blue-lagoon-pedi-nails-for-a-tranquil-and-refreshing-spa-experience-video-in-2024--2885187256207927',
|
||||
'info_dict': {
|
||||
'id': '2885187256207927',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dive into Serenity: Blue Lagoon Pedi Nails for a Tranquil and Refreshing Spa Experience! 💙💅',
|
||||
'description': 'md5:5da41c767d2317e42e49b663b0b2150f',
|
||||
'uploader': 'Glamour Artistry |Everyday Outfits, Luxury Fashion & Nail Designs',
|
||||
'uploader_id': '1142999717836434688',
|
||||
'upload_date': '20240702',
|
||||
'timestamp': 1719939156,
|
||||
'duration': 7.967,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'categories': 'count:9',
|
||||
'tags': ['#BlueLagoonPediNails', '#SpaExperience'],
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -628,8 +628,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
if not page_entries:
|
||||
break
|
||||
for e in page_entries:
|
||||
yield e
|
||||
yield from page_entries
|
||||
if not self._has_more(webpage):
|
||||
break
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
join_nonempty,
|
||||
time_seconds,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
@@ -167,7 +168,7 @@ class RadikoBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class RadikoIE(RadikoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<timestring>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
# QRR (文化放送) station provides <desc>
|
||||
@@ -183,8 +184,9 @@ class RadikoIE(RadikoBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
station, video_id = self._match_valid_url(url).groups()
|
||||
vid_int = unified_timestamp(video_id, False)
|
||||
station, timestring = self._match_valid_url(url).group('station', 'timestring')
|
||||
video_id = join_nonempty(station, timestring)
|
||||
vid_int = unified_timestamp(timestring, False)
|
||||
prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
|
||||
|
||||
auth_token, area_id = self._auth_client()
|
||||
@@ -207,7 +209,7 @@ class RadikoIE(RadikoBaseIE):
|
||||
'ft': radio_begin,
|
||||
'end_at': radio_end,
|
||||
'to': radio_end,
|
||||
'seek': video_id,
|
||||
'seek': timestring,
|
||||
},
|
||||
),
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class RadioFranceIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
||||
IE_NAME = 'radiofrance'
|
||||
|
||||
_TEST = {
|
||||
|
||||
@@ -6,7 +6,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ReverbNationIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
||||
'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import js_to_json
|
||||
|
||||
|
||||
class RTPIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:(?:estudoemcasa|palco|zigzag)/)?p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
|
||||
'md5': 'e736ce0c665e459ddb818546220b4ef8',
|
||||
@@ -19,9 +19,25 @@ class RTPIE(InfoExtractor):
|
||||
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rtp.pt/play/zigzag/p13166/e757904/25-curiosidades-25-de-abril',
|
||||
'md5': '9a81ed53f2b2197cfa7ed455b12f8ade',
|
||||
'info_dict': {
|
||||
'id': 'e757904',
|
||||
'ext': 'mp4',
|
||||
'title': '25 Curiosidades, 25 de Abril',
|
||||
'description': 'Estudar ou não estudar - Em cada um dos episódios descobrimos uma curiosidade acerca de como era viver em Portugal antes da revolução do 25 de abr',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/portugues-1-ano',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rtp.pt/play/palco/p13785/l7nnon',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_RX_OBFUSCATION = re.compile(r'''(?xs)
|
||||
@@ -49,17 +65,17 @@ class RTPIE(InfoExtractor):
|
||||
|
||||
f, config = self._search_regex(
|
||||
r'''(?sx)
|
||||
var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
|
||||
(?:var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*)?
|
||||
var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
|
||||
''', webpage,
|
||||
'player config', group=('f', 'config'))
|
||||
|
||||
f = self._parse_json(
|
||||
f, video_id,
|
||||
lambda data: self.__unobfuscate(data, video_id=video_id))
|
||||
config = self._parse_json(
|
||||
config, video_id,
|
||||
lambda data: self.__unobfuscate(data, video_id=video_id))
|
||||
f = config['file'] if not f else self._parse_json(
|
||||
f, video_id,
|
||||
lambda data: self.__unobfuscate(data, video_id=video_id))
|
||||
|
||||
formats = []
|
||||
if isinstance(f, dict):
|
||||
|
||||
@@ -8,14 +8,17 @@ from ..utils import (
|
||||
UnsupportedError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
format_field,
|
||||
get_element_by_class,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_count,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -382,8 +385,10 @@ class RumbleChannelIE(InfoExtractor):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
break
|
||||
raise
|
||||
for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage):
|
||||
yield self.url_result('https://rumble.com' + video_url)
|
||||
for video_url in traverse_obj(
|
||||
get_elements_html_by_class('videostream__link', webpage), (..., {extract_attributes}, 'href'),
|
||||
):
|
||||
yield self.url_result(urljoin('https://rumble.com', video_url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, playlist_id = self._match_valid_url(url).groups()
|
||||
|
||||
@@ -6,6 +6,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
@@ -80,6 +81,8 @@ class RutubeBaseIE(InfoExtractor):
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
|
||||
formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False))
|
||||
return formats
|
||||
|
||||
def _download_and_extract_formats(self, video_id, query=None):
|
||||
@@ -90,7 +93,7 @@ class RutubeBaseIE(InfoExtractor):
|
||||
class RutubeIE(RutubeBaseIE):
|
||||
IE_NAME = 'rutube'
|
||||
IE_DESC = 'Rutube videos'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:(?:live/)?video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
|
||||
|
||||
_TESTS = [{
|
||||
@@ -164,6 +167,29 @@ class RutubeIE(RutubeBaseIE):
|
||||
'uploader': 'Стас Быков',
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m'],
|
||||
}, {
|
||||
'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
|
||||
'info_dict': {
|
||||
'id': 'c58f502c7bb34a8fcdd976b221fca292',
|
||||
'ext': 'mp4',
|
||||
'categories': ['Телепередачи'],
|
||||
'description': '',
|
||||
'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
|
||||
'live_status': 'is_live',
|
||||
'age_limit': 0,
|
||||
'uploader_id': '23460655',
|
||||
'timestamp': 1652972968,
|
||||
'view_count': int,
|
||||
'upload_date': '20220519',
|
||||
'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'uploader': 'Первый канал',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -36,7 +36,7 @@ class SampleFocusIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage = self._download_webpage(url, display_id, impersonate=True)
|
||||
|
||||
sample_id = self._search_regex(
|
||||
r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
|
||||
@@ -82,7 +82,15 @@ class SampleFocusIE(InfoExtractor):
|
||||
return {
|
||||
'id': sample_id,
|
||||
'title': title,
|
||||
'url': mp3_url,
|
||||
'formats': [{
|
||||
'url': mp3_url,
|
||||
'ext': 'mp3',
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp3',
|
||||
'http_headers': {
|
||||
'Referer': url,
|
||||
},
|
||||
}],
|
||||
'display_id': display_id,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
|
||||
33
yt_dlp/extractor/screenrec.py
Normal file
33
yt_dlp/extractor/screenrec.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ScreenRecIE(InfoExtractor):
    # Extractor for screenrec.com share links; the ID is the 10-character share code
    _VALID_URL = r'https?://(?:www\.)?screenrec\.com/share/(?P<id>\w{10})'
    _TESTS = [{
        'url': 'https://screenrec.com/share/DasLtbknYo',
        'info_dict': {
            'id': 'DasLtbknYo',
            'ext': 'mp4',
            'title': '02.05.2024_03.01.25_REC',
            'description': 'Recorded with ScreenRec',
            'thumbnail': r're:^https?://.*\.gif$',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Download the share page and build the info dict from it."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The manifest URL is the quoted value of `customUrl` in the page source
        manifest_url = self._search_regex(
            r'customUrl\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 URL', group='url')

        # Prefer the OpenGraph title; fall back to the HTML <title>
        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_extract_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        formats = self._extract_m3u8_formats(manifest_url, video_id, ext='mp4')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
||||
36
yt_dlp/extractor/sen.py
Normal file
36
yt_dlp/extractor/sen.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SenIE(InfoExtractor):
    # Extractor for sen.com video pages, identified by the hex-and-dash ID in the URL
    _VALID_URL = r'https?://(?:www\.)?sen\.com/video/(?P<id>[0-9a-f-]+)'
    _TEST = {
        'url': 'https://www.sen.com/video/eef46eb1-4d79-4e28-be9d-bd937767f8c4',
        'md5': 'ff615aca9691053c94f8f10d96cd7884',
        'info_dict': {
            'id': 'eef46eb1-4d79-4e28-be9d-bd937767f8c4',
            'ext': 'mp4',
            'description': 'Florida, 28 Sep 2022',
            'title': 'Hurricane Ian',
            'tags': ['North America', 'Storm', 'Weather'],
        },
    }

    def _real_extract(self, url):
        """Query the public content API and assemble formats plus metadata."""
        video_id = self._match_id(url)

        api_data = self._download_json(
            f'https://api.sen.com/content/public/video/{video_id}', video_id)

        # Use the URL from the 'player' node when present, else the conventional VOD manifest path
        manifest_url = traverse_obj(api_data, (
            'data', 'nodes', lambda _, v: v['id'] == 'player', 'video', 'url', {url_or_none}, any))
        if not manifest_url:
            manifest_url = f'https://vod.sen.com/videos/{video_id}/manifest.m3u8'

        formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4')

        # Title, description and tags are read from the 'details' node's content
        details = traverse_obj(api_data, ('data', 'nodes', lambda _, v: v['id'] == 'details', any, 'content', {
            'title': ('title', 'text', {str}),
            'description': ('descriptions', 0, 'text', {str}),
            'tags': ('badges', ..., 'text', {str}),
        }))

        return {
            'id': video_id,
            'formats': formats,
            **details,
        }
|
||||
@@ -27,7 +27,7 @@ class ServusIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'AA-28BYCQNH92111',
|
||||
'ext': 'mp4',
|
||||
'title': 'Klettersteige in den Alpen',
|
||||
'title': 'Vie Ferrate - Klettersteige in den Alpen',
|
||||
'description': 'md5:25e47ddd83a009a0f9789ba18f2850ce',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2823,
|
||||
@@ -38,6 +38,7 @@ class ServusIE(InfoExtractor):
|
||||
'season_number': 11,
|
||||
'episode': 'Episode 8 - Vie Ferrate – Klettersteige in den Alpen',
|
||||
'episode_number': 8,
|
||||
'categories': ['Bergwelten'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
@@ -71,8 +72,11 @@ class ServusIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url).upper()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
next_data = self._search_nextjs_data(webpage, video_id, fatal=False)
|
||||
|
||||
video = self._download_json(
|
||||
'https://api-player.redbull.com/stv/servus-tv?timeZone=Europe/Berlin',
|
||||
'https://api-player.redbull.com/stv/servus-tv-playnet',
|
||||
video_id, 'Downloading video JSON', query={'videoId': video_id})
|
||||
if not video.get('videoUrl'):
|
||||
self._report_errors(video)
|
||||
@@ -89,7 +93,7 @@ class ServusIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video.get('title'),
|
||||
'description': self._get_description(video_id) or video.get('description'),
|
||||
'description': self._get_description(next_data) or video.get('description'),
|
||||
'thumbnail': video.get('poster'),
|
||||
'duration': float_or_none(video.get('duration')),
|
||||
'timestamp': unified_timestamp(video.get('currentSunrise')),
|
||||
@@ -100,16 +104,19 @@ class ServusIE(InfoExtractor):
|
||||
'episode_number': episode_number,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(next_data, ('props', 'pageProps', 'data', {
|
||||
'title': ('title', 'rendered', {str}),
|
||||
'timestamp': ('stv_date', 'raw', {int}),
|
||||
'duration': ('stv_duration', {float_or_none}),
|
||||
'categories': ('category_names', ..., {str}),
|
||||
})),
|
||||
}
|
||||
|
||||
def _get_description(self, video_id):
|
||||
info = self._download_json(
|
||||
f'https://backend.servustv.com/wp-json/rbmh/v2/media_asset/aa_id/{video_id}?fieldset=page',
|
||||
video_id, fatal=False)
|
||||
|
||||
return join_nonempty(*traverse_obj(info, (
|
||||
('stv_short_description', 'stv_long_description'),
|
||||
{lambda x: unescapeHTML(x.replace('\n\n', '\n'))})), delim='\n\n')
|
||||
def _get_description(self, next_data):
    """Join the short and long descriptions found in *next_data*.

    Each string value has double newlines collapsed to single ones and is
    passed through unescapeHTML; the parts are joined by a blank line.
    """
    description_parts = traverse_obj(next_data, (
        'props', 'pageProps', 'data',
        ('stv_short_description', 'stv_long_description'), {str},
        {lambda x: x.replace('\n\n', '\n')}, {unescapeHTML}))
    return join_nonempty(*description_parts, delim='\n\n')
|
||||
|
||||
def _report_errors(self, video):
|
||||
playability_errors = traverse_obj(video, ('playabilityErrors', ...))
|
||||
|
||||
76
yt_dlp/extractor/snapchat.py
Normal file
76
yt_dlp/extractor/snapchat.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SnapchatSpotlightIE(InfoExtractor):
    # Extractor for Snapchat Spotlight pages; the ID is the path segment after /spotlight/
    _VALID_URL = r'https?://(?:www\.)?snapchat\.com/spotlight/(?P<id>\w+)'

    _TESTS = [{
        'url': 'https://www.snapchat.com/spotlight/W7_EDlXWTBiXAEEniNoMPwAAYYWtidGhudGZpAX1TKn0JAX1TKnXJAAAAAA',
        'md5': '46c580f63592d0cbb76e974d2f9f0fcc',
        'info_dict': {
            'id': 'W7_EDlXWTBiXAEEniNoMPwAAYYWtidGhudGZpAX1TKn0JAX1TKnXJAAAAAA',
            'ext': 'mp4',
            'title': 'Views 💕',
            'description': '',
            'thumbnail': r're:https://cf-st\.sc-cdn\.net/d/kKJHIR1QAznRKK9jgYYDq\.256\.IRZXSOY',
            'duration': 4.665,
            'timestamp': 1637777831.369,
            'upload_date': '20211124',
            'repost_count': int,
            'uploader': 'shreypatel57',
            'uploader_url': 'https://www.snapchat.com/add/shreypatel57',
        },
    }, {
        'url': 'https://www.snapchat.com/spotlight/W7_EDlXWTBiXAEEniNoMPwAAYcnVjYWdwcGV1AZEaIYn5AZEaIYnrAAAAAQ',
        'md5': '4cd9626458c1a0e3e6dbe72c544a9ec2',
        'info_dict': {
            'id': 'W7_EDlXWTBiXAEEniNoMPwAAYcnVjYWdwcGV1AZEaIYn5AZEaIYnrAAAAAQ',
            'ext': 'mp4',
            'title': 'Spotlight Snap',
            'description': 'How he flirt her teacher🤭🤭🤩😍 #kdrama#cdrama #dramaclips #dramaspotlight',
            'thumbnail': r're:https://cf-st\.sc-cdn\.net/i/ztfr6xFs0FOcFhwVczWfj\.256\.IRZXSOY',
            'duration': 10.91,
            'timestamp': 1722720291.307,
            'upload_date': '20240803',
            'view_count': int,
            'repost_count': int,
            'uploader': 'ganda0535',
            'uploader_url': 'https://www.snapchat.com/add/ganda0535',
            'tags': ['#dramaspotlight', '#dramaclips', '#cdrama', '#kdrama'],
        },
    }]

    def _real_extract(self, url):
        """Read the page's Next.js props and map the story metadata to an info dict."""

        def ms_to_seconds(value):
            # Millisecond fields are scaled down by 1000
            return float_or_none(value, 1000)

        def drop_unknown_count(value):
            # A count of -1 is reported as None instead
            return None if value == -1 else value

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(webpage, video_id)
        page_props = nextjs_data['props']['pageProps']

        # Locate the story whose ID matches the URL; the trailing None is passed to
        # traverse_obj as an additional path (presumably a fallback to page_props itself -
        # NOTE(review): confirm against traverse_obj's documentation)
        story_metadata = traverse_obj(page_props, (
            'spotlightFeed', 'spotlightStories',
            lambda _, v: v['story']['storyId']['value'] == video_id, 'metadata', any), None)

        media_fields = traverse_obj(story_metadata, ('videoMetadata', {
            'title': ('name', {str}),
            'description': ('description', {str}),
            'timestamp': ('uploadDateMs', {ms_to_seconds}),
            'view_count': ('viewCount', {int_or_none}, {drop_unknown_count}),
            'repost_count': ('shareCount', {int_or_none}),
            'url': ('contentUrl', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
            'duration': ('durationMs', {ms_to_seconds}),
            'thumbnail': ('thumbnailUrl', {url_or_none}),
            'uploader': ('creator', 'personCreator', 'username', {str}),
            'uploader_url': ('creator', 'personCreator', 'url', {url_or_none}),
        }))
        # Story-level fields are merged last, so they override the video-level ones
        story_fields = traverse_obj(story_metadata, {
            'description': ('description', {str}),
            'tags': ('hashtags', ..., {str}),
            'view_count': ('engagementStats', 'viewCount', {int_or_none}, {drop_unknown_count}),
            'repost_count': ('engagementStats', 'shareCount', {int_or_none}),
        })

        return {
            'id': video_id,
            'ext': 'mp4',
            **media_fields,
            **story_fields,
        }
|
||||
@@ -472,7 +472,7 @@ class SVTPageIE(SVTBaseIE):
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
urql_state = self._search_json(
|
||||
r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)
|
||||
r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
|
||||
|
||||
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class Tele13IE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
||||
|
||||
@@ -1,33 +1,31 @@
|
||||
import base64
|
||||
import datetime as dt
|
||||
import functools
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none, traverse_obj, urlencode_postdata, urljoin
|
||||
from ..utils import int_or_none, traverse_obj, url_or_none, urljoin
|
||||
|
||||
|
||||
class TenPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
|
||||
_NETRC_MACHINE = '10play'
|
||||
_TESTS = [{
|
||||
'url': 'https://10play.com.au/neighbours/web-extras/season-39/nathan-borg-is-the-first-aussie-actor-with-a-cochlear-implant-to-join-neighbours/tpv210128qupwd',
|
||||
'url': 'https://10play.com.au/neighbours/web-extras/season-41/heres-a-first-look-at-mischa-bartons-neighbours-debut/tpv230911hyxnz',
|
||||
'info_dict': {
|
||||
'id': '6226844312001',
|
||||
'id': '6336940246112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
|
||||
'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
|
||||
'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43',
|
||||
'duration': 186,
|
||||
'season': 'Season 39',
|
||||
'season_number': 39,
|
||||
'title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
|
||||
'alt_title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
|
||||
'description': 'Neighbours Premieres Monday, September 18 At 4:30pm On 10 And 10 Play And 6:30pm On 10 Peach',
|
||||
'duration': 74,
|
||||
'season': 'Season 41',
|
||||
'season_number': 41,
|
||||
'series': 'Neighbours',
|
||||
'thumbnail': r're:https://.*\.jpg',
|
||||
'uploader': 'Channel 10',
|
||||
'age_limit': 15,
|
||||
'timestamp': 1611810000,
|
||||
'upload_date': '20210128',
|
||||
'timestamp': 1694386800,
|
||||
'upload_date': '20230910',
|
||||
'uploader_id': '2199827728001',
|
||||
},
|
||||
'params': {
|
||||
@@ -35,21 +33,30 @@ class TenPlayIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Only available in Australia',
|
||||
}, {
|
||||
'url': 'https://10play.com.au/todd-sampsons-body-hack/episodes/season-4/episode-7/tpv200921kvngh',
|
||||
'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp',
|
||||
'info_dict': {
|
||||
'id': '6192880312001',
|
||||
'id': '9000000000091177',
|
||||
'ext': 'mp4',
|
||||
'title': "Todd Sampson's Body Hack - S4 Ep. 2",
|
||||
'description': 'md5:fa278820ad90f08ea187f9458316ac74',
|
||||
'title': 'Neighbours - S42 Ep. 9107',
|
||||
'alt_title': 'Thu 05 Sep',
|
||||
'description': 'md5:37a1f4271be34b9ee2b533426a5fbaef',
|
||||
'duration': 1388,
|
||||
'episode': 'Episode 9107',
|
||||
'episode_number': 9107,
|
||||
'season': 'Season 42',
|
||||
'season_number': 42,
|
||||
'series': 'Neighbours',
|
||||
'thumbnail': r're:https://.*\.jpg',
|
||||
'age_limit': 15,
|
||||
'timestamp': 1600770600,
|
||||
'upload_date': '20200922',
|
||||
'timestamp': 1725517860,
|
||||
'upload_date': '20240905',
|
||||
'uploader': 'Channel 10',
|
||||
'uploader_id': '2199827728001',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only available in Australia',
|
||||
}, {
|
||||
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
|
||||
'only_matching': True,
|
||||
@@ -66,55 +73,42 @@ class TenPlayIE(InfoExtractor):
|
||||
'X': 18,
|
||||
}
|
||||
|
||||
def _get_bearer_token(self, video_id):
|
||||
username, password = self._get_login_info()
|
||||
if username is None or password is None:
|
||||
self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
|
||||
_timestamp = dt.datetime.now().strftime('%Y%m%d000000')
|
||||
_auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
|
||||
data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
|
||||
'X-Network-Ten-Auth': _auth_header,
|
||||
}, data=urlencode_postdata({
|
||||
'email': username,
|
||||
'password': password,
|
||||
}))
|
||||
return 'Bearer ' + data['jwt']['accessToken']
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://10play.com.au/api/v1/videos/' + content_id, content_id)
|
||||
headers = {}
|
||||
|
||||
if data.get('memberGated') is True:
|
||||
_token = self._get_bearer_token(content_id)
|
||||
headers = {'Authorization': _token}
|
||||
|
||||
_video_url = self._download_json(
|
||||
data.get('playbackApiEndpoint'), content_id, 'Downloading video JSON',
|
||||
headers=headers).get('source')
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
_video_url), content_id).url
|
||||
video_data = self._download_json(
|
||||
f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}',
|
||||
content_id, 'Downloading video JSON')
|
||||
m3u8_url = self._request_webpage(
|
||||
HEADRequest(video_data['items'][0]['HLSURL']),
|
||||
content_id, 'Checking stream URL').url
|
||||
if '10play-not-in-oz' in m3u8_url:
|
||||
self.raise_geo_restricted(countries=['AU'])
|
||||
# Attempt to get a higher quality stream
|
||||
m3u8_url = m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
'formats': formats,
|
||||
'subtitles': {'en': [{'url': data.get('captionUrl')}]} if data.get('captionUrl') else None,
|
||||
'id': data.get('altId') or content_id,
|
||||
'duration': data.get('duration'),
|
||||
'title': data.get('subtitle'),
|
||||
'alt_title': data.get('title'),
|
||||
'description': data.get('description'),
|
||||
'age_limit': self._AUS_AGES.get(data.get('classification')),
|
||||
'series': data.get('tvShow'),
|
||||
'season_number': int_or_none(data.get('season')),
|
||||
'episode_number': int_or_none(data.get('episode')),
|
||||
'timestamp': data.get('published'),
|
||||
'thumbnail': data.get('imageUrl'),
|
||||
'subtitles': {'en': [{'url': data['captionUrl']}]} if url_or_none(data.get('captionUrl')) else None,
|
||||
'uploader': 'Channel 10',
|
||||
'uploader_id': '2199827728001',
|
||||
**traverse_obj(data, {
|
||||
'id': ('altId', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'title': ('subtitle', {str}),
|
||||
'alt_title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'age_limit': ('classification', {self._AUS_AGES.get}),
|
||||
'series': ('tvShow', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'timestamp': ('published', {int_or_none}),
|
||||
'thumbnail': ('imageUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_qs,
|
||||
qualities,
|
||||
remove_start,
|
||||
srt_subtitles_timecode,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
@@ -254,7 +253,16 @@ class TikTokBaseIE(InfoExtractor):
|
||||
|
||||
def _get_subtitles(self, aweme_detail, aweme_id, user_name):
|
||||
# TODO: Extract text positioning info
|
||||
|
||||
EXT_MAP = { # From lowest to highest preference
|
||||
'creator_caption': 'json',
|
||||
'srt': 'srt',
|
||||
'webvtt': 'vtt',
|
||||
}
|
||||
preference = qualities(tuple(EXT_MAP.values()))
|
||||
|
||||
subtitles = {}
|
||||
|
||||
# aweme/detail endpoint subs
|
||||
captions_info = traverse_obj(
|
||||
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
|
||||
@@ -278,8 +286,8 @@ class TikTokBaseIE(InfoExtractor):
|
||||
if not caption.get('url'):
|
||||
continue
|
||||
subtitles.setdefault(caption.get('lang') or 'en', []).append({
|
||||
'ext': remove_start(caption.get('caption_format'), 'web'),
|
||||
'url': caption['url'],
|
||||
'ext': EXT_MAP.get(caption.get('Format')),
|
||||
})
|
||||
# webpage subs
|
||||
if not subtitles:
|
||||
@@ -288,9 +296,14 @@ class TikTokBaseIE(InfoExtractor):
|
||||
self._create_url(user_name, aweme_id), aweme_id, fatal=False)
|
||||
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
|
||||
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
|
||||
'ext': remove_start(caption.get('Format'), 'web'),
|
||||
'url': caption['Url'],
|
||||
'ext': EXT_MAP.get(caption.get('Format')),
|
||||
})
|
||||
|
||||
# Deprioritize creator_caption json since it can't be embedded or used by media players
|
||||
for lang, subs_list in subtitles.items():
|
||||
subtitles[lang] = sorted(subs_list, key=lambda x: preference(x['ext']))
|
||||
|
||||
return subtitles
|
||||
|
||||
def _parse_url_key(self, url_key):
|
||||
@@ -529,16 +542,12 @@ class TikTokBaseIE(InfoExtractor):
|
||||
**COMMON_FORMAT_INFO,
|
||||
'format_id': 'download',
|
||||
'url': self._proto_relative_url(download_url),
|
||||
'format_note': 'watermarked',
|
||||
'preference': -2,
|
||||
})
|
||||
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']):
|
||||
f.update({
|
||||
'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '),
|
||||
'preference': f.get('preference') or -2,
|
||||
})
|
||||
|
||||
# Is it a slideshow with only audio for download?
|
||||
if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})):
|
||||
audio_url = aweme_detail['music']['playUrl']
|
||||
@@ -552,7 +561,8 @@ class TikTokBaseIE(InfoExtractor):
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
return formats
|
||||
# Filter out broken formats, see https://github.com/yt-dlp/yt-dlp/issues/11034
|
||||
return [f for f in formats if urllib.parse.urlparse(f['url']).hostname != 'www.tiktok.com']
|
||||
|
||||
def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False):
|
||||
author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), {
|
||||
|
||||
@@ -1,60 +1,29 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class TVAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'
|
||||
IE_NAME = 'tvaplus'
|
||||
IE_DESC = 'TVA+'
|
||||
_VALID_URL = r'https?://(?:www\.)?tvaplus\.ca/(?:[^/?#]+/)*[\w-]+-(?P<id>\d+)(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://videos.tva.ca/details/_5596811470001',
|
||||
'info_dict': {
|
||||
'id': '5596811470001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
|
||||
'uploader_id': '5481942443001',
|
||||
'upload_date': '20171003',
|
||||
'timestamp': 1507064617,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'https://video.tva.ca/details/_5596811470001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
||||
|
||||
class QubIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
|
||||
'url': 'https://www.tvaplus.ca/tva/alerte-amber/saison-1/episode-01-1000036619',
|
||||
'md5': '949490fd0e7aee11d0543777611fbd53',
|
||||
'info_dict': {
|
||||
'id': '6084352463001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ép 01. Mon dernier jour',
|
||||
'title': 'Mon dernier jour',
|
||||
'uploader_id': '5481942443001',
|
||||
'upload_date': '20190907',
|
||||
'timestamp': 1567899756,
|
||||
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
'episode': 'Ép 01. Mon dernier jour',
|
||||
'episode': 'Mon dernier jour',
|
||||
'episode_number': 1,
|
||||
'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
|
||||
'duration': 2625.963,
|
||||
@@ -64,23 +33,36 @@ class QubIE(InfoExtractor):
|
||||
'channel': 'TVA',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
|
||||
'only_matching': True,
|
||||
'url': 'https://www.tvaplus.ca/tva/le-baiser-du-barbu/le-baiser-du-barbu-886644190',
|
||||
'info_dict': {
|
||||
'id': '6354448043112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Baiser du barbu',
|
||||
'uploader_id': '5481942443001',
|
||||
'upload_date': '20240606',
|
||||
'timestamp': 1717694023,
|
||||
'description': 'md5:025b1219086c1cbf4bc27e4e034e8b57',
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
'episode': 'Le Baiser du barbu',
|
||||
'tags': ['fullepisode', 'films'],
|
||||
'duration': 6053.504,
|
||||
'series': 'Le Baiser du barbu',
|
||||
'channel': 'TVA',
|
||||
},
|
||||
}]
|
||||
# reference_id also works with old account_id(5481942443001)
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'
|
||||
_BC_URL_TMPL = 'https://players.brightcove.net/5481942443001/default_default/index.html?videoId={}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
entity_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, entity_id)
|
||||
entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData']
|
||||
entity = self._search_nextjs_data(webpage, entity_id)['props']['pageProps']['staticEntity']
|
||||
video_id = entity['videoId']
|
||||
episode = strip_or_none(entity.get('name'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': f'https://videos.tva.ca/details/_{video_id}',
|
||||
'ie_key': TVAIE.ie_key(),
|
||||
'url': smuggle_url(self._BC_URL_TMPL.format(video_id), {'geo_countries': ['CA']}),
|
||||
'ie_key': BrightcoveNewIE.ie_key(),
|
||||
'id': video_id,
|
||||
'title': episode,
|
||||
'episode': episode,
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class TVerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'skip': 'videos are only available for 7 days',
|
||||
'url': 'https://tver.jp/episodes/ep83nf3w4p',
|
||||
@@ -23,6 +23,20 @@ class TVerIE(InfoExtractor):
|
||||
'channel': 'テレビ朝日',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/',
|
||||
'info_dict': {
|
||||
'id': '6359578055112',
|
||||
'ext': 'mp4',
|
||||
'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」',
|
||||
'timestamp': 1722279928,
|
||||
'upload_date': '20240729',
|
||||
'tags': ['20240729', 'japanese', 'japanmedal', 'paris'],
|
||||
'uploader_id': '4774017240001',
|
||||
'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg',
|
||||
'duration': 670.571,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://tver.jp/corner/f0103888',
|
||||
'only_matching': True,
|
||||
@@ -47,7 +61,15 @@ class TVerIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, video_type = self._match_valid_url(url).group('id', 'type')
|
||||
if video_type not in {'series', 'episodes'}:
|
||||
|
||||
if video_type == 'olympic/paris2024/video':
|
||||
# Player ID is taken from .content.brightcove.E200.pro.pc.account_id:
|
||||
# https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d=
|
||||
return self.url_result(smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id),
|
||||
{'geo_countries': ['JP']}), 'BrightcoveNew')
|
||||
|
||||
elif video_type not in {'series', 'episodes'}:
|
||||
webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
|
||||
video_id = self._match_id(self._search_regex(
|
||||
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
class TVN24IE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:(?!eurosport)[^/]+\.)?tvn24(?:bis)?\.pl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
|
||||
'md5': 'fbdec753d7bc29d96036808275f2130c',
|
||||
|
||||
@@ -270,7 +270,7 @@ class TwitCastingLiveIE(InfoExtractor):
|
||||
|
||||
|
||||
class TwitCastingUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)'
|
||||
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
|
||||
'info_dict': {
|
||||
|
||||
@@ -1764,7 +1764,7 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'release_timestamp': 1659904215,
|
||||
'release_date': '20220807',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# post_live/TimedOut but downloadable
|
||||
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
|
||||
@@ -1780,6 +1780,8 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'upload_date': '20230413',
|
||||
'release_timestamp': 1681839000,
|
||||
'release_date': '20230418',
|
||||
'protocol': 'm3u8', # ffmpeg is forced
|
||||
'container': 'm4a_dash', # audio-only format fixup is applied
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
@@ -1790,11 +1792,31 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'ext': 'm4a',
|
||||
'title': 'あ',
|
||||
'description': 'Twitter Space participated by nobody yet',
|
||||
'uploader': '息根とめる🔪Twitchで復活',
|
||||
'uploader': '息根とめる',
|
||||
'uploader_id': 'tomeru_ikinone',
|
||||
'live_status': 'was_live',
|
||||
'timestamp': 1685617198,
|
||||
'upload_date': '20230601',
|
||||
'protocol': 'm3u8', # ffmpeg is forced
|
||||
'container': 'm4a_dash', # audio-only format fixup is applied
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Video Space
|
||||
'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
|
||||
'info_dict': {
|
||||
'id': '1DXGydznBYWKM',
|
||||
'ext': 'mp4',
|
||||
'title': 'America and Israel’s “special relationship”',
|
||||
'description': 'Twitter Space participated by nobody yet',
|
||||
'uploader': 'Candace Owens',
|
||||
'uploader_id': 'RealCandaceO',
|
||||
'live_status': 'was_live',
|
||||
'timestamp': 1723931351,
|
||||
'upload_date': '20240817',
|
||||
'release_timestamp': 1723932000,
|
||||
'release_date': '20240817',
|
||||
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
@@ -1854,13 +1876,17 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
source = traverse_obj(
|
||||
self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
|
||||
('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
|
||||
formats = self._extract_m3u8_formats( # XXX: Some Spaces need ffmpeg as downloader
|
||||
source, metadata['media_key'], 'm4a', entry_protocol='m3u8', live=is_live,
|
||||
headers=headers, fatal=False) if source else []
|
||||
for fmt in formats:
|
||||
fmt.update({'vcodec': 'none', 'acodec': 'aac'})
|
||||
if not is_live:
|
||||
fmt['container'] = 'm4a_dash'
|
||||
is_audio_space = source and 'audio-space' in source
|
||||
formats = self._extract_m3u8_formats(
|
||||
source, metadata['media_key'], 'm4a' if is_audio_space else 'mp4',
|
||||
# XXX: Some audio-only Spaces need ffmpeg as downloader
|
||||
entry_protocol='m3u8' if is_audio_space else 'm3u8_native',
|
||||
live=is_live, headers=headers, fatal=False) if source else []
|
||||
if is_audio_space:
|
||||
for fmt in formats:
|
||||
fmt.update({'vcodec': 'none', 'acodec': 'aac'})
|
||||
if not is_live:
|
||||
fmt['container'] = 'm4a_dash'
|
||||
|
||||
participants = ', '.join(traverse_obj(
|
||||
space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
|
||||
|
||||
@@ -49,6 +49,7 @@ class KnownDRMIE(UnsupportedInfoExtractor):
|
||||
r'amazon\.(?:\w{2}\.)?\w+/gp/video',
|
||||
r'music\.amazon\.(?:\w{2}\.)?\w+',
|
||||
r'(?:watch|front)\.njpwworld\.com',
|
||||
r'qub\.ca/vrai',
|
||||
)
|
||||
|
||||
_TESTS = [{
|
||||
@@ -149,6 +150,9 @@ class KnownDRMIE(UnsupportedInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.qub.ca/vrai/l-effet-bocuse-d-or/saison-1/l-effet-bocuse-d-or-saison-1-bande-annonce-1098225063',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
148
yt_dlp/extractor/vidflex.py
Normal file
148
yt_dlp/extractor/vidflex.py
Normal file
@@ -0,0 +1,148 @@
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
mimetype2ext,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class VidflexIE(InfoExtractor):
    # Hostnames served by this extractor: any *.vidflex.tv subdomain plus the
    # custom domains listed below. Each entry is a regex fragment that gets
    # OR-ed together into _VALID_URL.
    _DOMAINS_RE = [
        r'[^.]+\.vidflex\.tv',
        r'(?:www\.)?acactv\.ca',
        r'(?:www\.)?albertalacrossetv\.com',
        r'(?:www\.)?cjfltv\.com',
        r'(?:www\.)?figureitoutbaseball\.com',
        r'(?:www\.)?ocaalive\.com',
        r'(?:www\.)?pegasussports\.tv',
        r'(?:www\.)?praxisseries\.ca',
        r'(?:www\.)?silenticetv\.com',
        r'(?:www\.)?tuffhedemantv\.com',
        r'(?:www\.)?watchfuntv\.com',
        r'live\.ofsaa\.on\.ca',
        r'tv\.procoro\.ca',
        r'tv\.realcastmedia\.net',
        r'tv\.fringetheatre\.ca',
        r'video\.haisla\.ca',
        r'video\.hockeycanada\.ca',
        r'video\.huuayaht\.org',
        r'video\.turningpointensemble\.ca',
        r'videos\.livingworks\.net',
        r'videos\.telusworldofscienceedmonton\.ca',
        r'watch\.binghamtonbulldogs\.com',
        r'watch\.rekindle\.tv',
        r'watch\.wpca\.com',
    ]
    # Path layout: /<lang>[-<region>]/c/<slug>.<numeric id>
    _VALID_URL = rf'https?://(?:{"|".join(_DOMAINS_RE)})/[a-z]{{2}}(?:-[a-z]{{2}})?/c/[\w-]+\.(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://video.hockeycanada.ca/en/c/nwt-micd-up-with-jamie-lee-rattray.107486',
        'only_matching': True,
    }, {
        # m3u8 + https
        'url': 'https://video.hockeycanada.ca/en-us/c/nwt-micd-up-with-jamie-lee-rattray.107486',
        'info_dict': {
            'id': '107486',
            'title': 'NWT: Mic’d up with Jamie Lee Rattray',
            'ext': 'mp4',
            'duration': 115,
            'timestamp': 1634310409,
            'upload_date': '20211015',
            'tags': ['English', '2021', "National Women's Team"],
            'description': 'md5:efb1cf6165b48cc3f5555c4262dd5b23',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://video.hockeycanada.ca/en/c/mwc-remembering-the-wild-ride-in-riga.112307',
        'info_dict': {
            'id': '112307',
            'title': 'MWC: Remembering the wild ride in Riga',
            'ext': 'mp4',
            'duration': 322,
            'timestamp': 1716235607,
            'upload_date': '20240520',
            'tags': ['English', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
            'description': r're:.+Canada’s National Men’s Team.+',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        # the same video in French
        'url': 'https://video.hockeycanada.ca/fr/c/cmm-retour-sur-un-parcours-endiable-a-riga.112304',
        'info_dict': {
            'id': '112304',
            'title': 'CMM : Retour sur un parcours endiablé à Riga',
            'ext': 'mp4',
            'duration': 322,
            'timestamp': 1716235545,
            'upload_date': '20240520',
            'tags': ['French', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
            'description': 'md5:cf825222882a3dab1cd62cffcf3b4d1f',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://myfbcgreenville.vidflex.tv/en/c/may-12th-2024.658',
        'only_matching': True,
    }, {
        'url': 'https://www.figureitoutbaseball.com/en/c/fiob-podcast-14-dan-bertolini-ncaa-d1-head-coach-recorded-11-29-2018.1367',
        'only_matching': True,
    }, {
        'url': 'https://videos.telusworldofscienceedmonton.ca/en/c/the-aurora-project-timelapse-4.577',
        'only_matching': True,
    }, {
        'url': 'https://www.tuffhedemantv.com/en/c/2022-tuff-hedeman-tour-hobbs-nm-january-22.227',
        'only_matching': True,
    }, {
        'url': 'https://www.albertalacrossetv.com/en/c/up-floor-ground-balls-one-more.3449',
        'only_matching': True,
    }, {
        'url': 'https://www.silenticetv.com/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
        'only_matching': True,
    }, {
        'url': 'https://jphl.vidflex.tv/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page script carries a quoted `content_api: "<url>"` entry that
        # points at the JSON endpoint describing this video.
        content_api_url = self._html_search_regex(
            r'content_api:\s*(["\'])(?P<url>https?://(?:(?!\1).)+)\1', webpage, 'content api url', group='url')
        api_response = self._download_json(content_api_url, video_id)

        # The `config` field is decoded in three steps: base64 -> text -> JSON.
        # The trailing {dict} drops anything that did not decode to an object.
        media_config = traverse_obj(
            api_response,
            ('config', {base64.b64decode}, {bytes.decode}, {json.loads}, {dict}))

        formats = list(self._yield_formats(media_config, video_id))

        # Remove the commented-out CDATA markers from the page before the
        # JSON-LD metadata search runs over it.
        json_ld_source = webpage.replace('/*<![CDATA[*/', '').replace('/*]]>*/', '')

        return {
            'id': video_id,
            'formats': formats,
            **self._search_json_ld(json_ld_source, video_id),
        }

    def _yield_formats(self, media_config, video_id):
        # Only sources whose `src` passes url_or_none are considered.
        sources = traverse_obj(media_config, ('media', 'source', lambda _, v: url_or_none(v['src'])))

        for source in sources:
            src_url = source['src']
            ext = mimetype2ext(source.get('type'))

            if ext == 'm3u8':
                # HLS manifest: expand into its variants; non-fatal on failure
                yield from self._extract_m3u8_formats(src_url, video_id, fatal=False, m3u8_id='hls')
                continue

            if ext != 'mp4':
                # Any other source type is passed through as a bare format
                yield {
                    'url': src_url,
                    'ext': ext,
                }
                continue

            # Progressive MP4: take the bitrate from a `_<n>k.mp4` suffix in
            # the URL when one is present.
            bitrate = self._search_regex(r'_(\d+)k\.mp4', src_url, 'bitrate', default=None)
            yield {
                'format_id': join_nonempty('http', bitrate),
                'url': src_url,
                'ext': 'mp4',
                'tbr': int_or_none(bitrate),
            }
|
||||
@@ -21,6 +21,7 @@ from ..utils import (
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
qualities,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
@@ -146,6 +147,8 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
})
|
||||
|
||||
# TODO: fix handling of 308 status code returned for live archive manifest requests
|
||||
QUALITIES = ('low', 'medium', 'high')
|
||||
quality = qualities(QUALITIES)
|
||||
sep_pattern = r'/sep/video/'
|
||||
for files_type in ('hls', 'dash'):
|
||||
for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
|
||||
@@ -166,6 +169,11 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id,
|
||||
note=f'Downloading {cdn_name} m3u8 information',
|
||||
fatal=False)
|
||||
# m3u8 doesn't give audio bitrates; need to prioritize based on GROUP-ID
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/10854
|
||||
for f in fmts:
|
||||
if mobj := re.search(rf'audio-({"|".join(QUALITIES)})', f['format_id']):
|
||||
f['quality'] = quality(mobj.group(1))
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif files_type == 'dash':
|
||||
@@ -212,16 +220,6 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
owner = video_data.get('owner') or {}
|
||||
video_uploader_url = owner.get('url')
|
||||
|
||||
duration = int_or_none(video_data.get('duration'))
|
||||
chapter_data = try_get(config, lambda x: x['embed']['chapters']) or []
|
||||
chapters = [{
|
||||
'title': current_chapter.get('title'),
|
||||
'start_time': current_chapter.get('timecode'),
|
||||
'end_time': next_chapter.get('timecode'),
|
||||
} for current_chapter, next_chapter in zip(chapter_data, chapter_data[1:] + [{'timecode': duration}])]
|
||||
if chapters and chapters[0]['start_time']: # Chapters may not start from 0
|
||||
chapters[:0] = [{'title': '<Untitled>', 'start_time': 0, 'end_time': chapters[0]['start_time']}]
|
||||
|
||||
return {
|
||||
'id': str_or_none(video_data.get('id')) or video_id,
|
||||
'title': video_title,
|
||||
@@ -229,8 +227,12 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
|
||||
'uploader_url': video_uploader_url,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'chapters': chapters or None,
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'chapters': sorted(traverse_obj(config, (
|
||||
'embed', 'chapters', lambda _, v: int(v['timecode']) is not None, {
|
||||
'title': ('title', {str}),
|
||||
'start_time': ('timecode', {int_or_none}),
|
||||
})), key=lambda c: c['start_time']) or None,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'live_status': live_status,
|
||||
@@ -240,13 +242,30 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
|
||||
}
|
||||
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None):
|
||||
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None, **kwargs):
|
||||
return self._download_json(
|
||||
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
|
||||
video_id, 'Downloading API JSON', headers={
|
||||
'Authorization': f'jwt {jwt_token}',
|
||||
'Accept': 'application/json',
|
||||
}, query={
|
||||
'fields': ','.join((
|
||||
'config_url', 'created_time', 'description', 'download', 'license',
|
||||
'metadata.connections.comments.total', 'metadata.connections.likes.total',
|
||||
'release_time', 'stats.plays')),
|
||||
}, **kwargs)
|
||||
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None, api_data=None):
|
||||
# Original/source formats are only available when logged in
|
||||
if not self._get_cookies('https://vimeo.com/').get('vimeo'):
|
||||
return
|
||||
|
||||
query = {'action': 'load_download_config'}
|
||||
if unlisted_hash:
|
||||
query['unlisted_hash'] = unlisted_hash
|
||||
download_data = self._download_json(
|
||||
url, video_id, fatal=False, query=query,
|
||||
headers={'X-Requested-With': 'XMLHttpRequest'},
|
||||
url, video_id, 'Loading download config JSON', fatal=False,
|
||||
query=query, headers={'X-Requested-With': 'XMLHttpRequest'},
|
||||
expected_status=(403, 404)) or {}
|
||||
source_file = download_data.get('source_file')
|
||||
download_url = try_get(source_file, lambda x: x['download_url'])
|
||||
@@ -267,15 +286,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'quality': 1,
|
||||
}
|
||||
|
||||
jwt_response = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
|
||||
if not jwt_response.get('jwt'):
|
||||
jwt = jwt or traverse_obj(self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id, 'Downloading jwt token', fatal=False), ('jwt', {str}))
|
||||
if not jwt:
|
||||
return
|
||||
headers = {'Authorization': 'jwt {}'.format(jwt_response['jwt']), 'Accept': 'application/json'}
|
||||
original_response = self._download_json(
|
||||
f'https://api.vimeo.com/videos/{video_id}', video_id,
|
||||
headers=headers, fatal=False, expected_status=(403, 404)) or {}
|
||||
for download_data in original_response.get('download') or []:
|
||||
original_response = api_data or self._call_videos_api(
|
||||
video_id, jwt, unlisted_hash, fatal=False, expected_status=(403, 404))
|
||||
for download_data in traverse_obj(original_response, ('download', ..., {dict})):
|
||||
download_url = download_data.get('link')
|
||||
if not download_url or download_data.get('quality') != 'source':
|
||||
continue
|
||||
@@ -360,7 +377,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'skip': 'No longer available',
|
||||
},
|
||||
{
|
||||
'url': 'http://player.vimeo.com/video/54469442',
|
||||
'url': 'https://player.vimeo.com/video/54469442',
|
||||
'md5': '619b811a4417aa4abe78dc653becf511',
|
||||
'note': 'Videos that embed the url in the player page',
|
||||
'info_dict': {
|
||||
@@ -376,6 +393,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/68375962',
|
||||
@@ -385,22 +403,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'id': '68375962',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl password protected test video',
|
||||
'timestamp': 1371200155,
|
||||
'timestamp': 1371214555,
|
||||
'upload_date': '20130614',
|
||||
'release_timestamp': 1371214555,
|
||||
'release_date': '20130614',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
|
||||
'uploader_id': 'user18948128',
|
||||
'uploader': 'Jaime Marquínez Ferrándiz',
|
||||
'duration': 10,
|
||||
'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
'videopassword': 'youtube-dl',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/channels/keypeele/75629013',
|
||||
@@ -424,29 +443,38 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {'format': 'http-1080p'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/76979871',
|
||||
'note': 'Video with subtitles',
|
||||
'info_dict': {
|
||||
'id': '76979871',
|
||||
'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'title': 'The New Vimeo Player (You Know, For Videos)',
|
||||
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
|
||||
'timestamp': 1381846109,
|
||||
'description': str, # FIXME: Dynamic SEO spam description
|
||||
'timestamp': 1381860509,
|
||||
'upload_date': '20131015',
|
||||
'release_timestamp': 1381860509,
|
||||
'release_date': '20131015',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
|
||||
'uploader_id': 'staff',
|
||||
'uploader': 'Vimeo Staff',
|
||||
'uploader': 'Vimeo',
|
||||
'duration': 62,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/452001751-8216e0571c251a09d7a8387550942d89f7f86f6398f8ed886e639b0dd50d3c90-d_1280',
|
||||
'subtitles': {
|
||||
'de': [{'ext': 'vtt'}],
|
||||
'en': [{'ext': 'vtt'}],
|
||||
'es': [{'ext': 'vtt'}],
|
||||
'fr': [{'ext': 'vtt'}],
|
||||
'de': 'count:3',
|
||||
'en': 'count:3',
|
||||
'es': 'count:3',
|
||||
'fr': 'count:3',
|
||||
},
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'expected_warnings': [
|
||||
'Ignoring subtitle tracks found in the HLS manifest',
|
||||
'Failed to parse XML: not well-formed',
|
||||
],
|
||||
},
|
||||
{
|
||||
# from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
|
||||
@@ -462,11 +490,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'duration': 118,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# contains original format
|
||||
# contains Original format
|
||||
'url': 'https://vimeo.com/33951933',
|
||||
'md5': '53c688fa95a55bf4b7293d37a89c5c53',
|
||||
# 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
|
||||
'info_dict': {
|
||||
'id': '33951933',
|
||||
'ext': 'mp4',
|
||||
@@ -482,15 +511,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280',
|
||||
'like_count': int,
|
||||
'tags': 'count:11',
|
||||
},
|
||||
# 'params': {'format': 'Original'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'note': 'Contains original format not accessible in webpage',
|
||||
'note': 'Contains source format not accessible in webpage',
|
||||
'url': 'https://vimeo.com/393756517',
|
||||
'md5': 'c464af248b592190a5ffbb5d33f382b0',
|
||||
# 'md5': 'c464af248b592190a5ffbb5d33f382b0',
|
||||
'info_dict': {
|
||||
'id': '393756517',
|
||||
'ext': 'mov',
|
||||
# 'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1582642091,
|
||||
'uploader_id': 'frameworkla',
|
||||
'title': 'Straight To Hell - Sabrina: Netflix',
|
||||
@@ -501,6 +534,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280',
|
||||
'uploader_url': 'https://vimeo.com/frameworkla',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# only available via https://vimeo.com/channels/tributes/6213729 and
|
||||
@@ -517,16 +552,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'channel_id': 'tributes',
|
||||
'timestamp': 1250886430,
|
||||
'upload_date': '20090821',
|
||||
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
|
||||
'description': str, # FIXME: Dynamic SEO spam description
|
||||
'duration': 321,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280',
|
||||
'like_count': int,
|
||||
'tags': ['[the shining', 'vimeohq', 'cv', 'vimeo tribute]'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# redirects to ondemand extractor and should be passed through it
|
||||
@@ -549,28 +586,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'skip': 'this page is no longer available.',
|
||||
},
|
||||
{
|
||||
'url': 'http://player.vimeo.com/video/68375962',
|
||||
'url': 'https://player.vimeo.com/video/68375962',
|
||||
'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
|
||||
'info_dict': {
|
||||
'id': '68375962',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl password protected test video',
|
||||
'timestamp': 1371200155,
|
||||
'upload_date': '20130614',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
|
||||
'uploader_id': 'user18948128',
|
||||
'uploader': 'Jaime Marquínez Ferrándiz',
|
||||
'duration': 10,
|
||||
'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
'videopassword': 'youtube-dl',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
|
||||
@@ -598,7 +630,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
|
||||
'uploader': 'Philipp Hagemeister',
|
||||
'uploader_id': 'user20132939',
|
||||
'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b',
|
||||
'description': str, # FIXME: Dynamic SEO spam description
|
||||
'upload_date': '20150209',
|
||||
'timestamp': 1423518307,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/default_1280',
|
||||
@@ -612,6 +644,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'format': 'best[protocol=https]',
|
||||
'videopassword': 'youtube-dl',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# source file returns 403: Forbidden
|
||||
@@ -639,11 +672,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'release_date': '20160329',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'https://vimeo.com/138909882',
|
||||
'info_dict': {
|
||||
'id': '138909882',
|
||||
# 'ext': 'm4v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
|
||||
'description': 'md5:5967e090768a831488f6e74b7821b3c1',
|
||||
@@ -651,11 +686,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'Firework Champions',
|
||||
'upload_date': '20150910',
|
||||
'timestamp': 1441901895,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d_1280',
|
||||
'uploader_url': 'https://vimeo.com/fireworkchampions',
|
||||
'tags': 'count:6',
|
||||
'duration': 229,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'format': 'Original',
|
||||
# 'format': 'source',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'https://vimeo.com/channels/staffpicks/143603739',
|
||||
@@ -676,8 +719,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/karimhd',
|
||||
'channel_url': 'https://vimeo.com/channels/staffpicks',
|
||||
'tags': 'count:6',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# requires passing unlisted_hash(a52724358e) to load_download_config request
|
||||
@@ -707,6 +752,82 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# chapters must be sorted, see: https://github.com/yt-dlp/yt-dlp/issues/5308
|
||||
'url': 'https://player.vimeo.com/video/756714419',
|
||||
'info_dict': {
|
||||
'id': '756714419',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dr Arielle Schwartz - Therapeutic yoga for optimum sleep',
|
||||
'uploader': 'Alex Howard',
|
||||
'uploader_id': 'user54729178',
|
||||
'uploader_url': 'https://vimeo.com/user54729178',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1520099929-[\da-f]+-d_1280',
|
||||
'duration': 2636,
|
||||
'chapters': [
|
||||
{'start_time': 0, 'end_time': 10, 'title': '<Untitled Chapter 1>'},
|
||||
{'start_time': 10, 'end_time': 106, 'title': 'Welcoming Dr Arielle Schwartz'},
|
||||
{'start_time': 106, 'end_time': 305, 'title': 'What is therapeutic yoga?'},
|
||||
{'start_time': 305, 'end_time': 594, 'title': 'Vagal toning practices'},
|
||||
{'start_time': 594, 'end_time': 888, 'title': 'Trauma and difficulty letting go'},
|
||||
{'start_time': 888, 'end_time': 1059, 'title': "Dr Schwartz' insomnia experience"},
|
||||
{'start_time': 1059, 'end_time': 1471, 'title': 'A strategy for helping sleep issues'},
|
||||
{'start_time': 1471, 'end_time': 1667, 'title': 'Yoga nidra'},
|
||||
{'start_time': 1667, 'end_time': 2121, 'title': 'Wisdom in stillness'},
|
||||
{'start_time': 2121, 'end_time': 2386, 'title': 'What helps us be more able to let go?'},
|
||||
{'start_time': 2386, 'end_time': 2510, 'title': 'Practical tips to help ourselves'},
|
||||
{'start_time': 2510, 'end_time': 2636, 'title': 'Where to find out more'},
|
||||
],
|
||||
},
|
||||
'params': {
|
||||
'http_headers': {'Referer': 'https://sleepsuperconference.com'},
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# vimeo.com URL with unlisted hash and Original format
|
||||
'url': 'https://vimeo.com/144579403/ec02229140',
|
||||
# 'md5': '6b662c2884e0373183fbde2a0d15cb78',
|
||||
'info_dict': {
|
||||
'id': '144579403',
|
||||
'ext': 'mp4',
|
||||
'title': 'SALESMANSHIP',
|
||||
'description': 'md5:4338302f347a1ff8841b4a3aecaa09f0',
|
||||
'uploader': 'Off the Picture Pictures',
|
||||
'uploader_id': 'offthepicturepictures',
|
||||
'uploader_url': 'https://vimeo.com/offthepicturepictures',
|
||||
'duration': 669,
|
||||
'upload_date': '20151104',
|
||||
'timestamp': 1446607180,
|
||||
'release_date': '20151104',
|
||||
'release_timestamp': 1446607180,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1018638656-[\da-f]+-d_1280',
|
||||
},
|
||||
# 'params': {'format': 'Original'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# player.vimeo.com URL with source format
|
||||
'url': 'https://player.vimeo.com/video/859028877',
|
||||
# 'md5': '19ca3d2463441dee2d2f0671ac2916a2',
|
||||
'info_dict': {
|
||||
'id': '859028877',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ariana Grande - Honeymoon Avenue (Live from London)',
|
||||
'uploader': 'Raja Virdi',
|
||||
'uploader_id': 'rajavirdi',
|
||||
'uploader_url': 'https://vimeo.com/rajavirdi',
|
||||
'duration': 309,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d_1280',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# user playlist alias -> https://vimeo.com/258705797
|
||||
@@ -741,16 +862,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
raise ExtractorError('Wrong video password', expected=True)
|
||||
return checked
|
||||
|
||||
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None):
|
||||
return self._download_json(
|
||||
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
|
||||
video_id, 'Downloading API JSON', headers={
|
||||
'Authorization': f'jwt {jwt_token}',
|
||||
'Accept': 'application/json',
|
||||
}, query={
|
||||
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
|
||||
})
|
||||
|
||||
def _extract_from_api(self, video_id, unlisted_hash=None):
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
|
||||
@@ -771,6 +882,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
|
||||
info = self._parse_config(self._download_json(
|
||||
video['config_url'], video_id), video_id)
|
||||
source_format = self._extract_original_format(
|
||||
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, jwt=viewer['jwt'], api_data=video)
|
||||
if source_format:
|
||||
info['formats'].append(source_format)
|
||||
|
||||
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
|
||||
info.update({
|
||||
'description': video.get('description'),
|
||||
@@ -872,7 +988,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if config.get('view') == 4:
|
||||
config = self._verify_player_video_password(
|
||||
redirect_url, video_id, headers)
|
||||
return self._parse_config(config, video_id)
|
||||
info = self._parse_config(config, video_id)
|
||||
source_format = self._extract_original_format(
|
||||
f'https://vimeo.com/{video_id}', video_id, unlisted_hash)
|
||||
if source_format:
|
||||
info['formats'].append(source_format)
|
||||
return info
|
||||
|
||||
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
|
||||
if vimeo_config:
|
||||
@@ -1240,8 +1361,22 @@ class VimeoGroupsIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
|
||||
class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
IE_NAME = 'vimeo:review'
|
||||
IE_DESC = 'Review pages on vimeo'
|
||||
_VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
|
||||
_VALID_URL = r'https?://vimeo\.com/(?P<user>[^/?#]+)/review/(?P<id>\d+)/(?P<hash>[\da-f]{10})'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/user170863801/review/996447483/a316d6ed8d',
|
||||
'info_dict': {
|
||||
'id': '996447483',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rodeo day 1-_2',
|
||||
'uploader': 'BROADKAST',
|
||||
'uploader_id': 'user170863801',
|
||||
'uploader_url': 'https://vimeo.com/user170863801',
|
||||
'duration': 30,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1912612821-09a43bd2e75c203d503aed89de7534f28fc4474a48f59c51999716931a246af5-d_1280',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML'],
|
||||
}, {
|
||||
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
|
||||
'md5': 'c507a72f780cacc12b2248bb4006d253',
|
||||
'info_dict': {
|
||||
@@ -1255,6 +1390,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/450115033-43303819d9ebe24c2630352e18b7056d25197d09b3ae901abdac4c4f1d68de71-d_1280',
|
||||
'uploader_url': 'https://vimeo.com/user21297594',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'note': 'video player needs Referer',
|
||||
'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
|
||||
@@ -1286,26 +1422,23 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_url, video_id = self._match_valid_url(url).groups()
|
||||
data = self._download_json(
|
||||
page_url.replace('/review/', '/review/data/'), video_id)
|
||||
user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash')
|
||||
data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}'
|
||||
data = self._download_json(data_url, video_id)
|
||||
viewer = {}
|
||||
if data.get('isLocked') is True:
|
||||
video_password = self._get_video_password()
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id)
|
||||
webpage = self._verify_video_password(video_id, video_password, viewer['xsrft'])
|
||||
clip_page_config = self._parse_json(self._search_regex(
|
||||
r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
|
||||
webpage, 'clip page config'), video_id)
|
||||
config_url = clip_page_config['player']['config_url']
|
||||
clip_data = clip_page_config.get('clip') or {}
|
||||
else:
|
||||
clip_data = data['clipData']
|
||||
config_url = clip_data['configUrl']
|
||||
self._verify_video_password(video_id, video_password, viewer['xsrft'])
|
||||
data = self._download_json(data_url, video_id)
|
||||
clip_data = data['clipData']
|
||||
config_url = clip_data['configUrl']
|
||||
config = self._download_json(config_url, video_id)
|
||||
info_dict = self._parse_config(config, video_id)
|
||||
source_format = self._extract_original_format(
|
||||
page_url + '/action', video_id)
|
||||
f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action',
|
||||
video_id, unlisted_hash=clip_data.get('unlistedHash'), jwt=viewer.get('jwt'))
|
||||
if source_format:
|
||||
info_dict['formats'].append(source_format)
|
||||
info_dict['description'] = clean_html(clip_data.get('description'))
|
||||
|
||||
@@ -90,7 +90,7 @@ class ViuIE(ViuBaseIE):
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
|
||||
|
||||
for key, value in video_data.items():
|
||||
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
||||
mobj = re.match(r'subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
||||
if not mobj:
|
||||
continue
|
||||
subtitles.setdefault(mobj.group('lang'), []).append({
|
||||
|
||||
@@ -8,6 +8,7 @@ from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
@@ -25,16 +26,25 @@ class WistiaBaseIE(InfoExtractor):
|
||||
|
||||
def _download_embed_config(self, config_type, config_id, referer):
|
||||
base_url = self._EMBED_BASE_URL + f'{config_type}/{config_id}'
|
||||
video_password = self.get_param('videopassword')
|
||||
embed_config = self._download_json(
|
||||
base_url + '.json', config_id, headers={
|
||||
'Referer': referer if referer.startswith('http') else base_url, # Some videos require this.
|
||||
})
|
||||
}, query=filter_dict({'password': video_password}))
|
||||
|
||||
error = traverse_obj(embed_config, 'error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
f'Error while getting the playlist: {error}', expected=True)
|
||||
|
||||
if traverse_obj(embed_config, (
|
||||
'media', ('embed_options', 'embedOptions'), 'plugin',
|
||||
'passwordProtectedVideo', 'on', any)) == 'true':
|
||||
if video_password:
|
||||
raise ExtractorError('Invalid video password', expected=True)
|
||||
raise ExtractorError(
|
||||
'This content is password-protected. Use the --video-password option', expected=True)
|
||||
|
||||
return embed_config
|
||||
|
||||
def _get_real_ext(self, url):
|
||||
|
||||
@@ -1,7 +1,17 @@
|
||||
import base64
|
||||
import math
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import InAdvancePagedList, str_or_none, traverse_obj, try_call
|
||||
from .videa import VideaIE
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class XimalayaBaseIE(InfoExtractor):
|
||||
@@ -11,7 +21,7 @@ class XimalayaBaseIE(InfoExtractor):
|
||||
class XimalayaIE(XimalayaBaseIE):
|
||||
IE_NAME = 'ximalaya'
|
||||
IE_DESC = '喜马拉雅FM'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ximalaya.com/sound/47740352/',
|
||||
@@ -71,23 +81,92 @@ class XimalayaIE(XimalayaBaseIE):
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
# VIP-restricted audio
|
||||
'url': 'https://www.ximalaya.com/sound/562111701',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _decrypt_filename(file_id, seed):
|
||||
cgstr = ''
|
||||
key = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890'
|
||||
for _ in key:
|
||||
seed = float(int(211 * seed + 30031) % 65536)
|
||||
r = int(seed / 65536 * len(key))
|
||||
cgstr += key[r]
|
||||
key = key.replace(key[r], '')
|
||||
parts = file_id.split('*')
|
||||
filename = ''.join(cgstr[int(part)] for part in parts if part.isdecimal())
|
||||
if not filename.startswith('/'):
|
||||
filename = '/' + filename
|
||||
return filename
|
||||
|
||||
@staticmethod
|
||||
def _decrypt_url_params(encrypted_params):
|
||||
params = VideaIE.rc4(
|
||||
base64.b64decode(encrypted_params), 'xkt3a41psizxrh9l').split('-')
|
||||
# sign, token, timestamp
|
||||
return params[1], params[2], params[3]
|
||||
|
||||
def _real_extract(self, url):
|
||||
scheme = 'https' if url.startswith('https') else 'http'
|
||||
|
||||
audio_id = self._match_id(url)
|
||||
audio_info_file = f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json'
|
||||
audio_info = self._download_json(
|
||||
audio_info_file, audio_id,
|
||||
f'Downloading info json {audio_info_file}', 'Unable to download info file')
|
||||
f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json', audio_id,
|
||||
'Downloading info json', 'Unable to download info file')
|
||||
|
||||
formats = [{
|
||||
formats = []
|
||||
# NOTE: VIP-restricted audio
|
||||
if audio_info.get('is_paid'):
|
||||
ts = int(time.time())
|
||||
vip_info = self._download_json(
|
||||
f'{scheme}://mpay.ximalaya.com/mobile/track/pay/{audio_id}/{ts}',
|
||||
audio_id, 'Downloading VIP info json', 'Unable to download VIP info file',
|
||||
query={'device': 'pc', 'isBackend': 'true', '_': ts})
|
||||
filename = self._decrypt_filename(vip_info['fileId'], vip_info['seed'])
|
||||
sign, token, timestamp = self._decrypt_url_params(vip_info['ep'])
|
||||
vip_url = update_url_query(
|
||||
f'{vip_info["domain"]}/download/{vip_info["apiVersion"]}{filename}', {
|
||||
'sign': sign,
|
||||
'token': token,
|
||||
'timestamp': timestamp,
|
||||
'buy_key': vip_info['buyKey'],
|
||||
'duration': vip_info['duration'],
|
||||
})
|
||||
fmt = {
|
||||
'format_id': 'vip',
|
||||
'url': vip_url,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
if '_preview_' in vip_url:
|
||||
self.report_warning(
|
||||
f'This tracks requires a VIP account. Using a sample instead. {self._login_hint()}')
|
||||
fmt.update({
|
||||
'format_note': 'Sample',
|
||||
'preference': -10,
|
||||
**traverse_obj(vip_info, {
|
||||
'filesize': ('sampleLength', {int_or_none}),
|
||||
'duration': ('sampleDuration', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
else:
|
||||
fmt.update(traverse_obj(vip_info, {
|
||||
'filesize': ('totalLength', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}))
|
||||
|
||||
fmt['abr'] = try_call(lambda: fmt['filesize'] * 8 / fmt['duration'] / 1024)
|
||||
formats.append(fmt)
|
||||
|
||||
formats.extend([{
|
||||
'format_id': f'{bps}k',
|
||||
'url': audio_info[k],
|
||||
'abr': bps,
|
||||
'vcodec': 'none',
|
||||
} for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)]
|
||||
} for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)])
|
||||
|
||||
thumbnails = []
|
||||
for k in audio_info:
|
||||
|
||||
@@ -3,16 +3,13 @@ from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class XinpianchangIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://www\.xinpianchang\.com/(?P<id>[^/]+?)(?:\D|$)'
|
||||
IE_NAME = 'xinpianchang'
|
||||
IE_DESC = 'xinpianchang.com'
|
||||
_VALID_URL = r'https?://(www\.)?xinpianchang\.com/(?P<id>a\d+)'
|
||||
IE_DESC = '新片场'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.xinpianchang.com/a11766551',
|
||||
'info_dict': {
|
||||
@@ -49,11 +46,11 @@ class XinpianchangIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id=video_id)
|
||||
domain = self.find_value_with_regex(var='requireNewDomain', webpage=webpage)
|
||||
vid = self.find_value_with_regex(var='vid', webpage=webpage)
|
||||
app_key = self.find_value_with_regex(var='modeServerAppKey', webpage=webpage)
|
||||
api = update_url_query(f'{domain}/mod/api/v2/media/{vid}', {'appKey': app_key})
|
||||
data = self._download_json(api, video_id=video_id)['data']
|
||||
video_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['detail']['video']
|
||||
|
||||
data = self._download_json(
|
||||
f'https://mod-api.xinpianchang.com/mod/api/v2/media/{video_data["vid"]}', video_id,
|
||||
query={'appKey': video_data['appKey']})['data']
|
||||
formats, subtitles = [], {}
|
||||
for k, v in data.get('resource').items():
|
||||
if k in ('dash', 'hls'):
|
||||
@@ -72,6 +69,10 @@ class XinpianchangIE(InfoExtractor):
|
||||
'width': int_or_none(prog.get('width')),
|
||||
'height': int_or_none(prog.get('height')),
|
||||
'ext': 'mp4',
|
||||
'http_headers': {
|
||||
# NB: Server returns 403 without the Range header
|
||||
'Range': 'bytes=0-',
|
||||
},
|
||||
} for prog in v if prog.get('url') or []])
|
||||
|
||||
return {
|
||||
@@ -87,6 +88,3 @@ class XinpianchangIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def find_value_with_regex(self, var, webpage):
|
||||
return self._search_regex(rf'var\s{var}\s=\s\"(?P<vid>[^\"]+)\"', webpage, name=var)
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class YleAreenaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://areena\.yle\.fi/(?P<id>[\d-]+)'
|
||||
_VALID_URL = r'https?://areena\.yle\.fi/(?P<podcast>podcastit/)?(?P<id>[\d-]+)'
|
||||
_GEO_COUNTRIES = ['FI']
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -77,7 +77,7 @@ class YleAreenaIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
|
||||
info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
|
||||
video_data = self._download_json(
|
||||
f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
|
||||
@@ -103,8 +103,11 @@ class YleAreenaIE(InfoExtractor):
|
||||
'name': sub.get('kind'),
|
||||
})
|
||||
|
||||
kaltura_id = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id'), expected_type=str)
|
||||
if kaltura_id:
|
||||
if is_podcast:
|
||||
info_dict = {
|
||||
'url': video_data['data']['ongoing_ondemand']['media_url'],
|
||||
}
|
||||
elif kaltura_id := traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id', {str})):
|
||||
info_dict = {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
|
||||
@@ -114,13 +117,11 @@ class YleAreenaIE(InfoExtractor):
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
}
|
||||
info_dict = {'formats': formats}
|
||||
|
||||
return {
|
||||
**info_dict,
|
||||
'id': video_id,
|
||||
'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
|
||||
or episode or info.get('title')),
|
||||
'description': description,
|
||||
|
||||
@@ -136,7 +136,7 @@ class YoukuIE(InfoExtractor):
|
||||
# request basic data
|
||||
basic_data_params = {
|
||||
'vid': video_id,
|
||||
'ccode': '0524',
|
||||
'ccode': '0564',
|
||||
'client_ip': '192.168.1.1',
|
||||
'utid': cna,
|
||||
'client_ts': time.time() / 1000,
|
||||
|
||||
@@ -69,136 +69,179 @@ from ..utils import (
|
||||
)
|
||||
|
||||
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
|
||||
STREAMING_DATA_PO_TOKEN = '__yt_dlp_po_token'
|
||||
|
||||
# any clients starting with _ cannot be explicitly requested by the user
|
||||
INNERTUBE_CLIENTS = {
|
||||
'web': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'WEB',
|
||||
'clientVersion': '2.20220801.00.00',
|
||||
'clientVersion': '2.20240726.00.00',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
|
||||
'REQUIRE_PO_TOKEN': True,
|
||||
},
|
||||
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
|
||||
'web_safari': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'WEB',
|
||||
'clientVersion': '2.20240726.00.00',
|
||||
'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
|
||||
'REQUIRE_PO_TOKEN': True,
|
||||
},
|
||||
'web_embedded': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'WEB_EMBEDDED_PLAYER',
|
||||
'clientVersion': '1.20220731.00.00',
|
||||
'clientVersion': '1.20240723.01.00',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
|
||||
},
|
||||
'web_music': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
|
||||
'INNERTUBE_HOST': 'music.youtube.com',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'WEB_REMIX',
|
||||
'clientVersion': '1.20220727.01.00',
|
||||
'clientVersion': '1.20240724.00.00',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
|
||||
},
|
||||
'web_creator': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'WEB_CREATOR',
|
||||
'clientVersion': '1.20220726.00.00',
|
||||
'clientVersion': '1.20240723.03.00',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
|
||||
},
|
||||
'android': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID',
|
||||
'clientVersion': '19.09.37',
|
||||
'clientVersion': '19.29.37',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
|
||||
'userAgent': 'com.google.android.youtube/19.29.37 (Linux; U; Android 11) gzip',
|
||||
'osName': 'Android',
|
||||
'osVersion': '11',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
},
|
||||
'android_embedded': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID_EMBEDDED_PLAYER',
|
||||
'clientVersion': '19.09.37',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
'REQUIRE_PO_TOKEN': True,
|
||||
},
|
||||
'android_music': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID_MUSIC',
|
||||
'clientVersion': '6.42.52',
|
||||
'clientVersion': '7.11.50',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip',
|
||||
'userAgent': 'com.google.android.apps.youtube.music/7.11.50 (Linux; U; Android 11) gzip',
|
||||
'osName': 'Android',
|
||||
'osVersion': '11',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
'REQUIRE_PO_TOKEN': True,
|
||||
},
|
||||
'android_creator': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID_CREATOR',
|
||||
'clientVersion': '22.30.100',
|
||||
'clientVersion': '24.30.100',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
|
||||
'userAgent': 'com.google.android.apps.youtube.creator/24.30.100 (Linux; U; Android 11) gzip',
|
||||
'osName': 'Android',
|
||||
'osVersion': '11',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
'REQUIRE_PO_TOKEN': True,
|
||||
},
|
||||
# YouTube Kids videos aren't returned on this client for some reason
|
||||
'android_vr': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID_VR',
|
||||
'clientVersion': '1.57.29',
|
||||
'deviceMake': 'Oculus',
|
||||
'deviceModel': 'Quest 3',
|
||||
'androidSdkVersion': 32,
|
||||
'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.57.29 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
|
||||
'osName': 'Android',
|
||||
'osVersion': '12L',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
},
|
||||
'android_testsuite': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID_TESTSUITE',
|
||||
'clientVersion': '1.9',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 11) gzip',
|
||||
'osName': 'Android',
|
||||
'osVersion': '11',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 30,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
'PLAYER_PARAMS': '2AMB',
|
||||
},
|
||||
# This client only has legacy formats and storyboards
|
||||
'android_producer': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID_PRODUCER',
|
||||
'clientVersion': '0.111.1',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip',
|
||||
'osName': 'Android',
|
||||
'osVersion': '11',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 91,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
},
|
||||
# iOS clients have HLS live streams. Setting device model to get 60fps formats.
|
||||
# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
|
||||
'ios': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS',
|
||||
'clientVersion': '19.09.3',
|
||||
'deviceModel': 'iPhone14,3',
|
||||
'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
|
||||
'clientVersion': '19.29.1',
|
||||
'deviceMake': 'Apple',
|
||||
'deviceModel': 'iPhone16,2',
|
||||
'userAgent': 'com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
|
||||
'osName': 'iPhone',
|
||||
'osVersion': '17.5.1.21F90',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
},
|
||||
'ios_embedded': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS_MESSAGES_EXTENSION',
|
||||
'clientVersion': '19.09.3',
|
||||
'deviceModel': 'iPhone14,3',
|
||||
'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
},
|
||||
'ios_music': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS_MUSIC',
|
||||
'clientVersion': '6.33.3',
|
||||
'deviceModel': 'iPhone14,3',
|
||||
'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
|
||||
'clientVersion': '7.08.2',
|
||||
'deviceMake': 'Apple',
|
||||
'deviceModel': 'iPhone16,2',
|
||||
'userAgent': 'com.google.ios.youtubemusic/7.08.2 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
|
||||
'osName': 'iPhone',
|
||||
'osVersion': '17.5.1.21F90',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
|
||||
@@ -208,9 +251,12 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS_CREATOR',
|
||||
'clientVersion': '22.33.101',
|
||||
'deviceModel': 'iPhone14,3',
|
||||
'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
|
||||
'clientVersion': '24.30.100',
|
||||
'deviceMake': 'Apple',
|
||||
'deviceModel': 'iPhone16,2',
|
||||
'userAgent': 'com.google.ios.ytcreator/24.30.100 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
|
||||
'osName': 'iPhone',
|
||||
'osVersion': '17.5.1.21F90',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
|
||||
@@ -219,19 +265,26 @@ INNERTUBE_CLIENTS = {
|
||||
# mweb has 'ultralow' formats
|
||||
# See: https://github.com/yt-dlp/yt-dlp/pull/557
|
||||
'mweb': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'MWEB',
|
||||
'clientVersion': '2.20220801.00.00',
|
||||
'clientVersion': '2.20240726.01.00',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
|
||||
},
|
||||
'tv': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'TVHTML5',
|
||||
'clientVersion': '7.20240724.13.00',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
|
||||
},
|
||||
# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
|
||||
# See: https://github.com/zerodytrash/YouTube-Internal-Clients
|
||||
'tv_embedded': {
|
||||
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
|
||||
@@ -249,6 +302,7 @@ INNERTUBE_CLIENTS = {
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -262,7 +316,7 @@ def _split_innertube_client(client_name):
|
||||
|
||||
|
||||
def short_client_name(client_name):
|
||||
main, *parts = _split_innertube_client(client_name)[0].replace('embedscreen', 'e_s').split('_')
|
||||
main, *parts = _split_innertube_client(client_name)[0].split('_')
|
||||
return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper()
|
||||
|
||||
|
||||
@@ -274,23 +328,19 @@ def build_innertube_clients():
|
||||
priority = qualities(BASE_CLIENTS[::-1])
|
||||
|
||||
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
|
||||
ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
|
||||
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
|
||||
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
|
||||
ytcfg.setdefault('REQUIRE_PO_TOKEN', False)
|
||||
ytcfg.setdefault('PLAYER_PARAMS', None)
|
||||
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
|
||||
|
||||
_, base_client, variant = _split_innertube_client(client)
|
||||
ytcfg['priority'] = 10 * priority(base_client)
|
||||
|
||||
if not variant:
|
||||
INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
|
||||
embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
|
||||
embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
|
||||
embedscreen['priority'] -= 3
|
||||
elif variant == 'embedded':
|
||||
if variant == 'embedded':
|
||||
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
|
||||
ytcfg['priority'] -= 2
|
||||
else:
|
||||
elif variant:
|
||||
ytcfg['priority'] -= 3
|
||||
|
||||
|
||||
@@ -566,9 +616,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
|
||||
or req_api_hostname or self._get_innertube_host(default_client or 'web'))
|
||||
|
||||
def _extract_api_key(self, ytcfg=None, default_client='web'):
|
||||
return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
|
||||
|
||||
def _extract_context(self, ytcfg=None, default_client='web'):
|
||||
context = get_first(
|
||||
(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
|
||||
@@ -614,13 +661,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
real_headers.update({'content-type': 'application/json'})
|
||||
if headers:
|
||||
real_headers.update(headers)
|
||||
api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
|
||||
or api_key or self._extract_api_key(default_client=default_client))
|
||||
return self._download_json(
|
||||
f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
|
||||
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
|
||||
data=json.dumps(data).encode('utf8'), headers=real_headers,
|
||||
query={'key': api_key, 'prettyPrint': 'false'})
|
||||
query=filter_dict({
|
||||
'key': self._configuration_arg(
|
||||
'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0],
|
||||
'prettyPrint': 'false',
|
||||
}, cndn=lambda _, v: v))
|
||||
|
||||
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
|
||||
return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
|
||||
@@ -647,31 +696,46 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
'identity token', default=None, fatal=False)
|
||||
|
||||
@staticmethod
|
||||
def _extract_account_syncid(*args):
|
||||
def _data_sync_id_to_delegated_session_id(self, data_sync_id):
|
||||
if not data_sync_id:
|
||||
return
|
||||
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
|
||||
# and just "user_syncid||" for primary channel. We only want the channel_syncid
|
||||
channel_syncid, _, user_syncid = data_sync_id.partition('||')
|
||||
if user_syncid:
|
||||
return channel_syncid
|
||||
|
||||
def _extract_account_syncid(self, *args):
|
||||
"""
|
||||
Extract syncId required to download private playlists of secondary channels
|
||||
Extract current session ID required to download private playlists of secondary channels
|
||||
@params response and/or ytcfg
|
||||
"""
|
||||
for data in args:
|
||||
# ytcfg includes channel_syncid if on secondary channel
|
||||
delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
|
||||
if delegated_sid:
|
||||
return delegated_sid
|
||||
sync_ids = (try_get(
|
||||
data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
|
||||
lambda x: x['DATASYNC_ID']), str) or '').split('||')
|
||||
if len(sync_ids) >= 2 and sync_ids[1]:
|
||||
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
|
||||
# and just "user_syncid||" for primary channel. We only want the channel_syncid
|
||||
return sync_ids[0]
|
||||
# ytcfg includes channel_syncid if on secondary channel
|
||||
if delegated_sid := traverse_obj(args, (..., 'DELEGATED_SESSION_ID', {str}, any)):
|
||||
return delegated_sid
|
||||
|
||||
@staticmethod
|
||||
def _extract_visitor_data(*args):
|
||||
data_sync_id = self._extract_data_sync_id(*args)
|
||||
return self._data_sync_id_to_delegated_session_id(data_sync_id)
|
||||
|
||||
def _extract_data_sync_id(self, *args):
    """
    Extract the dataSyncId of the current account.
    Its format is DELEGATED_SESSION_ID||USER_SESSION_ID or USER_SESSION_ID||
    @params response and/or ytcfg
    """
    # A value passed through the "data_sync_id" extractor argument takes precedence
    configured = self._configuration_arg('data_sync_id', [None], ie_key=YoutubeIE, casesense=True)[0]
    if configured:
        return configured

    # Look in every given object, either at the top-level key or at the nested response path
    datasync_paths = (
        'DATASYNC_ID',
        ('responseContext', 'mainAppWebResponseContext', 'datasyncId'),
    )
    return traverse_obj(args, (..., datasync_paths, {str}, any))
|
||||
|
||||
def _extract_visitor_data(self, *args):
    """
    Extract visitorData from an API response or ytcfg.
    It appears to be used to track session state.
    @params response and/or ytcfg
    """
    # A value passed through the "visitor_data" extractor argument takes precedence
    configured = self._configuration_arg('visitor_data', [None], ie_key=YoutubeIE, casesense=True)[0]
    if configured:
        return configured

    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
|
||||
@@ -972,7 +1036,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
ep=ep, fatal=True, headers=headers,
|
||||
video_id=item_id, query=query, note=note,
|
||||
context=self._extract_context(ytcfg, default_client),
|
||||
api_key=self._extract_api_key(ytcfg, default_client),
|
||||
api_hostname=api_hostname, default_client=default_client)
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, network_exceptions):
|
||||
@@ -1294,7 +1357,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
||||
}
|
||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
||||
_POTOKEN_EXPERIMENTS = ('51217476', '51217102')
|
||||
_DEFAULT_CLIENTS = ('ios', 'web_creator')
|
||||
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@@ -3129,12 +3192,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self.write_debug(f'Decrypted nsig {s} => {ret}')
|
||||
return ret
|
||||
|
||||
def _extract_n_function_name(self, jscode):
|
||||
def _extract_n_function_name(self, jscode, player_url=None):
|
||||
# Examples (with placeholders nfunc, narray, idx):
|
||||
# * .get("n"))&&(b=nfunc(b)
|
||||
# * .get("n"))&&(b=narray[idx](b)
|
||||
# * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
|
||||
# * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
|
||||
# * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
|
||||
# * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
|
||||
funcname, idx = self._search_regex(
|
||||
r'''(?x)(?:\.get\("n"\)\)&&\(b=|b=String\.fromCharCode\(110\),c=a\.get\(b\)\)&&\(c=)
|
||||
(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''',
|
||||
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
||||
if not idx:
|
||||
r'''(?x)
|
||||
(?:
|
||||
\.get\("n"\)\)&&\(b=|
|
||||
(?:
|
||||
b=String\.fromCharCode\(110\)|
|
||||
(?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
|
||||
)
|
||||
(?:
|
||||
,[a-zA-Z0-9_$]+\(a\))?,c=a\.
|
||||
(?:
|
||||
get\(b\)|
|
||||
[a-zA-Z0-9_$]+\[b\]\|\|null
|
||||
)\)&&\(c=|
|
||||
\b(?P<var>[a-zA-Z0-9_$]+)=
|
||||
)(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
|
||||
(?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''',
|
||||
jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
|
||||
if not funcname:
|
||||
self.report_warning(join_nonempty(
|
||||
'Falling back to generic n function search',
|
||||
player_url and f' player = {player_url}', delim='\n'))
|
||||
return self._search_regex(
|
||||
r'''(?xs)
|
||||
;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
|
||||
\s*\{(?:(?!};).)+?["']enhanced_except_''',
|
||||
jscode, 'Initial JS player n function name', group='name')
|
||||
elif not idx:
|
||||
return funcname
|
||||
|
||||
return json.loads(js_to_json(self._search_regex(
|
||||
@@ -3150,7 +3243,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if func_code:
|
||||
return jsi, player_id, func_code
|
||||
|
||||
func_name = self._extract_n_function_name(jscode)
|
||||
func_name = self._extract_n_function_name(jscode, player_url=player_url)
|
||||
|
||||
func_code = jsi.extract_function_code(func_name)
|
||||
|
||||
@@ -3626,6 +3719,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
**cls._get_checkok_params(),
|
||||
}
|
||||
|
||||
def _get_config_po_token(self, client):
    """
    Look up a user-supplied PO Token for the given client

    Reads the "po_token" extractor argument, whose entries are expected in the
    form "client+po_token". Entries without a "+" are reported with a warning
    and skipped.

    @param client   Client name to match against the part before the "+"
    @returns        Token of the first entry whose client part equals `client`, else None
    """
    configured_entries = self._configuration_arg('po_token', [], ie_key=YoutubeIE, casesense=True)
    for token_str in configured_entries:
        entry_client, plus, token = token_str.partition('+')
        if plus:
            if entry_client == client:
                return token
            continue
        self.report_warning(
            f'Invalid po_token configuration format. Expected "client+po_token", got "{token_str}"', only_once=True)
    return None
|
||||
|
||||
def fetch_po_token(self, client='web', visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
    """
    Get a PO Token for the given client

    A token configured by the user (see `_get_config_po_token`) is preferred;
    otherwise the request is delegated to `_fetch_po_token`.

    @param client        Client name the token is requested for
    @param visitor_data  Visitor Data the token is bound to when logged out
    @param data_sync_id  Data Sync ID the token is bound to when logged in
    @param player_url    Player URL; the "missing binding" checks below only apply when it is set
    @param kwargs        Passed through unchanged to `_fetch_po_token`
    @returns             The PO Token, or None if none could be obtained
    """
    # PO Token is bound to visitor_data / Visitor ID when logged out. Must have visitor_data for it to function.
    if not visitor_data and not self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Visitor Data. '
            'You may need to pass Visitor Data with --extractor-args "youtube:visitor_data=XXX"')
        return None

    config_po_token = self._get_config_po_token(client)
    if config_po_token:
        # PO token is bound to data_sync_id / account Session ID when logged in. However, for the config po_token,
        # if using first channel in an account then we don't need the data_sync_id anymore...
        # so only warn here and still return the configured token.
        if not data_sync_id and self.is_authenticated and player_url:
            self.report_warning(
                f'Got a PO Token for {client} client, but missing Data Sync ID for account. Formats may not work. '
                'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')

        return config_po_token

    # Require a Data Sync ID if logged in for external fetching
    if not data_sync_id and self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Data Sync ID for account. '
            'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
        return None

    return self._fetch_po_token(
        client=client,
        visitor_data=visitor_data,
        data_sync_id=data_sync_id,
        player_url=player_url,
        **kwargs,
    )
|
||||
|
||||
def _fetch_po_token(self, client, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
|
||||
"""External PO Token fetch stub"""
|
||||
|
||||
@staticmethod
|
||||
def _is_agegated(player_response):
|
||||
if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
|
||||
@@ -3642,22 +3783,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
def _is_unplayable(player_response):
|
||||
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
|
||||
|
||||
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
|
||||
|
||||
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
|
||||
syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
|
||||
sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
|
||||
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token):
|
||||
headers = self.generate_api_headers(
|
||||
ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
|
||||
ytcfg=player_ytcfg,
|
||||
default_client=client,
|
||||
visitor_data=visitor_data,
|
||||
session_index=self._extract_session_index(master_ytcfg, player_ytcfg),
|
||||
account_syncid=(
|
||||
self._data_sync_id_to_delegated_session_id(data_sync_id)
|
||||
or self._extract_account_syncid(master_ytcfg, initial_pr, player_ytcfg)
|
||||
),
|
||||
)
|
||||
|
||||
yt_query = {
|
||||
'videoId': video_id,
|
||||
}
|
||||
|
||||
pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
|
||||
if pp_arg:
|
||||
yt_query['params'] = pp_arg
|
||||
default_pp = traverse_obj(
|
||||
INNERTUBE_CLIENTS, (_split_innertube_client(client)[0], 'PLAYER_PARAMS', {str}))
|
||||
if player_params := self._configuration_arg('player_params', [default_pp], casesense=True)[0]:
|
||||
yt_query['params'] = player_params
|
||||
|
||||
if po_token:
|
||||
yt_query['serviceIntegrityDimensions'] = {'poToken': po_token}
|
||||
|
||||
sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
|
||||
yt_query.update(self._generate_player_context(sts))
|
||||
return self._extract_response(
|
||||
item_id=video_id, ep='player', query=yt_query,
|
||||
@@ -3668,30 +3818,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _get_requested_clients(self, url, smuggled_data):
|
||||
requested_clients = []
|
||||
android_clients = []
|
||||
default = ['ios', 'web']
|
||||
excluded_clients = []
|
||||
allowed_clients = sorted(
|
||||
(client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
|
||||
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
|
||||
for client in self._configuration_arg('player_client'):
|
||||
if client == 'default':
|
||||
requested_clients.extend(default)
|
||||
requested_clients.extend(self._DEFAULT_CLIENTS)
|
||||
elif client == 'all':
|
||||
requested_clients.extend(allowed_clients)
|
||||
elif client.startswith('-'):
|
||||
excluded_clients.append(client[1:])
|
||||
elif client not in allowed_clients:
|
||||
self.report_warning(f'Skipping unsupported client {client}')
|
||||
elif client.startswith('android'):
|
||||
android_clients.append(client)
|
||||
self.report_warning(f'Skipping unsupported client "{client}"')
|
||||
else:
|
||||
requested_clients.append(client)
|
||||
# Force deprioritization of broken Android clients for format de-duplication
|
||||
requested_clients.extend(android_clients)
|
||||
if not requested_clients:
|
||||
requested_clients = default
|
||||
requested_clients.extend(self._DEFAULT_CLIENTS)
|
||||
for excluded_client in excluded_clients:
|
||||
if excluded_client in requested_clients:
|
||||
requested_clients.remove(excluded_client)
|
||||
if not requested_clients:
|
||||
raise ExtractorError('No player clients have been requested', expected=True)
|
||||
|
||||
if smuggled_data.get('is_music_url') or self.is_music_url(url):
|
||||
requested_clients.extend(
|
||||
f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
|
||||
for requested_client in requested_clients:
|
||||
_, base_client, variant = _split_innertube_client(requested_client)
|
||||
music_client = f'{base_client}_music'
|
||||
if variant != 'music' and music_client in INNERTUBE_CLIENTS:
|
||||
requested_clients.append(music_client)
|
||||
|
||||
return orderedSet(requested_clients)
|
||||
|
||||
@@ -3702,19 +3857,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return pr_id
|
||||
|
||||
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
|
||||
initial_pr = ignore_initial_response = None
|
||||
initial_pr = None
|
||||
if webpage:
|
||||
if 'web' in clients:
|
||||
experiments = traverse_obj(master_ytcfg, (
|
||||
'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...))
|
||||
if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
|
||||
self.report_warning(
|
||||
'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response')
|
||||
ignore_initial_response = True
|
||||
initial_pr = self._search_json(
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
|
||||
|
||||
prs = []
|
||||
deprioritized_prs = []
|
||||
|
||||
if initial_pr and not self._invalid_player_response(initial_pr, video_id):
|
||||
# Android player_response does not have microFormats which are needed for
|
||||
# extraction of some data. So we return the initial_pr with formats
|
||||
@@ -3736,14 +3886,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return
|
||||
|
||||
tried_iframe_fallback = False
|
||||
player_url = None
|
||||
player_url = visitor_data = data_sync_id = None
|
||||
skipped_clients = {}
|
||||
while clients:
|
||||
deprioritize_pr = False
|
||||
client, base_client, variant = _split_innertube_client(clients.pop())
|
||||
player_ytcfg = {}
|
||||
if client == 'web':
|
||||
player_ytcfg = self._get_default_ytcfg() if ignore_initial_response else master_ytcfg
|
||||
elif 'configs' not in self._configuration_arg('player_skip'):
|
||||
player_ytcfg = master_ytcfg if client == 'web' else {}
|
||||
if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
|
||||
player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
|
||||
|
||||
player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
|
||||
@@ -3756,43 +3905,77 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
player_url = self._download_player_url(video_id)
|
||||
tried_iframe_fallback = True
|
||||
|
||||
pr = initial_pr if client == 'web' and not ignore_initial_response else None
|
||||
for retry in self.RetryManager(fatal=False):
|
||||
try:
|
||||
pr = pr or self._extract_player_response(
|
||||
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
|
||||
player_url if require_js_player else None, initial_pr, smuggled_data)
|
||||
except ExtractorError as e:
|
||||
self.report_warning(e)
|
||||
break
|
||||
experiments = traverse_obj(pr, (
|
||||
'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK',
|
||||
'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...))
|
||||
if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
|
||||
pr = None
|
||||
retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True)
|
||||
if not pr:
|
||||
visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
|
||||
data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)
|
||||
po_token = self.fetch_po_token(
|
||||
client=client, visitor_data=visitor_data,
|
||||
data_sync_id=data_sync_id if self.is_authenticated else None,
|
||||
player_url=player_url if require_js_player else None,
|
||||
)
|
||||
|
||||
require_po_token = self._get_default_ytcfg(client).get('REQUIRE_PO_TOKEN')
|
||||
if not po_token and require_po_token:
|
||||
self.report_warning(
|
||||
f'No PO Token provided for {client} client, '
|
||||
f'which is required for working {client} formats. '
|
||||
f'You can manually pass a PO Token for this client with '
|
||||
f'--extractor-args "youtube:po_token={client}+XXX"',
|
||||
only_once=True)
|
||||
deprioritize_pr = True
|
||||
|
||||
pr = initial_pr if client == 'web' else None
|
||||
try:
|
||||
pr = pr or self._extract_player_response(
|
||||
client, video_id,
|
||||
master_ytcfg=player_ytcfg or master_ytcfg,
|
||||
player_ytcfg=player_ytcfg,
|
||||
player_url=player_url,
|
||||
initial_pr=initial_pr,
|
||||
visitor_data=visitor_data,
|
||||
data_sync_id=data_sync_id,
|
||||
po_token=po_token)
|
||||
except ExtractorError as e:
|
||||
self.report_warning(e)
|
||||
continue
|
||||
|
||||
if pr_id := self._invalid_player_response(pr, video_id):
|
||||
skipped_clients[client] = pr_id
|
||||
elif pr:
|
||||
# Save client name for introspection later
|
||||
name = short_client_name(client)
|
||||
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = name
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = client
|
||||
sd[STREAMING_DATA_PO_TOKEN] = po_token
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = name
|
||||
prs.append(pr)
|
||||
f[STREAMING_DATA_CLIENT_NAME] = client
|
||||
f[STREAMING_DATA_PO_TOKEN] = po_token
|
||||
if deprioritize_pr:
|
||||
deprioritized_prs.append(pr)
|
||||
else:
|
||||
prs.append(pr)
|
||||
|
||||
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
|
||||
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
|
||||
append_client(f'{base_client}_creator')
|
||||
elif self._is_agegated(pr):
|
||||
if variant == 'tv_embedded':
|
||||
append_client(f'{base_client}_embedded')
|
||||
elif not variant:
|
||||
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
|
||||
# tv_embedded can work around age-gate and age-verification IF the video is embeddable
|
||||
if self._is_agegated(pr) and variant != 'tv_embedded':
|
||||
append_client(f'tv_embedded.{base_client}')
|
||||
|
||||
# Unauthenticated users will only get tv_embedded client formats if age-gated
|
||||
if self._is_agegated(pr) and not self.is_authenticated:
|
||||
self.to_screen(
|
||||
f'{video_id}: This video is age-restricted; some formats may be missing '
|
||||
f'without authentication. {self._login_hint()}', only_once=True)
|
||||
|
||||
# EU countries require age-verification for accounts to access age-restricted videos
|
||||
# If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
|
||||
# If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded
|
||||
embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr)
|
||||
if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
|
||||
self.to_screen(
|
||||
f'{video_id}: This video is age-restricted and YouTube is requiring '
|
||||
'account age-verification; some formats may be missing', only_once=True)
|
||||
# web_creator and mediaconnect can work around the age-verification requirement
|
||||
# _producer, _testsuite, & _vr variants can also work around age-verification
|
||||
append_client('web_creator', 'mediaconnect')
|
||||
|
||||
prs.extend(deprioritized_prs)
|
||||
|
||||
if skipped_clients:
|
||||
self.report_warning(
|
||||
@@ -3927,14 +4110,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self.report_warning(
|
||||
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
|
||||
|
||||
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
|
||||
# Android client formats are broken due to integrity check enforcement
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
|
||||
is_broken = client_name and client_name.startswith(short_client_name('android'))
|
||||
client_name = fmt[STREAMING_DATA_CLIENT_NAME]
|
||||
po_token = fmt.get(STREAMING_DATA_PO_TOKEN)
|
||||
|
||||
if po_token:
|
||||
fmt_url = update_url_query(fmt_url, {'pot': po_token})
|
||||
|
||||
# Clients that require PO Token return videoplayback URLs that may return 403
|
||||
is_broken = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN'))
|
||||
if is_broken:
|
||||
self.report_warning(
|
||||
f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
|
||||
'They will be deprioritized', only_once=True)
|
||||
f'{video_id}: {client_name} client formats require a PO Token which was not provided. '
|
||||
'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
|
||||
|
||||
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
|
||||
fps = int_or_none(fmt.get('fps')) or 0
|
||||
@@ -3948,7 +4135,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||
is_damaged and 'DAMAGED', is_broken and 'BROKEN',
|
||||
(self.get_param('verbose') or all_formats) and client_name,
|
||||
(self.get_param('verbose') or all_formats) and short_client_name(client_name),
|
||||
delim=', '),
|
||||
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
||||
'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
|
||||
@@ -4010,12 +4197,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
|
||||
skip_manifests.add('dash')
|
||||
|
||||
def process_manifest_format(f, proto, client_name, itag):
|
||||
def process_manifest_format(f, proto, client_name, itag, po_token):
|
||||
key = (proto, f.get('language'))
|
||||
if not all_formats and key in itags[itag]:
|
||||
return False
|
||||
itags[itag].add(key)
|
||||
|
||||
if f.get('source_preference') is None:
|
||||
f['source_preference'] = -1
|
||||
|
||||
# Clients that require PO Token return videoplayback URLs that may return 403
|
||||
# hls does not currently require PO Token
|
||||
if (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) and proto != 'hls':
|
||||
self.report_warning(
|
||||
f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. '
|
||||
'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
|
||||
f['format_note'] = join_nonempty(f.get('format_note'), 'BROKEN', delim=' ')
|
||||
f['source_preference'] -= 20
|
||||
|
||||
if itag and all_formats:
|
||||
f['format_id'] = f'{itag}-{proto}'
|
||||
elif any(p != proto for p, _ in itags[itag]):
|
||||
@@ -4027,9 +4226,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
|
||||
f['language_preference'] = PREFERRED_LANG_VALUE
|
||||
|
||||
if f.get('source_preference') is None:
|
||||
f['source_preference'] = -1
|
||||
|
||||
if itag in ('616', '235'):
|
||||
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
|
||||
f['source_preference'] += 100
|
||||
@@ -4038,7 +4234,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if f['quality'] == -1 and f.get('height'):
|
||||
f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
|
||||
if self.get_param('verbose') or all_formats:
|
||||
f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
|
||||
f['format_note'] = join_nonempty(
|
||||
f.get('format_note'), short_client_name(client_name), delim=', ')
|
||||
if f.get('fps') and f['fps'] <= 1:
|
||||
del f['fps']
|
||||
|
||||
@@ -4049,24 +4246,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
subtitles = {}
|
||||
for sd in streaming_data:
|
||||
client_name = sd.get(STREAMING_DATA_CLIENT_NAME)
|
||||
|
||||
client_name = sd[STREAMING_DATA_CLIENT_NAME]
|
||||
po_token = sd.get(STREAMING_DATA_PO_TOKEN)
|
||||
hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
|
||||
if hls_manifest_url:
|
||||
if po_token:
|
||||
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
|
||||
subtitles = self._merge_subtitles(subs, subtitles)
|
||||
for f in fmts:
|
||||
if process_manifest_format(f, 'hls', client_name, self._search_regex(
|
||||
r'/itag/(\d+)', f['url'], 'itag', default=None)):
|
||||
r'/itag/(\d+)', f['url'], 'itag', default=None), po_token):
|
||||
yield f
|
||||
|
||||
dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
|
||||
if dash_manifest_url:
|
||||
if po_token:
|
||||
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
|
||||
formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
|
||||
subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
|
||||
for f in formats:
|
||||
if process_manifest_format(f, 'dash', client_name, f['format_id']):
|
||||
if process_manifest_format(f, 'dash', client_name, f['format_id'], po_token):
|
||||
f['filesize'] = int_or_none(self._search_regex(
|
||||
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
|
||||
if needs_live_processing:
|
||||
@@ -4888,7 +5089,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
def _rich_entries(self, rich_grid_renderer):
|
||||
renderer = traverse_obj(
|
||||
rich_grid_renderer,
|
||||
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
|
||||
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any)) or {}
|
||||
video_id = renderer.get('videoId')
|
||||
if video_id:
|
||||
yield self._extract_video(renderer)
|
||||
@@ -4900,6 +5101,21 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
||||
video_title=self._get_text(renderer, 'title'))
|
||||
return
|
||||
# shortsLockupViewModel extraction
|
||||
entity_id = renderer.get('entityId')
|
||||
if entity_id:
|
||||
video_id = traverse_obj(renderer, ('onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId', {str}))
|
||||
if not video_id:
|
||||
return
|
||||
yield self.url_result(
|
||||
f'https://www.youtube.com/shorts/{video_id}',
|
||||
ie=YoutubeIE, video_id=video_id,
|
||||
**traverse_obj(renderer, ('overlayMetadata', {
|
||||
'title': ('primaryText', 'content', {str}),
|
||||
'view_count': ('secondaryText', 'content', {parse_count}),
|
||||
})),
|
||||
thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
|
||||
return
|
||||
|
||||
def _video_entry(self, video_renderer):
|
||||
video_id = video_renderer.get('videoId')
|
||||
@@ -7439,6 +7655,8 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
|
||||
'id': clip_id,
|
||||
'section_start': int(clip_data['startTimeMs']) / 1000,
|
||||
'section_end': int(clip_data['endTimeMs']) / 1000,
|
||||
'_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility
|
||||
'proto:https', 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang'),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -709,9 +709,9 @@ class JSInterpreter:
|
||||
obj.reverse()
|
||||
return obj
|
||||
elif member == 'slice':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(len(argvals) == 1, 'takes exactly one argument')
|
||||
return obj[argvals[0]:]
|
||||
assertion(isinstance(obj, (list, str)), 'must be applied on a list or string')
|
||||
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
|
||||
return obj[slice(*argvals, None)]
|
||||
elif member == 'splice':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
|
||||
@@ -31,9 +31,9 @@ if curl_cffi is None:
|
||||
|
||||
curl_cffi_version = tuple(map(int, re.split(r'[^\d]+', curl_cffi.__version__)[:3]))
|
||||
|
||||
if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 8, 0)):
|
||||
if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 7, 2)):
|
||||
curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)'
|
||||
raise ImportError('Only curl_cffi versions 0.5.10, 0.7.X are supported')
|
||||
raise ImportError('Only curl_cffi versions 0.5.10, 0.7.0 and 0.7.1 are supported')
|
||||
|
||||
import curl_cffi.requests
|
||||
from curl_cffi.const import CurlECode, CurlOpt
|
||||
|
||||
@@ -10,7 +10,7 @@ import typing
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
from .exceptions import RequestError, UnsupportedRequest
|
||||
from .exceptions import RequestError
|
||||
from ..dependencies import certifi
|
||||
from ..socks import ProxyType, sockssocket
|
||||
from ..utils import format_field, traverse_obj
|
||||
@@ -206,7 +206,7 @@ def wrap_request_errors(func):
|
||||
def wrapper(self, *args, **kwargs):
|
||||
try:
|
||||
return func(self, *args, **kwargs)
|
||||
except UnsupportedRequest as e:
|
||||
except RequestError as e:
|
||||
if e.handler is None:
|
||||
e.handler = self
|
||||
raise
|
||||
|
||||
@@ -33,8 +33,8 @@ if not websockets:
|
||||
import websockets.version
|
||||
|
||||
websockets_version = tuple(map(int_or_none, websockets.version.version.split('.')))
|
||||
if websockets_version < (12, 0):
|
||||
raise ImportError('Only websockets>=12.0 is supported')
|
||||
if websockets_version < (13, 0):
|
||||
raise ImportError('Only websockets>=13.0 is supported')
|
||||
|
||||
import websockets.sync.client
|
||||
from websockets.uri import parse_uri
|
||||
@@ -47,10 +47,7 @@ from websockets.uri import parse_uri
|
||||
# 2: "AttributeError: 'ClientConnection' object has no attribute 'recv_events_exc'. Did you mean: 'recv_events'?"
|
||||
import websockets.sync.connection # isort: split
|
||||
with contextlib.suppress(Exception):
|
||||
# > 12.0
|
||||
websockets.sync.connection.Connection.recv_exc = None
|
||||
# 12.0
|
||||
websockets.sync.connection.Connection.recv_events_exc = None
|
||||
|
||||
|
||||
class WebsocketsResponseAdapter(WebSocketResponse):
|
||||
@@ -162,7 +159,7 @@ class WebsocketsRH(WebSocketRequestHandler):
|
||||
additional_headers=headers,
|
||||
open_timeout=timeout,
|
||||
user_agent_header=None,
|
||||
ssl_context=ssl_ctx if wsuri.secure else None,
|
||||
ssl=ssl_ctx if wsuri.secure else None,
|
||||
close_timeout=0, # not ideal, but prevents yt-dlp hanging
|
||||
)
|
||||
return WebsocketsResponseAdapter(conn, url=request.url)
|
||||
|
||||
@@ -647,16 +647,16 @@ def create_parser():
|
||||
'You can also simply specify a field to match if the field is present, '
|
||||
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
|
||||
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
|
||||
'the filter matches if at least one of the conditions is met. E.g. --match-filter '
|
||||
'!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
|
||||
'the filter matches if at least one of the conditions is met. E.g. --match-filters '
|
||||
'!is_live --match-filters "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
|
||||
'matches only videos that are not live OR those that have a like count more than 100 '
|
||||
'(or the like field is not available) and also has a description '
|
||||
'that contains the phrase "cats & dogs" (caseless). '
|
||||
'Use "--match-filter -" to interactively ask whether to download each video'))
|
||||
'Use "--match-filters -" to interactively ask whether to download each video'))
|
||||
selection.add_option(
|
||||
'--no-match-filters',
|
||||
dest='match_filter', action='store_const', const=None,
|
||||
help='Do not use any --match-filter (default)')
|
||||
help='Do not use any --match-filters (default)')
|
||||
selection.add_option(
|
||||
'--break-match-filters',
|
||||
metavar='FILTER', dest='breaking_match_filter', action='append',
|
||||
@@ -704,7 +704,7 @@ def create_parser():
|
||||
selection.add_option(
|
||||
'--break-per-input',
|
||||
action='store_true', dest='break_per_url', default=False,
|
||||
help='Alters --max-downloads, --break-on-existing, --break-match-filter, and autonumber to reset per input URL')
|
||||
help='Alters --max-downloads, --break-on-existing, --break-match-filters, and autonumber to reset per input URL')
|
||||
selection.add_option(
|
||||
'--no-break-per-input',
|
||||
action='store_false', dest='break_per_url',
|
||||
@@ -1725,15 +1725,17 @@ def create_parser():
|
||||
'--convert-subs', '--convert-sub', '--convert-subtitles',
|
||||
metavar='FORMAT', dest='convertsubtitles', default=None,
|
||||
help=(
|
||||
'Convert the subtitles to another format (currently supported: {}) '
|
||||
'(Alias: --convert-subtitles)'.format(', '.join(sorted(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)))))
|
||||
'Convert the subtitles to another format '
|
||||
f'(currently supported: {", ".join(sorted(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS))}). '
|
||||
'Use "--convert-subs none" to disable conversion (default) (Alias: --convert-subtitles)'))
|
||||
postproc.add_option(
|
||||
'--convert-thumbnails',
|
||||
metavar='FORMAT', dest='convertthumbnails', default=None,
|
||||
help=(
|
||||
'Convert the thumbnails to another format '
|
||||
f'(currently supported: {", ".join(sorted(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS))}). '
|
||||
'You can specify multiple rules using similar syntax as --remux-video'))
|
||||
'You can specify multiple rules using similar syntax as "--remux-video". '
|
||||
'Use "--convert-thumbnails none" to disable conversion (default)'))
|
||||
postproc.add_option(
|
||||
'--split-chapters', '--split-tracks',
|
||||
dest='split_chapters', action='store_true', default=False,
|
||||
|
||||
@@ -33,7 +33,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
|
||||
def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
|
||||
FFmpegPostProcessor.__init__(self, downloader)
|
||||
self._categories = tuple(categories or self.CATEGORIES.keys())
|
||||
self._API_URL = api if re.match('^https?://', api) else 'https://' + api
|
||||
self._API_URL = api if re.match('https?://', api) else 'https://' + api
|
||||
|
||||
def run(self, info):
|
||||
extractor = info['extractor_key']
|
||||
|
||||
@@ -135,20 +135,42 @@ def _get_binary_name():
|
||||
|
||||
|
||||
def _get_system_deprecation():
|
||||
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 8), (3, 8)
|
||||
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 8), (3, 9)
|
||||
|
||||
if sys.version_info > MIN_RECOMMENDED:
|
||||
return None
|
||||
|
||||
major, minor = sys.version_info[:2]
|
||||
if sys.version_info < MIN_SUPPORTED:
|
||||
msg = f'Python version {major}.{minor} is no longer supported'
|
||||
else:
|
||||
msg = (f'Support for Python version {major}.{minor} has been deprecated. '
|
||||
'\nYou may stop receiving updates on this version at any time')
|
||||
PYTHON_MSG = f'Please update to Python {".".join(map(str, MIN_RECOMMENDED))} or above'
|
||||
|
||||
major, minor = MIN_RECOMMENDED
|
||||
return f'{msg}! Please update to Python {major}.{minor} or above'
|
||||
if sys.version_info < MIN_SUPPORTED:
|
||||
return f'Python version {major}.{minor} is no longer supported! {PYTHON_MSG}'
|
||||
|
||||
EXE_MSG_TMPL = ('Support for {} has been deprecated. '
|
||||
'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}')
|
||||
STOP_MSG = 'You may stop receiving updates on this version at any time!'
|
||||
variant = detect_variant()
|
||||
|
||||
# Temporary until Windows builds use 3.9, which will drop support for Win7 and 2008ServerR2
|
||||
if variant in ('win_exe', 'win_x86_exe', 'py2exe'):
|
||||
platform_name = platform.platform()
|
||||
if any(platform_name.startswith(f'Windows-{name}') for name in ('7', '2008ServerR2')):
|
||||
return EXE_MSG_TMPL.format('Windows 7/Server 2008 R2', 'issues/10086', STOP_MSG)
|
||||
elif variant == 'py2exe':
|
||||
return EXE_MSG_TMPL.format(
|
||||
'py2exe builds (yt-dlp_min.exe)', 'issues/10087',
|
||||
'In a future update you will be migrated to the PyInstaller-bundled executable. '
|
||||
'This will be done automatically; no action is required on your part')
|
||||
return None
|
||||
|
||||
# Temporary until aarch64/armv7l build flow is bumped to Ubuntu 20.04 and Python 3.9
|
||||
elif variant in ('linux_aarch64_exe', 'linux_armv7l_exe'):
|
||||
libc_ver = version_tuple(os.confstr('CS_GNU_LIBC_VERSION').partition(' ')[2])
|
||||
if libc_ver < (2, 31):
|
||||
return EXE_MSG_TMPL.format('system glibc version < 2.31', 'pull/8638', STOP_MSG)
|
||||
return None
|
||||
|
||||
return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}'
|
||||
|
||||
|
||||
def _sha256_file(path):
|
||||
|
||||
@@ -1217,7 +1217,7 @@ def unified_timestamp(date_str, day_first=True):
|
||||
return None
|
||||
|
||||
date_str = re.sub(r'\s+', ' ', re.sub(
|
||||
r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
|
||||
r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str))
|
||||
|
||||
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
|
||||
timezone, date_str = extract_timezone(date_str)
|
||||
@@ -1954,7 +1954,7 @@ def urljoin(base, path):
|
||||
path = path.decode()
|
||||
if not isinstance(path, str) or not path:
|
||||
return None
|
||||
if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
|
||||
if re.match(r'(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
|
||||
return path
|
||||
if isinstance(base, bytes):
|
||||
base = base.decode()
|
||||
@@ -2007,7 +2007,7 @@ def url_or_none(url):
|
||||
if not url or not isinstance(url, str):
|
||||
return None
|
||||
url = url.strip()
|
||||
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
||||
return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
||||
|
||||
|
||||
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
|
||||
@@ -2919,6 +2919,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
|
||||
'audio/webm': 'webm',
|
||||
'audio/x-matroska': 'mka',
|
||||
'audio/x-mpegurl': 'm3u',
|
||||
'aacp': 'aac',
|
||||
'midi': 'mid',
|
||||
'ogg': 'ogg',
|
||||
'wav': 'wav',
|
||||
@@ -3112,7 +3113,7 @@ def is_html(first_bytes):
|
||||
while first_bytes.startswith(bom):
|
||||
encoding, first_bytes = enc, first_bytes[len(bom):]
|
||||
|
||||
return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
|
||||
return re.match(r'\s*<', first_bytes.decode(encoding, 'replace'))
|
||||
|
||||
|
||||
def determine_protocol(info_dict):
|
||||
@@ -5280,7 +5281,7 @@ class FormatSorter:
|
||||
|
||||
settings = {
|
||||
'vcodec': {'type': 'ordered', 'regex': True,
|
||||
'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
|
||||
'order': ['av0?1', 'vp0?9.0?2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
|
||||
'acodec': {'type': 'ordered', 'regex': True,
|
||||
'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
|
||||
'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Autogenerated by devscripts/update-version.py
|
||||
|
||||
__version__ = '2024.07.16'
|
||||
__version__ = '2024.09.27'
|
||||
|
||||
RELEASE_GIT_HEAD = '89a161e8c62569a662deda1c948664152efcb6b4'
|
||||
RELEASE_GIT_HEAD = 'c6387abc1af9842bb0541288a5610abba9b1ab51'
|
||||
|
||||
VARIANT = None
|
||||
|
||||
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
|
||||
|
||||
ORIGIN = 'yt-dlp/yt-dlp'
|
||||
|
||||
_pkg_version = '2024.07.16'
|
||||
_pkg_version = '2024.09.27'
|
||||
|
||||
Reference in New Issue
Block a user