1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2026-01-30 18:51:51 +00:00

Merge branch 'master' into GoogleDriveFolderFix

This may fix the failing CI.
This commit is contained in:
grqx
2024-10-02 17:43:36 +13:00
117 changed files with 3613 additions and 1161 deletions

View File

@@ -235,6 +235,11 @@ def validate_options(opts):
validate_regex('format sorting', f, FormatSorter.regex)
# Postprocessor formats
if opts.convertsubtitles == 'none':
opts.convertsubtitles = None
if opts.convertthumbnails == 'none':
opts.convertthumbnails = None
validate_regex('merge output format', opts.merge_output_format,
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)

View File

@@ -1053,8 +1053,9 @@ def _decrypt_windows_dpapi(ciphertext, logger):
ctypes.byref(blob_out), # pDataOut
)
if not ret:
logger.warning('failed to decrypt with DPAPI', only_once=True)
return None
message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
logger.error(message)
raise DownloadError(message) # force exit
result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
ctypes.windll.kernel32.LocalFree(blob_out.pbData)

View File

@@ -508,7 +508,7 @@ class FFmpegFD(ExternalFD):
env = None
proxy = self.params.get('proxy')
if proxy:
if not re.match(r'^[\da-zA-Z]+://', proxy):
if not re.match(r'[\da-zA-Z]+://', proxy):
proxy = f'http://{proxy}'
if proxy.startswith('socks'):
@@ -559,7 +559,7 @@ class FFmpegFD(ExternalFD):
selected_formats = info_dict.get('requested_formats') or [info_dict]
for i, fmt in enumerate(selected_formats):
is_http = re.match(r'^https?://', fmt['url'])
is_http = re.match(r'https?://', fmt['url'])
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
if cookies:
args.extend(['-cookies', ''.join(

View File

@@ -217,6 +217,7 @@ from .bbc import (
BBCCoUkIPlayerGroupIE,
BBCCoUkPlaylistIE,
)
from .beacon import BeaconTvIE
from .beatbump import (
BeatBumpPlaylistIE,
BeatBumpVideoIE,
@@ -729,6 +730,7 @@ from .genius import (
GeniusIE,
GeniusLyricsIE,
)
from .germanupa import GermanupaIE
from .getcourseru import (
GetCourseRuIE,
GetCourseRuPlayerIE,
@@ -822,7 +824,10 @@ from .hungama import (
HungamaIE,
HungamaSongIE,
)
from .huya import HuyaLiveIE
from .huya import (
HuyaLiveIE,
HuyaVideoIE,
)
from .hypem import HypemIE
from .hypergryph import MonsterSirenHypergryphMusicIE
from .hytale import HytaleIE
@@ -939,11 +944,13 @@ from .khanacademy import (
KhanAcademyUnitIE,
)
from .kick import (
KickClipIE,
KickIE,
KickVODIE,
)
from .kicker import KickerIE
from .kickstarter import KickStarterIE
from .kika import KikaIE
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE
@@ -986,6 +993,7 @@ from .lcp import (
LcpIE,
LcpPlayIE,
)
from .learningonscreen import LearningOnScreenIE
from .lecture2go import Lecture2GoIE
from .lecturio import (
LecturioCourseIE,
@@ -1034,10 +1042,7 @@ from .livestream import (
LivestreamShortenerIE,
)
from .livestreamfails import LivestreamfailsIE
from .lnkgo import (
LnkGoIE,
LnkIE,
)
from .lnk import LnkIE
from .loom import (
LoomFolderIE,
LoomIE,
@@ -1162,6 +1167,7 @@ from .mlb import (
)
from .mlssoccer import MLSSoccerIE
from .mocha import MochaVideoIE
from .mojevideo import MojevideoIE
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
from .motherless import (
@@ -1808,6 +1814,7 @@ from .screen9 import Screen9IE
from .screencast import ScreencastIE
from .screencastify import ScreencastifyIE
from .screencastomatic import ScreencastOMaticIE
from .screenrec import ScreenRecIE
from .scrippsnetworks import (
ScrippsNetworksIE,
ScrippsNetworksWatchIE,
@@ -1818,6 +1825,7 @@ from .scte import (
SCTECourseIE,
)
from .sejmpl import SejmIE
from .sen import SenIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import (
SenateGovIE,
@@ -1873,6 +1881,7 @@ from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
from .smotrim import SmotrimIE
from .snapchat import SnapchatSpotlightIE
from .snotr import SnotrIE
from .sohu import (
SohuIE,
@@ -2169,10 +2178,7 @@ from .tv5unis import (
TV5UnisVideoIE,
)
from .tv24ua import TV24UAVideoIE
from .tva import (
TVAIE,
QubIE,
)
from .tva import TVAIE
from .tvanouvelles import (
TVANouvellesArticleIE,
TVANouvellesIE,
@@ -2312,6 +2318,7 @@ from .videomore import (
VideomoreVideoIE,
)
from .videopress import VideoPressIE
from .vidflex import VidflexIE
from .vidio import (
VidioIE,
VidioLiveIE,

View File

@@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
},
'playlist_count': 15,
'skip': 'This program is not currently available in ABC iview',
}, {
'url': 'https://iview.abc.net.au/show/inbestigators',
'info_dict': {
'id': '175343-1',
'title': 'Series 1',
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
'series': 'The Inbestigators',
'season': 'Series 1',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
},
'playlist_count': 17,
}]
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id)
webpage_data = self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
webpage, 'initial state')
video_data = self._parse_json(
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
video_data = video_data['route']['pageData']['_embedded']
video_data = self._search_json(
r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
transform_source=lambda x: x.encode().decode('unicode_escape'),
end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):

View File

@@ -9,12 +9,12 @@ import re
import struct
import time
import urllib.parse
import urllib.request
import urllib.response
import uuid
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..networking import RequestHandler, Response
from ..networking.exceptions import TransportError
from ..utils import (
ExtractorError,
OnDemandPagedList,
@@ -26,37 +26,36 @@ from ..utils import (
traverse_obj,
update_url_query,
)
from ..utils.networking import clean_proxies
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
"""Add a handler for opening URLs, like _download_webpage"""
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
rh = ydl._request_director.handlers['Urllib']
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
return
headers = ydl.params['http_headers'].copy()
proxies = ydl.proxies.copy()
clean_proxies(proxies, headers)
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
assert isinstance(opener, urllib.request.OpenerDirector)
opener.add_handler(handler)
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
class AbemaLicenseRH(RequestHandler):
_SUPPORTED_URL_SCHEMES = ('abematv-license',)
_SUPPORTED_PROXY_SCHEMES = None
_SUPPORTED_FEATURES = None
RH_NAME = 'abematv_license'
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
class AbemaLicenseHandler(urllib.request.BaseHandler):
handler_order = 499
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
def __init__(self, ie: 'AbemaTVIE'):
# the protocol that this should really handle is 'abematv-license://'
# abematv_license_open is just a placeholder for development purposes
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None))
def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
super().__init__(**kwargs)
self.ie = ie
def _send(self, request):
url = request.url
ticket = urllib.parse.urlparse(url).netloc
try:
response_data = self._get_videokey_from_ticket(ticket)
except ExtractorError as e:
raise TransportError(cause=e.cause) from e
except (IndexError, KeyError, TypeError) as e:
raise TransportError(cause=repr(e)) from e
return Response(
io.BytesIO(response_data), url,
headers={'Content-Length': str(len(response_data))})
def _get_videokey_from_ticket(self, ticket):
to_show = self.ie.get_param('verbose', False)
media_token = self.ie._get_media_token(to_show=to_show)
@@ -72,25 +71,17 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
'Content-Type': 'application/json',
})
res = decode_base_n(license_response['k'], table=self.STRTABLE)
res = decode_base_n(license_response['k'], table=self._STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
h = hmac.new(
binascii.unhexlify(self.HKEY),
binascii.unhexlify(self._HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
digestmod=hashlib.sha256)
enckey = bytes_to_intlist(h.digest())
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
def abematv_license_open(self, url):
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': str(len(response_data)),
}, url=url, code=200)
class AbemaTVBaseIE(InfoExtractor):
_NETRC_MACHINE = 'abematv'
@@ -139,7 +130,7 @@ class AbemaTVBaseIE(InfoExtractor):
if self._USERTOKEN:
return self._USERTOKEN
add_opener(self._downloader, AbemaLicenseHandler(self))
self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
username, _ = self._get_login_info()
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
@@ -386,8 +377,7 @@ class AbemaTVIE(AbemaTVBaseIE):
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
note='Checking playability',
headers=headers)
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
if 3 not in ondemand_types:
if not traverse_obj(api_response, ('label', 'free', {bool})):
# cannot acquire decryption key for these streams
self.report_warning('This is a premium-only stream')
availability = 'premium_only'

View File

@@ -4,7 +4,7 @@ from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
IE_NAME = 'AcademicEarth:Course'
_TEST = {
'url': 'http://academicearth.org/playlists/laws-of-nature/',

View File

@@ -49,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
class ADNIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir',
'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir',
'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': {
'id': '9841',
@@ -71,10 +71,7 @@ class ADNIE(ADNBaseIE):
},
'skip': 'Only available in French and German speaking Europe',
}, {
'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'only_matching': True,
}, {
'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1',
'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1',
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
'info_dict': {
'id': '23550',
@@ -167,7 +164,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
'username': username,
})) or {}).get('accessToken')
if access_token:
self._HEADERS = {'authorization': 'Bearer ' + access_token}
self._HEADERS['Authorization'] = f'Bearer {access_token}'
except ExtractorError as e:
message = None
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
@@ -178,6 +175,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
def _real_extract(self, url):
lang, video_id = self._match_valid_url(url).group('lang', 'id')
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
player = self._download_json(
video_base_url + 'configuration', video_id,
@@ -218,7 +216,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
links_data = self._download_json(
links_url, video_id, 'Downloading links JSON metadata', headers={
'X-Player-Token': authorization,
'X-Target-Distribution': lang or 'fr',
**self._HEADERS,
}, query={
'freeWithAds': 'true',
@@ -257,6 +254,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
load_balancer_data = self._download_json(
load_balancer_url, video_id,
f'Downloading {format_id} {quality} JSON metadata',
headers=self._HEADERS,
fatal=False) or {}
m3u8_url = load_balancer_data.get('location')
if not m3u8_url:
@@ -277,7 +275,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
video = (self._download_json(
self._API_BASE_URL + f'video/{video_id}', video_id,
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {}
show = video.get('show') or {}
return {
@@ -299,9 +297,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
class ADNSeasonIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>[^/?#]+)/?(?:$|[#?])'
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])'
_TESTS = [{
'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new',
'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new',
'playlist_count': 12,
'info_dict': {
'id': '911',
@@ -312,16 +310,14 @@ class ADNSeasonIE(ADNBaseIE):
def _real_extract(self, url):
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
show = self._download_json(
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
'Downloading show JSON metadata', headers=self._HEADERS)['show']
show_id = str(show['id'])
episodes = self._download_json(
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
'Downloading episode list', headers={
'X-Target-Distribution': lang or 'fr',
**self._HEADERS,
}, query={
'Downloading episode list', headers=self._HEADERS, query={
'order': 'asc',
'limit': '-1',
})

View File

@@ -1,27 +1,42 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
clean_podcast_url,
get_element_by_class,
int_or_none,
parse_iso8601,
try_get,
)
from ..utils.traversal import traverse_obj
class ApplePodcastsIE(InfoExtractor):
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
_TESTS = [{
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
'info_dict': {
'id': '1000665010654',
'ext': 'mp3',
'title': 'Ferreck Dawn - To The Break of Dawn 117',
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
'upload_date': '20240812',
'timestamp': 1723449600,
'duration': 3596,
'series': 'Ferreck Dawn - To The Break of Dawn',
'thumbnail': 're:.+[.](png|jpe?g|webp)',
},
}, {
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
'md5': '41dc31cd650143e530d9423b6b5a344f',
'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
'info_dict': {
'id': '1000482637777',
'ext': 'mp3',
'title': '207 - Whitney Webb Returns',
'episode': '207 - Whitney Webb Returns',
'episode_number': 207,
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
'upload_date': '20200705',
'timestamp': 1593932400,
'duration': 6454,
'duration': 5369,
'series': 'The Tim Dillon Show',
'thumbnail': 're:.+[.](png|jpe?g|webp)',
},
@@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
def _real_extract(self, url):
episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id)
episode_data = {}
ember_data = {}
# new page type 2021-11
amp_data = self._parse_json(self._search_regex(
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
amp_data = try_get(amp_data,
lambda a: self._parse_json(
next(a[x] for x in iter(a) if episode_id in x),
episode_id),
dict) or {}
amp_data = amp_data.get('d') or []
episode_data = try_get(
amp_data,
lambda a: next(x for x in a
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
dict)
if not episode_data:
# try pre 2021-11 page type: TODO: consider deleting if no longer used
ember_data = self._parse_json(self._search_regex(
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
webpage, 'ember data'), episode_id) or {}
ember_data = ember_data.get(episode_id) or ember_data
episode_data = try_get(ember_data, lambda x: x['data'], dict)
episode = episode_data['attributes']
description = episode.get('description') or {}
series = None
for inc in (amp_data or ember_data.get('included') or []):
if inc.get('type') == 'media/podcast':
series = try_get(inc, lambda x: x['attributes']['name'])
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
server_data = self._search_json(
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
model_data = traverse_obj(server_data, (
'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
'model', {dict}, any))
return {
'id': episode_id,
'title': episode.get('name'),
'url': clean_podcast_url(episode['assetUrl']),
'description': description.get('standard') or description.get('short'),
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
'series': series,
**self._json_ld(
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
**traverse_obj(model_data, {
'title': ('title', {str}),
'url': ('streamUrl', {clean_podcast_url}),
'timestamp': ('releaseDate', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
}),
'thumbnail': self._og_search_thumbnail(webpage),
'vcodec': 'none',
}

View File

@@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):
class ARDBetaMediathekIE(InfoExtractor):
IE_NAME = 'ARDMediathek'
_VALID_URL = r'''(?x)https://
_VALID_URL = r'''(?x)https?://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/]+/)?
(?:player|live|video)/
@@ -470,7 +470,7 @@ class ARDBetaMediathekIE(InfoExtractor):
class ARDMediathekCollectionIE(InfoExtractor):
_VALID_URL = r'''(?x)https://
_VALID_URL = r'''(?x)https?://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/?#]+/)?
(?P<playlist>sendung|serie|sammlung)/

View File

@@ -101,9 +101,10 @@ class AsobiStageIE(InfoExtractor):
self._HEADERS['Authorization'] = f'Bearer {token}'
def _real_extract(self, url):
video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
webpage = self._download_webpage(url, video_id)
event_data = traverse_obj(
self._search_nextjs_data(webpage, video_id, default={}),
('props', 'pageProps', 'eventCMSData', {

View File

@@ -1,3 +1,5 @@
import functools
import json
import random
import re
import time
@@ -6,7 +8,9 @@ from .common import InfoExtractor
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
extract_attributes,
float_or_none,
get_element_html_by_id,
int_or_none,
parse_filesize,
str_or_none,
@@ -17,6 +21,7 @@ from ..utils import (
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class BandcampIE(InfoExtractor):
@@ -459,7 +464,7 @@ class BandcampUserIE(InfoExtractor):
},
}, {
'url': 'https://coldworldofficial.bandcamp.com/music',
'playlist_mincount': 10,
'playlist_mincount': 7,
'info_dict': {
'id': 'coldworldofficial',
'title': 'Discography of coldworldofficial',
@@ -473,12 +478,19 @@ class BandcampUserIE(InfoExtractor):
},
}]
def _yield_items(self, webpage):
yield from (
re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
yield from traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
'data-client-items', {json.loads}, ..., 'page_url', {str}))
def _real_extract(self, url):
uploader = self._match_id(url)
webpage = self._download_webpage(url, uploader)
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
return self.playlist_from_matches(
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
self._yield_items(webpage), uploader, f'Discography of {uploader}',
getter=functools.partial(urljoin, url))

View File

@@ -0,0 +1,68 @@
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_iso8601,
traverse_obj,
)
class BeaconTvIE(InfoExtractor):
    # Matches beacon.tv content pages; the last path segment (the slug) is used as the video id.
    _VALID_URL = r'https?://(?:www\.)?beacon\.tv/content/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://beacon.tv/content/welcome-to-beacon',
        'md5': 'b3f5932d437f288e662f10f3bfc5bd04',
        'info_dict': {
            'id': 'welcome-to-beacon',
            'ext': 'mp4',
            'upload_date': '20240509',
            'description': 'md5:ea2bd32e71acf3f9fca6937412cc3563',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/I4CkkEvN/poster.jpg?width=720',
            'title': 'Your home for Critical Role!',
            'timestamp': 1715227200,
            'duration': 105.494,
        },
    }, {
        'url': 'https://beacon.tv/content/re-slayers-take-trailer',
        'md5': 'd879b091485dbed2245094c8152afd89',
        'info_dict': {
            'id': 're-slayers-take-trailer',
            'ext': 'mp4',
            'title': 'The Re-Slayers Take | Official Trailer',
            'timestamp': 1715189040,
            'upload_date': '20240508',
            'duration': 53.249,
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/PW5ApIw3/poster.jpg?width=720',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a beacon.tv content page.

        The page's Next.js data is searched for the Apollo-state entry whose
        key starts with ``Content:`` and whose ``slug`` equals the id taken
        from the URL. The JSON-encoded player data stored on that entry is
        handed to ``_parse_jwplayer_data`` and the result is overlaid with the
        entry's own title, description and publication timestamp.

        Raises ExtractorError when no matching content entry exists, when the
        entry's ``contentType`` is not a video/podcast type (marked as
        expected), or when no player data could be decoded.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        nextjs_data = self._search_nextjs_data(page, slug)
        content = traverse_obj(nextjs_data, (
            'props', 'pageProps', '__APOLLO_STATE__',
            lambda key, entry: key.startswith('Content:') and entry['slug'] == slug, any))
        if not content:
            raise ExtractorError('Failed to extract content data')

        # Two alternative locations are tried; each holds a JSON string that
        # must decode to a dict. The first usable one wins.
        media_paths = (
            ('contentVideo', 'video', 'videoData'),
            ('contentPodcast', 'podcast', 'audioData'),
        )
        player_config = traverse_obj(content, (media_paths, {json.loads}, {dict}, any))

        if not player_config:
            content_type = content.get('contentType')
            if content_type not in ('videoPodcast', 'video', 'podcast'):
                raise ExtractorError('Content is not a video/podcast', expected=True)

            # NOTE(review): this id is presumably the tier that needs no
            # membership -- confirm against the site's data.
            tier_ref = traverse_obj(content, ('contentTier', '__ref'))
            if tier_ref != 'MemberTier:65b258d178f89be87b4dc0a4':
                self.raise_login_required('This video/podcast is for members only')
            raise ExtractorError('Failed to extract content')

        # Page-level metadata is merged last so it overrides same-named
        # fields coming from the player data.
        player_info = self._parse_jwplayer_data(player_config, slug)
        page_metadata = traverse_obj(content, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('publishedAt', {parse_iso8601}),
        })
        return {**player_info, **page_metadata}

View File

@@ -46,6 +46,7 @@ from ..utils import (
class BilibiliBaseIE(InfoExtractor):
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
_wbi_key_cache = {}
@@ -192,7 +193,7 @@ class BilibiliBaseIE(InfoExtractor):
video_info = self._download_json(
'https://api.bilibili.com/x/player/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
note=f'Extracting subtitle info {cid}')
note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
self.report_warning(
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
@@ -207,7 +208,7 @@ class BilibiliBaseIE(InfoExtractor):
def _get_chapters(self, aid, cid):
chapters = aid and cid and self._download_json(
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
note='Extracting chapters', fatal=False)
note='Extracting chapters', fatal=False, headers=self._HEADERS)
return traverse_obj(chapters, ('data', 'view_points', ..., {
'title': 'content',
'start_time': 'from',
@@ -298,7 +299,7 @@ class BilibiliBaseIE(InfoExtractor):
class BiliBiliIE(BilibiliBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bilibili.com/video/BV13x41117TL',
@@ -622,6 +623,10 @@ class BiliBiliIE(BilibiliBaseIE):
'ext': 'mp4',
},
'skip': 'geo-restricted',
}, {
'note': 'has - in the last path segment of the url',
'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -1017,8 +1022,6 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
class BilibiliCheeseBaseIE(BilibiliBaseIE):
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
def _extract_episode(self, season_info, ep_id):
episode_info = traverse_obj(season_info, (
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
@@ -1848,7 +1851,7 @@ class BiliBiliPlayerIE(InfoExtractor):
class BiliIntlBaseIE(InfoExtractor):
_API_URL = 'https://api.bilibili.tv/intl/gateway'
_NETRC_MACHINE = 'biliintl'
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
_HEADERS = {'Referer': 'https://www.bilibili.tv/'}
def _call_api(self, endpoint, *args, **kwargs):
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)

View File

@@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
class CallinIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
_TESTS = [{
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
'info_dict': {

View File

@@ -1,4 +1,5 @@
import base64
import functools
import json
import re
import time
@@ -6,17 +7,24 @@ import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
join_nonempty,
js_to_json,
mimetype2ext,
orderedSet,
parse_iso8601,
replace_extension,
smuggle_url,
strip_or_none,
traverse_obj,
try_get,
update_url,
url_basename,
url_or_none,
)
@@ -149,6 +157,7 @@ class CBCIE(InfoExtractor):
class CBCPlayerIE(InfoExtractor):
IE_NAME = 'cbc.ca:player'
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
_GEO_COUNTRIES = ['CA']
_TESTS = [{
'url': 'http://www.cbc.ca/player/play/2683190193',
'md5': '64d25f841ddf4ddb28a235338af32e2c',
@@ -172,21 +181,20 @@ class CBCPlayerIE(InfoExtractor):
'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
'timestamp': 1425704400,
'upload_date': '20150307',
'uploader': 'CBCC-NEW',
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
'chapters': [],
'duration': 494.811,
'categories': ['AudioMobile/All in a Weekend Montreal'],
'tags': 'count:8',
'categories': ['All in a Weekend Montreal'],
'tags': 'count:11',
'location': 'Quebec',
'series': 'All in a Weekend Montreal',
'season': 'Season 2015',
'season_number': 2015,
'media_type': 'Excerpt',
'genres': ['Other'],
},
}, {
'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
'info_dict': {
'id': '2164402062',
'ext': 'mp4',
@@ -194,107 +202,168 @@ class CBCPlayerIE(InfoExtractor):
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
'timestamp': 1320410746,
'upload_date': '20111104',
'uploader': 'CBCC-NEW',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
'chapters': [],
'duration': 186.867,
'series': 'CBC News: Windsor at 6:00',
'categories': ['News/Canada/Windsor'],
'categories': ['Windsor'],
'location': 'Windsor',
'tags': ['cancer'],
'creators': ['Allison Johnson'],
'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
'media_type': 'Excerpt',
'genres': ['News'],
},
'params': {'skip_download': 'm3u8'},
}, {
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
'url': 'https://www.cbc.ca/player/play/1.2985700',
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
'info_dict': {
'id': '2657631896',
'id': '1.2985700',
'ext': 'mp3',
'title': 'CBC Montreal is organizing its first ever community hackathon!',
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
'timestamp': 1425704400,
'upload_date': '20150307',
'uploader': 'CBCC-NEW',
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
'chapters': [],
'duration': 494.811,
'categories': ['AudioMobile/All in a Weekend Montreal'],
'tags': 'count:8',
'categories': ['All in a Weekend Montreal'],
'tags': 'count:11',
'location': 'Quebec',
'series': 'All in a Weekend Montreal',
'season': 'Season 2015',
'season_number': 2015,
'media_type': 'Excerpt',
'genres': ['Other'],
},
}, {
'url': 'https://www.cbc.ca/player/play/1.1711287',
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
'info_dict': {
'id': '2164402062',
'id': '1.1711287',
'ext': 'mp4',
'title': 'Cancer survivor four times over',
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
'timestamp': 1320410746,
'upload_date': '20111104',
'uploader': 'CBCC-NEW',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
'chapters': [],
'duration': 186.867,
'series': 'CBC News: Windsor at 6:00',
'categories': ['News/Canada/Windsor'],
'categories': ['Windsor'],
'location': 'Windsor',
'tags': ['cancer'],
'creators': ['Allison Johnson'],
'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
'media_type': 'Excerpt',
'genres': ['News'],
},
'params': {'skip_download': 'm3u8'},
}, {
# Has subtitles
# These broadcasts expire after ~1 month, can find new test URL here:
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
'url': 'https://www.cbc.ca/player/play/1.7159484',
'md5': '6ed6cd0fc2ef568d2297ba68a763d455',
'url': 'https://www.cbc.ca/player/play/video/9.6424403',
'md5': '8025909eaffcf0adf59922904def9a5e',
'info_dict': {
'id': '2324213316001',
'id': '9.6424403',
'ext': 'mp4',
'title': 'The National | School boards sue social media giants',
'description': 'md5:4b4db69322fa32186c3ce426da07402c',
'timestamp': 1711681200,
'duration': 2743.400,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg',
'uploader': 'CBCC-NEW',
'title': 'The National | N.W.T. wildfire emergency',
'description': 'md5:ada33d36d1df69347ed575905bfd496c',
'timestamp': 1718589600,
'duration': 2692.833,
'subtitles': {
'en-US': [{
'name': 'English Captions',
'url': 'https://cbchls.akamaized.net/delivery/news-shows/2024/06/17/NAT_JUN16-00-55-00/NAT_JUN16_cc.vtt',
}],
},
'thumbnail': 'https://i.cbc.ca/ais/6272b5c6-5e78-4c05-915d-0e36672e33d1,1714756287822/full/max/0/default.jpg',
'chapters': 'count:5',
'upload_date': '20240329',
'categories': 'count:4',
'upload_date': '20240617',
'categories': ['News', 'The National', 'The National Latest Broadcasts'],
'series': 'The National - Full Show',
'tags': 'count:1',
'creators': ['News'],
'tags': ['The National'],
'location': 'Canada',
'media_type': 'Full Program',
'genres': ['News'],
},
}, {
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
'md5': '188b96cf6bdcb2540e178a6caa957128',
'info_dict': {
'id': '2334524995812',
'id': '1.7194274',
'ext': 'mp4',
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
'timestamp': 1714788791,
'duration': 77.678,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
'uploader': 'CBCC-NEW',
'chapters': 'count:0',
'upload_date': '20240504',
'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg',
'chapters': [],
'categories': 'count:3',
'series': 'The National',
'tags': 'count:15',
'creators': ['encoder'],
'tags': 'count:17',
'location': 'Canada',
'media_type': 'Excerpt',
'upload_date': '20240504',
'genres': ['News'],
},
}, {
'url': 'https://www.cbc.ca/player/play/video/9.6427282',
'info_dict': {
'id': '9.6427282',
'ext': 'mp4',
'title': 'Men\'s Soccer - Argentina vs Morocco',
'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.',
'series': 'CBC Sports',
'media_type': 'Event Coverage',
'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg',
'timestamp': 1721825400.0,
'upload_date': '20240724',
'duration': 10568.0,
'chapters': [],
'genres': [],
'tags': ['2024 Paris Olympic Games'],
'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'],
'location': 'Canada',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.cbc.ca/player/play/video/9.6459530',
'md5': '6c1bb76693ab321a2e99c347a1d5ecbc',
'info_dict': {
'id': '9.6459530',
'ext': 'mp4',
'title': 'Parts of Jasper incinerated as wildfire rages',
'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962',
'series': 'The National',
'media_type': 'Excerpt',
'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg',
'timestamp': 1721964091.012,
'upload_date': '20240726',
'duration': 952.285,
'chapters': [],
'genres': [],
'tags': 'count:23',
'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '],
},
}, {
'url': 'https://www.cbc.ca/player/play/video/9.6420651',
'md5': '71a850c2c6ee5e912de169f5311bb533',
'info_dict': {
'id': '9.6420651',
'ext': 'mp4',
'title': 'Is it a breath of fresh air? Measuring air quality in Edmonton',
'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3',
'series': 'CBC News Edmonton',
'media_type': 'Excerpt',
'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg',
'timestamp': 1718220065.768,
'upload_date': '20240612',
'duration': 286.086,
'chapters': [],
'genres': ['News'],
'categories': ['News', 'Edmonton'],
'tags': 'count:7',
'location': 'Edmonton',
},
}, {
'url': 'cbcplayer:1.7159484',
@@ -307,23 +376,113 @@ class CBCPlayerIE(InfoExtractor):
'only_matching': True,
}]
def _parse_param(self, asset_data, name):
    """Look up the ``value`` of the ``params`` entry in *asset_data* whose ``name`` equals *name*."""
    # Traversal path: entries under 'params' with a matching 'name', then their
    # 'value' restricted to str, reduced with `any` to a single result.
    value_path = ('params', lambda _, param: param['name'] == name, 'value', {str}, any)
    return traverse_obj(asset_data, value_path)
def _real_extract(self, url):
    """Extract formats and metadata for a cbc.ca player clip.

    Downloads the player page, reads ``video.currentClip`` from the
    ``window.__INITIAL_STATE__`` JSON and builds formats from its
    ``media.assets``. When no usable asset is listed but a ``mediaId`` is
    present, the result defers to the ThePlatform extractor instead.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
    data = self._search_json(
        r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)['video']['currentClip']
    # Only keep assets that carry both a URL-like 'key' and a 'type'
    assets = traverse_obj(
        data, ('media', 'assets', lambda _, v: url_or_none(v['key']) and v['type']))

    if not assets and (media_id := traverse_obj(data, ('mediaId', {str}))):
        # XXX: Deprecated; CBC is migrating off of ThePlatform
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': smuggle_url(
                f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', {
                    'force_smil_url': True,
                }),
            'id': media_id,
            '_format_sort_fields': ('res', 'proto'),  # Prioritize direct http formats over HLS
        }

    is_live = traverse_obj(data, ('media', 'streamType', {str})) == 'Live'
    formats, subtitles = [], {}

    # Direct text tracks listed in the clip metadata; language falls back to 'und'
    for sub in traverse_obj(data, ('media', 'textTracks', lambda _, v: url_or_none(v['src']))):
        subtitles.setdefault(sub.get('language') or 'und', []).append({
            'url': sub['src'],
            'name': sub.get('label'),
        })

    for asset in assets:
        asset_key = asset['key']
        asset_type = asset['type']
        if asset_type != 'medianet':
            self.report_warning(f'Skipping unsupported asset type "{asset_type}": {asset_key}')
            continue
        asset_data = self._download_json(asset_key, video_id, f'Downloading {asset_type} JSON')
        ext = mimetype2ext(self._parse_param(asset_data, 'contentType'))
        if ext == 'm3u8':
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                asset_data['url'], video_id, 'mp4', m3u8_id='hls', live=is_live)
            formats.extend(fmts)
            # Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available
            if not subtitles:
                self._merge_subtitles(subs, target=subtitles)
            if is_live or not fmts:
                continue
            # Check for direct https mp4 format
            # Pick the video format with the highest 'tbr' among the HLS variants
            best_video_fmt = traverse_obj(fmts, (
                lambda _, v: v.get('vcodec') != 'none' and v['tbr'], all,
                {functools.partial(sorted, key=lambda x: x['tbr'])}, -1, {dict})) or {}
            base_url = self._search_regex(
                r'(https?://[^?#]+?/)hdntl=', best_video_fmt.get('url'), 'base url', default=None)
            if not base_url or '/live/' in base_url:
                continue
            mp4_url = base_url + replace_extension(url_basename(best_video_fmt['url']), 'mp4')
            # Only advertise the direct file if a HEAD request for it succeeds
            if self._request_webpage(
                    HEADRequest(mp4_url), video_id, 'Checking for https format',
                    errnote=False, fatal=False):
                formats.append({
                    **best_video_fmt,
                    'url': mp4_url,
                    'format_id': 'https-mp4',
                    'protocol': 'https',
                    'manifest_url': None,
                    'acodec': None,
                })
        else:
            formats.append({
                'url': asset_data['url'],
                'ext': ext,
                'vcodec': 'none' if self._parse_param(asset_data, 'mediaType') == 'audio' else None,
            })

    # startTime/endTime are divided by 1000 (scale=1000) to produce start_time/end_time
    chapters = traverse_obj(data, (
        'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
            'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}),
            'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}),
            'title': ('name', {str}),
        }))
    # Filter out pointless single chapters with start_time==0 and no end_time
    if len(chapters) == 1 and not (chapters[0].get('start_time') or chapters[0].get('end_time')):
        chapters = []

    return {
        **traverse_obj(data, {
            'title': ('title', {str}),
            'description': ('description', {str.strip}),
            'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
            'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}),
            'media_type': ('media', 'clipType', {str}),
            'series': ('showName', {str}),
            'season_number': ('media', 'season', {int_or_none}),
            # A duration is not reported for live streams
            'duration': ('media', 'duration', {float_or_none}, {lambda x: None if is_live else x}),
            'location': ('media', 'region', {str}),
            'tags': ('tags', ..., 'name', {str}),
            'genres': ('media', 'genre', all),
            'categories': ('categories', ..., 'name', {str}),
        }),
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        'chapters': chapters,
        'is_live': is_live,
    }
@@ -647,11 +806,11 @@ class CBCGemLiveIE(InfoExtractor):
'title': 'Ottawa',
'description': 'The live TV channel and local programming from Ottawa',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
'is_live': True,
'live_status': 'is_live',
'id': 'AyqZwxRqh8EH',
'ext': 'mp4',
'timestamp': 1492106160,
'upload_date': '20170413',
'release_timestamp': 1492106160,
'release_date': '20170413',
'uploader': 'CBCC-NEW',
},
'skip': 'Live might have ended',
@@ -680,49 +839,84 @@ class CBCGemLiveIE(InfoExtractor):
'description': 'March 24, 2023 | President Bidens Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
'live_status': 'is_live',
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
'timestamp': 1679706000,
'upload_date': '20230325',
'release_timestamp': 1679706000,
'release_date': '20230325',
},
'params': {'skip_download': True},
'skip': 'Live might have ended',
},
{ # event replay (medianetlive)
'url': 'https://gem.cbc.ca/live-event/42314',
'md5': '297a9600f554f2258aed01514226a697',
'info_dict': {
'id': '42314',
'ext': 'mp4',
'live_status': 'was_live',
'title': 'Women\'s Soccer - Canada vs New Zealand',
'description': 'md5:36200e5f1a70982277b5a6ecea86155d',
'thumbnail': r're:https://.+default\.jpg',
'release_timestamp': 1721917200,
'release_date': '20240725',
},
'params': {'skip_download': True},
'skip': 'Replay might no longer be available',
},
{ # event replay (medianetlive)
'url': 'https://gem.cbc.ca/live-event/43273',
'only_matching': True,
},
]
_GEO_COUNTRIES = ['CA']
def _real_extract(self, url):
    """Extract a CBC Gem live channel, upcoming event or event replay.

    Reads the page's Next.js data, locates the entry for this id, and
    (unless the stream has not started yet) asks the radio-canada
    validation service for the HLS manifest URL.

    Raises ExtractorError (expected) when no ``formattedIdMedia`` can be
    found for the requested id.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)
    video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']

    # Three types of video_info JSON: info in root, freeTv stream/item, event replay
    if not video_info.get('formattedIdMedia'):
        if traverse_obj(video_info, ('event', 'key')) == video_id:
            video_info = video_info['event']
        else:
            # Item keys are compared on the part before the first '-'
            video_info = traverse_obj(video_info, (
                ('freeTv', ('streams', ...)), 'items',
                lambda _, v: v['key'].partition('-')[0] == video_id, any)) or {}

    video_stream_id = video_info.get('formattedIdMedia')
    if not video_stream_id:
        raise ExtractorError(
            'Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)

    live_status = 'was_live' if video_info.get('isVodEnabled') else 'is_live'
    release_timestamp = traverse_obj(video_info, ('airDate', {parse_iso8601}))

    if live_status == 'is_live' and release_timestamp and release_timestamp > time.time():
        # airDate lies in the future: report as upcoming and skip the stream request
        formats = []
        live_status = 'is_upcoming'
        self.raise_no_formats('This livestream has not yet started', expected=True)
    else:
        stream_data = self._download_json(
            'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
                'appCode': 'medianetlive',
                'connectionType': 'hd',
                'deviceType': 'ipad',
                'idMedia': video_stream_id,
                'multibitrate': 'true',
                'output': 'json',
                'tech': 'hls',
                'manifestType': 'desktop',
            })
        formats = self._extract_m3u8_formats(
            stream_data['url'], video_id, 'mp4', live=live_status == 'is_live')

    return {
        'id': video_id,
        'formats': formats,
        'live_status': live_status,
        'release_timestamp': release_timestamp,
        **traverse_obj(video_info, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'thumbnail': ('images', 'card', 'url'),
        }),
    }

View File

@@ -35,6 +35,7 @@ from ..networking import HEADRequest, Request
from ..networking.exceptions import (
HTTPError,
IncompleteRead,
TransportError,
network_exceptions,
)
from ..networking.impersonate import ImpersonateTarget
@@ -965,6 +966,9 @@ class InfoExtractor:
return False
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
encoding=encoding, data=data)
if content is False:
assert not fatal
return False
return (content, urlh)
@staticmethod
@@ -1039,7 +1043,15 @@ class InfoExtractor:
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
prefix=None, encoding=None, data=None):
webpage_bytes = urlh.read()
try:
webpage_bytes = urlh.read()
except TransportError as err:
errmsg = f'{video_id}: Error reading response: {err.msg}'
if fatal:
raise ExtractorError(errmsg, cause=err)
self.report_warning(errmsg)
return False
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
if self.get_param('dump_intermediate_pages', False):
@@ -1698,7 +1710,7 @@ class InfoExtractor:
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
if rating is not None:
info['average_rating'] = rating
if is_type(e, 'TVEpisode', 'Episode'):
if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
episode_name = unescapeHTML(e.get('name'))
info.update({
'episode': episode_name,
@@ -2065,7 +2077,7 @@ class InfoExtractor:
has_drm = HlsFD._has_drm(m3u8_doc)
def format_url(url):
    """Return *url* as-is when it is an absolute http(s) URL, otherwise join it onto ``m3u8_url``.

    ``m3u8_url`` is not a parameter; it is read from the enclosing scope.
    """
    # re.match() only matches at the beginning of the string, so no leading '^' is required
    return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
if self.get_param('hls_split_discontinuity', False):
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
@@ -2800,11 +2812,11 @@ class InfoExtractor:
base_url_e = element.find(_add_ns('BaseURL'))
if try_call(lambda: base_url_e.text) is not None:
base_url = base_url_e.text + base_url
if re.match(r'^https?://', base_url):
if re.match(r'https?://', base_url):
break
if mpd_base_url and base_url.startswith('/'):
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
elif mpd_base_url and not re.match(r'^https?://', base_url):
elif mpd_base_url and not re.match(r'https?://', base_url):
if not mpd_base_url.endswith('/'):
mpd_base_url += '/'
base_url = mpd_base_url + base_url
@@ -2894,7 +2906,7 @@ class InfoExtractor:
}
def location_key(location):
    """Return ``'url'`` when *location* is an absolute http(s) URL and ``'path'`` otherwise."""
    # re.match() only matches at the beginning of the string, so no leading '^' is required
    return 'url' if re.match(r'https?://', location) else 'path'
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
@@ -3150,7 +3162,7 @@ class InfoExtractor:
})
return formats, subtitles
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None):
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None, _headers=None):
def absolute_url(item_url):
return urljoin(base_url, item_url)
@@ -3174,11 +3186,11 @@ class InfoExtractor:
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
preference=preference, quality=quality, fatal=False)
preference=preference, quality=quality, fatal=False, headers=_headers)
elif ext == 'mpd':
is_plain_url = False
formats = self._extract_mpd_formats(
full_url, video_id, mpd_id=mpd_id, fatal=False)
full_url, video_id, mpd_id=mpd_id, fatal=False, headers=_headers)
else:
is_plain_url = True
formats = [{
@@ -3272,6 +3284,8 @@ class InfoExtractor:
})
for f in media_info['formats']:
f.setdefault('http_headers', {})['Referer'] = base_url
if _headers:
f['http_headers'].update(_headers)
if media_info['formats'] or media_info['subtitles']:
entries.append(media_info)
return entries
@@ -3487,7 +3501,7 @@ class InfoExtractor:
continue
urls.add(source_url)
source_type = source.get('type') or ''
ext = mimetype2ext(source_type) or determine_ext(source_url)
ext = determine_ext(source_url, default_ext=mimetype2ext(source_type))
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native',

View File

@@ -319,32 +319,6 @@ class DPlayIE(DPlayBaseIE):
url, display_id, host, 'dplay' + country, country, domain)
# NOTE(review): the hunk header directly above (-319,32 +319,6) suggests these lines are the
# deleted side of the change, and a class with the same name deriving from
# DiscoveryPlusBaseIE appears further down -- confirm which definition is current.
# Extractor for de.hgtv.com/sendungen/... pages, built directly on DPlayBaseIE.
class HGTVDeIE(DPlayBaseIE):
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
'info_dict': {
'id': '151205',
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
'ext': 'mp4',
'title': 'Wer braucht schon eine Toilette',
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
'duration': 1177.024,
'timestamp': 1595705400,
'upload_date': '20200725',
'creator': 'HGTV',
'series': 'Tiny House - klein, aber oho',
'season_number': 3,
'episode_number': 3,
},
}]
# Passes the matched display id to the shared disco API helper together with
# a fixed host ('eu1-prod.disco-api.com') and the literals 'hgtv' and 'de'.
def _real_extract(self, url):
display_id = self._match_id(url)
return self._get_disco_api_info(
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
class DiscoveryPlusBaseIE(DPlayBaseIE):
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
@@ -373,6 +347,45 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
class HGTVDeIE(DiscoveryPlusBaseIE):
    # Matches de.hgtv.com/sendungen/... pages; the path part comes from DPlayBaseIE
    _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
    _TESTS = [
        {
            'url': 'https://de.hgtv.com/sendungen/mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
            'info_dict': {
                'id': '7332936',
                'ext': 'mp4',
                'display_id': 'mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
                'title': 'Vom Landleben ins Loft',
                'description': 'md5:e5f72c02c853970796dd3818f2e25745',
                'episode': 'Episode 7',
                'episode_number': 7,
                'season': 'Season 7',
                'season_number': 7,
                'series': 'Mein Kleinstadt-Traumhaus',
                'duration': 2645.0,
                'timestamp': 1725998100,
                'upload_date': '20240910',
                'creators': ['HGTV'],
                'tags': [],
                'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/08/09/82a386b9-c688-32c7-b9ff-0b13865f0bae.jpeg',
            },
        },
    ]

    # Values the DiscoveryPlusBaseIE docstring says subclasses must set
    _PRODUCT = 'hgtv'
    _DISCO_API_PARAMS = dict(
        disco_host='eu1-prod.disco-api.com',
        realm='hgtv',
        country='de',
    )

    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        """Add the realm, client identifier and Authorization entries to *headers* in place."""
        disco_params = f'realm={realm}'
        authorization = self._get_auth(disco_base, display_id, realm)
        headers.update({
            'x-disco-params': disco_params,
            'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
            'Authorization': authorization,
        })
class GoDiscoveryIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
@@ -934,7 +947,7 @@ class TLCIE(DiscoveryPlusBaseIE):
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport)?' + DPlayBaseIE._PATH_REGEX
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
'info_dict': {
@@ -958,6 +971,9 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE):
}, {
'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.com/gb/video/olympics/dplus-sport-dplus-sport-sport/rugby-sevens-australia-samoa',
'only_matching': True,
}]
_PRODUCT = None
@@ -1144,13 +1160,19 @@ class DiscoveryPlusShowBaseIE(DPlayBaseIE):
class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video' + DPlayBaseIE._PATH_REGEX
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.com/it/video/olympics/dplus-sport-dplus-sport-sport/water-polo-greece-italy',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.com/it/video/sport/dplus-sport-dplus-sport-sport/lisa-vittozzi-allinferno-e-ritorno',
'only_matching': True,
}]
_PRODUCT = 'dplus_it'

View File

@@ -6,8 +6,10 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
update_url,
update_url_query,
url_basename,
urlencode_postdata,
)
@@ -36,43 +38,58 @@ class DropboxIE(InfoExtractor):
},
]
def _yield_decoded_parts(self, webpage):
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
yield base64.b64decode(encoded).decode('utf-8', 'ignore')
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
fn = urllib.parse.unquote(url_basename(url))
title = os.path.splitext(fn)[0]
password = self.get_param('videopassword')
if (self._og_search_title(webpage) == 'Dropbox - Password Required'
or 'Enter the password for this link' in webpage):
for part in self._yield_decoded_parts(webpage):
if '/sm/password' in part:
webpage = self._download_webpage(
update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
break
if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required'
or 'Enter the password for this link' in webpage):
if password:
content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
response = self._download_json(
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(),
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
data=urlencode_postdata({
'is_xhr': 'true',
't': self._get_cookies('https://www.dropbox.com')['t'].value,
'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
'password': password,
'url': url,
}))
if response.get('status') != 'authed':
raise ExtractorError('Authentication failed!', expected=True)
webpage = self._download_webpage(url, video_id)
elif self._get_cookies('https://dropbox.com').get('sm_auth'):
webpage = self._download_webpage(url, video_id)
else:
raise ExtractorError('Invalid password', expected=True)
elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
webpage = self._download_webpage(url, video_id)
formats, subtitles, has_anonymous_download = [], {}, False
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
formats, subtitles = [], {}
has_anonymous_download = False
thumbnail = None
for part in self._yield_decoded_parts(webpage):
if not has_anonymous_download:
has_anonymous_download = self._search_regex(
r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
transcode_url = self._search_regex(
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
if not transcode_url:
continue
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
thumbnail = self._search_regex(
r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
break
# downloads enabled we can get the original file
@@ -89,4 +106,5 @@ class DropboxIE(InfoExtractor):
'title': title,
'formats': formats,
'subtitles': subtitles,
'thumbnail': thumbnail,
}

View File

@@ -17,6 +17,7 @@ from ..utils import (
url_or_none,
variadic,
)
from ..utils.traversal import traverse_obj
class ERTFlixBaseIE(InfoExtractor):
@@ -74,29 +75,28 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
def _extract_formats_and_subs(self, video_id):
    """Return ``(formats, subtitles)`` for the media item with codename *video_id*.

    Only ``Formats`` entries with a URL-like ``Url`` that belong to
    ``MediaFiles`` whose ``RoleCodename`` is ``'main'`` are considered.
    HLS and DASH manifests are expanded (non-fatally); any other URL is
    added as a single direct format.
    """
    media_info = self._call_api(video_id, codename=video_id)
    formats, subtitles = [], {}
    for media in traverse_obj(media_info, (
            'MediaFiles', lambda _, v: v['RoleCodename'] == 'main',
            'Formats', lambda _, v: url_or_none(v['Url']))):
        fmt_url = media['Url']
        ext = determine_ext(fmt_url)
        if ext == 'm3u8':
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
        elif ext == 'mpd':
            fmts, subs = self._extract_mpd_formats_and_subtitles(
                fmt_url, video_id, mpd_id='dash', fatal=False)
        else:
            # Plain file URL: nothing to expand and no subtitles to merge
            formats.append({
                'url': fmt_url,
                'format_id': str_or_none(media.get('Id')),
            })
            continue
        formats.extend(fmts)
        # Accumulate subtitles across all manifests into the returned dict
        self._merge_subtitles(subs, target=subtitles)

    return formats, subtitles
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -294,37 +294,37 @@ class ESPNCricInfoIE(InfoExtractor):
class WatchESPNIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
_TESTS = [{
'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
'url': 'https://www.espn.com/watch/player/_/id/11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
'info_dict': {
'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
'id': '11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
'ext': 'mp4',
'title': 'Huddersfield vs. Burnley',
'duration': 7500,
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
'title': 'Abilene Chrstn vs. Texas Tech',
'duration': 14166,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/11ce417a-6ac9-42b6-8a15-46aeb9ad5710/16x9.jpg?timestamp=202407252343&showBadge=true&cb=12&package=ESPN_PLUS',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
'url': 'https://www.espn.com/watch/player/_/id/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
'info_dict': {
'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
'id': '90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
'ext': 'mp4',
'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
'duration': 8335,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
'title': 'UC Davis vs. California',
'duration': 9547,
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
'url': 'https://www.espn.com/watch/player/_/id/c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
'info_dict': {
'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
'id': 'c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
'ext': 'mp4',
'title': 'The Wheel - Episode 10',
'duration': 3352,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
'title': 'The College Football Show',
'duration': 3639,
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/c4313bbe-95b5-4bb8-b251-ac143ea0fc54/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
},
'params': {
'skip_download': True,
@@ -353,6 +353,13 @@ class WatchESPNIE(AdobePassIE):
if not cookie:
self.raise_login_required(method='cookies')
jwt = self._search_regex(r'=([^|]+)\|', cookie.value, 'cookie jwt')
id_token = self._download_json(
'https://registerdisney.go.com/jgc/v6/client/ESPN-ONESITE.WEB-PROD/guest/refresh-auth',
None, 'Refreshing token', headers={'Content-Type': 'application/json'}, data=json.dumps({
'refreshToken': json.loads(base64.urlsafe_b64decode(f'{jwt}==='))['refresh_token'],
}).encode())['data']['token']['id_token']
assertion = self._call_bamgrid_api(
'devices', video_id,
headers={'Content-Type': 'application/json; charset=UTF-8'},
@@ -371,7 +378,7 @@ class WatchESPNIE(AdobePassIE):
})['access_token']
assertion = self._call_bamgrid_api(
'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
'accounts/grant', video_id, payload={'id_token': id_token},
headers={
'Authorization': token,
'Content-Type': 'application/json; charset=UTF-8',

View File

@@ -3,7 +3,12 @@ from ..utils import traverse_obj
class EurosportIE(InfoExtractor):
_VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
_VALID_URL = r'''(?x)
https?://(?:
(?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
eurosport\.tvn24\.pl
)/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
'''
_TESTS = [{
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
'info_dict': {
@@ -70,6 +75,42 @@ class EurosportIE(InfoExtractor):
'duration': 105.0,
'upload_date': '20230518',
},
}, {
'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
'only_matching': True,
}, {
'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
'only_matching': True,
}]
_TOKEN = None
@@ -77,6 +118,7 @@ class EurosportIE(InfoExtractor):
# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
# but this method requires obtaining the sha256 hash
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
_GEO_BYPASS = False
def _real_initialize(self):
if EurosportIE._TOKEN is None:
@@ -98,13 +140,13 @@ class EurosportIE(InfoExtractor):
for stream_type in json_data['attributes']['streaming']:
if stream_type == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
elif stream_type == 'dash':
fmts, subs = self._extract_mpd_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
elif stream_type == 'mss':
fmts, subs = self._extract_ism_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)

View File

@@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
'timestamp': 1692346159,
'thumbnail': r're:^https?://.*',
'uploader_id': '100063551323670',
'duration': 3132.184,
'duration': 3133.583,
'view_count': int,
'concurrent_view_count': 0,
},
@@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
'upload_date': '20140506',
'timestamp': 1399398998,
'thumbnail': r're:^https?://.*',
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
'duration': 131.03,
'concurrent_view_count': int,
'view_count': int,
},
}, {
'note': 'Video with DASH manifest',
@@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
# have 1080P, but only up to 720p in swf params
# data.video.story.attachments[].media
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': 'ca63897a90c9452efee5f8c40d080e25',
'md5': '1659aa21fb3dd1585874f668e81a72c8',
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
@@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
'view_count': int,
'uploader_id': '100059479812265',
'concurrent_view_count': int,
'duration': 44.478,
'duration': 44.181,
},
}, {
# FIXME: unable to extract uploader, no formats found
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
@@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
'timestamp': 1511548260,
'upload_date': '20171124',
'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
'thumbnail': r're:^https?://.*',
'duration': 148.435,
'duration': 148.224,
},
}, {
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
@@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
'thumbnail': r're:^https?://.*',
'uploader': 'Lela Evans',
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
'upload_date': '20231228',
'timestamp': 1703804085,
'duration': 394.347,
@@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20180523',
'uploader': 'ESL One Dota 2',
'uploader_id': '100066514874195',
'duration': 4524.212,
'duration': 4524.001,
'view_count': int,
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
@@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
'title': 'Josef',
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
'timestamp': 1549275572,
'duration': 3.413,
'duration': 3.283,
'uploader': 'Josef Novak',
'description': '',
'upload_date': '20190204',
@@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
'playlist_count': 1,
'skip': 'Requires logging in',
}, {
# FIXME: Cannot parse data error
# data.event.cover_media_renderer.cover_video
'url': 'https://m.facebook.com/events/1509582499515440',
'info_dict': {
@@ -498,7 +501,8 @@ class FacebookIE(InfoExtractor):
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict}))
or get_first(post, ('event', 'event_creator', {dict})) or {})
or get_first(post, ('event', 'event_creator', {dict}))
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
uploader = uploader_data.get('name') or (
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
or self._search_regex(
@@ -524,6 +528,11 @@ class FacebookIE(InfoExtractor):
webpage, 'view count', default=None)),
'concurrent_view_count': get_first(post, (
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
'like_count': ('likers', 'count', {int}),
'comment_count': ('total_comment_count', {int}),
'repost_count': ('share_count_reduced', {parse_count}),
}), get_all=False),
}
info_json_ld = self._search_json_ld(webpage, video_id, default={})
@@ -571,16 +580,21 @@ class FacebookIE(InfoExtractor):
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
def extract_relay_data(_filter):
return self._parse_json(self._search_regex(
rf'data-sjs>({{.*?{_filter}.*?}})</script>',
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
def yield_all_relay_data(_filter):
for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})</script>', webpage):
yield self._parse_json(relay_data, video_id, fatal=False) or {}
def extract_relay_prefetched_data(_filter):
return traverse_obj(extract_relay_data(_filter), (
'require', (None, (..., ..., ..., '__bbox', 'require')),
def extract_relay_data(_filter):
return next(filter(None, yield_all_relay_data(_filter)), {})
def extract_relay_prefetched_data(_filter, target_keys=None):
path = 'data'
if target_keys is not None:
path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys))
return traverse_obj(yield_all_relay_data(_filter), (
..., 'require', (None, (..., ..., ..., '__bbox', 'require')),
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {}
if not video_data:
server_js_data = self._parse_json(self._search_regex([
@@ -591,7 +605,8 @@ class FacebookIE(InfoExtractor):
if not video_data:
data = extract_relay_prefetched_data(
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)',
target_keys=('video', 'event', 'nodes', 'node', 'mediaset'))
if data:
entries = []
@@ -926,18 +941,21 @@ class FacebookReelIE(InfoExtractor):
_TESTS = [{
'url': 'https://www.facebook.com/reel/1195289147628387',
'md5': 'f13dd37f2633595982db5ed8765474d3',
'md5': 'a53256d10fc2105441fe0c4212ed8cea',
'info_dict': {
'id': '1195289147628387',
'ext': 'mp4',
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
'description': 'md5:22f03309b216ac84720183961441d8db',
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
'uploader': 'Beast Camp Training',
'uploader_id': '100040874179269',
'duration': 9.579,
'timestamp': 1637502609,
'upload_date': '20211121',
'thumbnail': r're:^https?://.*',
'like_count': int,
'comment_count': int,
'repost_count': int,
},
}]
@@ -957,6 +975,7 @@ class FacebookAdsIE(InfoExtractor):
'id': '899206155126718',
'ext': 'mp4',
'title': 'video by Kandao',
'description': 'md5:0822724069e3aca97cbed5dabbab282e',
'uploader': 'Kandao',
'uploader_id': '774114102743284',
'uploader_url': r're:^https?://.*',
@@ -965,6 +984,22 @@ class FacebookAdsIE(InfoExtractor):
'upload_date': '20231214',
'like_count': int,
},
}, {
# key 'watermarked_video_sd_url' missing
'url': 'https://www.facebook.com/ads/library/?id=501152689226254',
'info_dict': {
'id': '501152689226254',
'ext': 'mp4',
'title': 'video by mat.nawrocki',
'description': 'md5:02a446ace7ff8c3c37a2892922492490',
'uploader': 'mat.nawrocki',
'uploader_id': '148586968341456',
'uploader_url': r're:^https?://.*',
'timestamp': 1723452305,
'thumbnail': r're:^https?://.*',
'upload_date': '20240812',
'like_count': int,
},
}, {
'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
'info_dict': {
@@ -1011,34 +1046,42 @@ class FacebookAdsIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
post_data = [self._parse_json(j, video_id, fatal=False)
for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
data = traverse_obj(post_data, (
..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
post_data = traverse_obj(
re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads}))
data = get_first(post_data, (
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ...,
'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict}))
if not data:
raise ExtractorError('Unable to extract ad data')
title = data.get('title')
if not title or title == '{{product.name}}':
title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)
markup_id = traverse_obj(data, ('body', '__m', {str}))
markup = traverse_obj(post_data, (
..., 'require', ..., ..., ..., '__bbox', 'markup', lambda _, v: v[0].startswith(markup_id),
..., '__html', {clean_html}, {lambda x: not x.startswith('{{product.') and x}, any))
info_dict = traverse_obj(data, {
'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
info_dict = merge_dicts({
'title': title,
'description': markup or None,
}, traverse_obj(data, {
'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}),
'uploader': ('page_name', {str}),
'uploader_id': ('page_id', {str_or_none}),
'uploader_url': ('page_profile_uri', {url_or_none}),
'timestamp': ('creation_time', {int_or_none}),
'like_count': ('page_like_count', {int_or_none}),
})
}))
entries = []
for idx, entry in enumerate(traverse_obj(
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v[f]) for f in self._FORMATS_MAP))), 1,
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1,
):
entries.append({
'id': f'{video_id}_{idx}',
'title': entry.get('title') or title,
'description': entry.get('link_description') or info_dict.get('description'),
'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'),
'thumbnail': url_or_none(entry.get('video_preview_image_url')),
'formats': self._extract_formats(entry),
})

View File

@@ -14,7 +14,7 @@ from ..utils import (
class FC2IE(InfoExtractor):
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
IE_NAME = 'fc2'
_NETRC_MACHINE = 'fc2'
_TESTS = [{

View File

@@ -2340,7 +2340,7 @@ class GenericIE(InfoExtractor):
default_search = 'fixup_error'
if default_search in ('auto', 'auto_warning', 'fixup_error'):
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
if re.match(r'[^\s/]+\.[^\s/]+/', url):
self.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
elif default_search != 'fixup_error':
@@ -2400,7 +2400,7 @@ class GenericIE(InfoExtractor):
# Check for direct link to a video
content_type = full_response.headers.get('Content-Type', '').lower()
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
headers = filter_dict({'Referer': smuggled_data.get('referer')})

View File

@@ -0,0 +1,91 @@
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import (
parse_qs,
traverse_obj,
url_or_none,
)
class GermanupaIE(InfoExtractor):
    # Extractor for germanupa.de "mediathek" pages. Pages that embed a video
    # through the site's oEmbed iframe are handed to VimeoIE; everything else
    # is either reported as members-only or delegated to the generic extractor.
    IE_DESC = 'germanupa.de'
    _VALID_URL = r'https?://germanupa\.de/mediathek/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://germanupa.de/mediathek/4-figma-beratung-deine-sprechstunde-fuer-figma-fragen',
        'info_dict': {
            'id': '909179246',
            'title': 'Tutorial: #4 Figma Beratung - Deine Sprechstunde für Figma-Fragen',
            'ext': 'mp4',
            'uploader': 'German UPA',
            'uploader_id': 'germanupa',
            'thumbnail': 'https://i.vimeocdn.com/video/1792564420-7415283ccef8bf8702dab8c6b7515555ceeb7a1c11371ffcc133b8e887dbf70e-d_1280',
            'uploader_url': 'https://vimeo.com/germanupa',
            'duration': 3987,
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'audio, uses GenericIE',
        'url': 'https://germanupa.de/mediathek/live-vom-ux-festival-neuigkeiten-von-figma-jobmarkt-agenturszene-interview-zu-sustainable',
        'info_dict': {
            'id': '1867346676',
            'title': 'Live vom UX Festival: Neuigkeiten von Figma, Jobmarkt, Agenturszene & Interview zu Sustainable UX',
            'ext': 'opus',
            'timestamp': 1720545088,
            'upload_date': '20240709',
            'duration': 3910.557,
            'like_count': int,
            'description': 'md5:db2aed5ff131e177a7b33901e9a8db05',
            'uploader': 'German UPA',
            'repost_count': int,
            'genres': ['Science'],
            'license': 'all-rights-reserved',
            'uploader_url': 'https://soundcloud.com/user-80097677',
            'uploader_id': '471579486',
            'view_count': int,
            'comment_count': int,
            'thumbnail': 'https://i1.sndcdn.com/artworks-oCti2e9GhaZFWBqY-48ybGw-original.jpg',
        },
    }, {
        'note': 'Nur für Mitglieder/Just for members',
        'url': 'https://germanupa.de/mediathek/ux-festival-2024-usability-tests-und-ai',
        'info_dict': {
            'id': '986994430',
            'title': 'UX Festival 2024 "Usability Tests und AI" von Lennart Weber',
            'ext': 'mp4',
            'release_date': '20240719',
            'uploader_url': 'https://vimeo.com/germanupa',
            'timestamp': 1721373980,
            'license': 'by-sa',
            'like_count': int,
            'thumbnail': 'https://i.vimeocdn.com/video/1904187064-2a672630c30f9ad787bd390bff3f51d7506a3e8416763ba6dbf465732b165c5c-d_1280',
            'duration': 2146,
            'release_timestamp': 1721373980,
            'uploader': 'German UPA',
            'uploader_id': 'germanupa',
            'upload_date': '20240719',
            'comment_count': int,
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The player iframe points at the site's oEmbed endpoint; the actual
        # media URL is carried in that endpoint's `url` query parameter.
        oembed_url = self._search_regex(
            r'<iframe[^>]+data-src\s*?=\s*?([\'"])(?P<url>https://germanupa\.de/media/oembed\?url=(?:(?!\1).)+)\1',
            webpage, 'embedded video', default=None, group='url')
        param_url = traverse_obj(oembed_url, ({parse_qs}, 'url', 0, {url_or_none}))

        if param_url:
            # Rewrite the vimeo.com page URL to its player form and pass the
            # embedding page along as referrer.
            real_url = param_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/')
            return self.url_result(VimeoIE._smuggle_referrer(real_url, url), VimeoIE, video_id)

        # No oEmbed iframe found: a login wrapper on the page means the
        # content is restricted to members.
        login_wrapper = self._search_regex(
            r'<div[^>]+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1',
            webpage, 'login wrapper', default=None)
        if login_wrapper:
            self.raise_login_required('This video is only available for members')
        return self.url_result(url, 'Generic')  # Fall back to generic to extract audio

View File

@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
_VALID_URL = [
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
]
_TESTS = [{
'url': 'http://academymel.online/3video_1',

View File

@@ -7,7 +7,7 @@ from ..utils import (
class GolemIE(InfoExtractor):
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
_VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
_TEST = {
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',

View File

@@ -13,7 +13,7 @@ from ..utils import (
class HRFernsehenIE(InfoExtractor):
IE_NAME = 'hrfernsehen'
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
_VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
_TESTS = [{
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
'md5': '5c4e0ba94677c516a2f65a84110fc536',

View File

@@ -8,15 +8,19 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
str_or_none,
try_get,
unescapeHTML,
unified_strdate,
update_url_query,
url_or_none,
)
from ..utils.traversal import traverse_obj
class HuyaLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
IE_NAME = 'huya:live'
IE_DESC = 'huya.com'
TESTS = [{
@@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor):
'info_dict': {
'id': '572329',
'title': str,
'ext': 'flv',
'description': str,
'is_live': True,
'view_count': int,
@@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor):
fm = base64.b64decode(params['fm']).decode().split('_', 1)[0]
ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
return fm, ss
class HuyaVideoIE(InfoExtractor):
    # Extractor for huya.com/video/play/<id>.html pages; formats and metadata
    # both come from the liveapi.huya.com "getMomentContent" JSON response.
    _VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P<id>\d+)\.html'
    IE_NAME = 'huya:video'
    IE_DESC = '虎牙视频'

    _TESTS = [{
        'url': 'https://www.huya.com/video/play/1002412640.html',
        'info_dict': {
            'id': '1002412640',
            'ext': 'mp4',
            'title': '8月3日',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 14,
            'uploader': '虎牙-ATS欧卡车队青木',
            'uploader_id': '1564376151',
            'upload_date': '20240803',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }, {
        'url': 'https://www.huya.com/video/play/556054543.html',
        'info_dict': {
            'id': '556054543',
            'ext': 'mp4',
            'title': '我不挑事 也不怕事',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 1864,
            'uploader': '卡尔',
            'uploader_id': '367138632',
            'upload_date': '20210811',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }]

    def _real_extract(self, url: str):
        video_id = self._match_id(url)
        moment_content = self._download_json(
            'https://liveapi.huya.com/moment/getMomentContent', video_id,
            query={'videoId': video_id})
        video_info = moment_content['data']['moment']['videoInfo']

        # Only entries of `definitions` whose `url` passes url_or_none
        # become formats.
        usable_definitions = traverse_obj(
            video_info, ('definitions', lambda _, v: url_or_none(v['url'])))
        formats = [{
            'url': definition['url'],
            **traverse_obj(definition, {
                'format_id': ('defName', {str}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': ('size', {int_or_none}),
            }),
        } for definition in usable_definitions]

        metadata = traverse_obj(video_info, {
            'title': ('videoTitle', {str}),
            'thumbnail': ('videoCover', {url_or_none}),
            'duration': ('videoDuration', {parse_duration}),
            'uploader': ('nickName', {str}),
            'uploader_id': ('uid', {str_or_none}),
            'upload_date': ('videoUploadTime', {unified_strdate}),
            'view_count': ('videoPlayNum', {int_or_none}),
            'comment_count': ('videoCommentNum', {int_or_none}),
            'like_count': ('favorCount', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }

View File

@@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor):
'id': 'p51388',
'ext': 'mp4',
'title': 'Partička (92)',
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
'upload_date': '20201103',
'timestamp': 1604437480,
'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
'episode': 'Partička (92)',
'season': 'Partička',
'series': 'Prima Partička',
'episode_number': 92,
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
},
'params': {
'skip_download': True, # m3u8 download
},
}, {
'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
'info_dict': {
'id': 'p1412199',
'ext': 'mp4',
'episode_number': 3,
'episode': 'Tenerife: V říši ohně',
'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
'duration': 3111.0,
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
'title': 'Tenerife: V říši ohně',
'timestamp': 1711825800,
'upload_date': '20240330',
},
'params': {
'skip_download': True, # m3u8 download
@@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor):
video_id = self._search_regex((
r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
r'let\s+videos\s*=\s*([\'"])(?P<id>p\d+)\1',
), webpage, 'real id', group='id', default=None)
if not video_id:
@@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor):
final_result = self._search_json_ld(webpage, video_id, default={})
final_result.update({
'id': video_id,
'title': title,
'title': final_result.get('title') or title,
'thumbnail': self._html_search_meta(
['thumbnail', 'og:image', 'twitter:image'],
webpage, 'thumbnail', default=None),

View File

@@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
class SangiinInstructionIE(InfoExtractor):
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
_VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
IE_DESC = False # this shouldn't be listed as a supported site
def _real_extract(self, url):
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
raise ExtractorError(
'Copy the link from the button below the video description/player '
'and use that link to download. If there is no button in the frame, '
'get the URL of the frame showing the video.', expected=True)
class SangiinIE(InfoExtractor):

View File

@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
(?:
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
https?://
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
(?:
(?:
# flash player

View File

@@ -15,7 +15,7 @@ from ..utils import (
class KhanAcademyBaseIE(InfoExtractor):
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70'
_PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4'
def _parse_video(self, video):
return {
@@ -39,7 +39,7 @@ class KhanAcademyBaseIE(InfoExtractor):
query={
'fastly_cacheable': 'persist_until_publish',
'pcv': self._PUBLISHED_CONTENT_VERSION,
'hash': '1242644265',
'hash': '3712657851',
'variables': json.dumps({
'path': display_id,
'countryCode': 'US',

View File

@@ -1,9 +1,14 @@
import functools
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
UserNotLive,
determine_ext,
float_or_none,
int_or_none,
merge_dicts,
parse_iso8601,
str_or_none,
traverse_obj,
unified_timestamp,
@@ -25,104 +30,212 @@ class KickBaseIE(InfoExtractor):
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
return self._download_json(
f'https://kick.com/api/v1/{path}', display_id, note=note,
f'https://kick.com/api/{path}', display_id, note=note,
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
class KickIE(KickBaseIE):
IE_NAME = 'kick:live'
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://kick.com/yuppy',
'url': 'https://kick.com/buddha',
'info_dict': {
'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21',
'id': '92722911-nopixel-40',
'ext': 'mp4',
'title': str,
'description': str,
'channel': 'yuppy',
'channel_id': '33538',
'uploader': 'Yuppy',
'uploader_id': '33793',
'upload_date': str,
'live_status': 'is_live',
'timestamp': int,
'thumbnail': r're:^https?://.*\.jpg',
'thumbnail': r're:https?://.+\.jpg',
'categories': list,
'upload_date': str,
'channel': 'buddha',
'channel_id': '32807',
'uploader': 'Buddha',
'uploader_id': '33057',
'live_status': 'is_live',
'concurrent_view_count': int,
'release_timestamp': int,
'age_limit': 18,
'release_date': str,
},
'skip': 'livestream',
'params': {'skip_download': 'livestream'},
# 'skip': 'livestream',
}, {
'url': 'https://kick.com/kmack710',
'url': 'https://kick.com/xqc',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url)
def _real_extract(self, url):
channel = self._match_id(url)
response = self._call_api(f'channels/{channel}', channel)
response = self._call_api(f'v2/channels/{channel}', channel)
if not traverse_obj(response, 'livestream', expected_type=dict):
raise UserNotLive(video_id=channel)
return {
'id': str(traverse_obj(
response, ('livestream', ('slug', 'id')), get_all=False, default=channel)),
'formats': self._extract_m3u8_formats(
response['playback_url'], channel, 'mp4', live=True),
'title': traverse_obj(
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
'description': traverse_obj(response, ('user', 'bio')),
'channel': channel,
'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
'uploader': traverse_obj(response, 'name', ('user', 'username')),
'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
'is_live': True,
'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))),
'thumbnail': traverse_obj(
response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none),
'categories': traverse_obj(response, ('recent_categories', ..., 'name')),
'formats': self._extract_m3u8_formats(response['playback_url'], channel, 'mp4', live=True),
**traverse_obj(response, {
'id': ('livestream', 'slug', {str}),
'title': ('livestream', 'session_title', {str}),
'description': ('user', 'bio', {str}),
'channel_id': (('id', ('livestream', 'channel_id')), {int}, {str_or_none}, any),
'uploader': (('name', ('user', 'username')), {str}, any),
'uploader_id': (('user_id', ('user', 'id')), {int}, {str_or_none}, any),
'timestamp': ('livestream', 'created_at', {unified_timestamp}),
'release_timestamp': ('livestream', 'start_time', {unified_timestamp}),
'thumbnail': ('livestream', 'thumbnail', 'url', {url_or_none}),
'categories': ('recent_categories', ..., 'name', {str}),
'concurrent_view_count': ('livestream', 'viewer_count', {int_or_none}),
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
}),
}
class KickVODIE(KickBaseIE):
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
IE_NAME = 'kick:vod'
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3',
'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
'md5': '3870f94153e40e7121a6e46c068b70cb',
'info_dict': {
'id': '58bac65b-e641-4476-a7ba-3707a35e60e3',
'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
'ext': 'mp4',
'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠',
'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d',
'channel': 'jaredfps',
'channel_id': '26608',
'uploader': 'JaredFPS',
'uploader_id': '26799',
'upload_date': '20240402',
'timestamp': 1712097108,
'duration': 33859.0,
'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
'channel': 'xqc',
'channel_id': '668',
'uploader': 'xQc',
'uploader_id': '676',
'upload_date': '20240909',
'timestamp': 1725919141,
'duration': 10155.0,
'thumbnail': r're:^https?://.*\.jpg',
'categories': ['Call of Duty: Warzone'],
'view_count': int,
'categories': ['Just Chatting'],
'age_limit': 0,
},
'params': {
'skip_download': 'm3u8',
},
'expected_warnings': [r'impersonation'],
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
response = self._call_api(f'video/{video_id}', video_id)
response = self._call_api(f'v1/video/{video_id}', video_id)
return {
'id': video_id,
'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'),
'title': traverse_obj(
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')),
'channel': traverse_obj(response, ('livestream', 'channel', 'slug')),
'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))),
'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')),
'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))),
'timestamp': unified_timestamp(response.get('created_at')),
'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000),
'thumbnail': traverse_obj(
response, ('livestream', 'thumbnail'), expected_type=url_or_none),
'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')),
**traverse_obj(response, {
'title': ('livestream', ('session_title', 'slug'), {str}, any),
'description': ('livestream', 'channel', 'user', 'bio', {str}),
'channel': ('livestream', 'channel', 'slug', {str}),
'channel_id': ('livestream', 'channel', 'id', {int}, {str_or_none}),
'uploader': ('livestream', 'channel', 'user', 'username', {str}),
'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
'timestamp': ('created_at', {parse_iso8601}),
'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}),
'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
'categories': ('livestream', 'categories', ..., 'name', {str}),
'view_count': ('views', {int_or_none}),
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
}),
}
class KickClipIE(KickBaseIE):
    # Handles clip URLs in both forms accepted by _VALID_URL:
    # /<channel>/clips/<clip_id> and /<channel>?clip=<clip_id>
    IE_NAME = 'kick:clips'
    _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?P<id>clip_[\w-]+)'
    _TESTS = [{
        'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
        'info_dict': {
            'id': 'clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
            'ext': 'mp4',
            'title': 'Maddy detains Abd D:',
            'channel': 'mxddy',
            'channel_id': '133789',
            'uploader': 'AbdCreates',
            'uploader_id': '3309077',
            'thumbnail': r're:^https?://.*\.jpeg',
            'duration': 35,
            'timestamp': 1682481453,
            'upload_date': '20230426',
            'view_count': int,
            'like_count': int,
            'categories': ['VALORANT'],
            'age_limit': 18,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://kick.com/destiny?clip=clip_01H9SKET879NE7N9RJRRDS98J3',
        'info_dict': {
            'id': 'clip_01H9SKET879NE7N9RJRRDS98J3',
            'title': 'W jews',
            'ext': 'mp4',
            'channel': 'destiny',
            'channel_id': '1772249',
            'uploader': 'punished_furry',
            'uploader_id': '2027722',
            'duration': 49.0,
            'upload_date': '20230908',
            'timestamp': 1694150180,
            'thumbnail': 'https://clips.kick.com/clips/j3/clip_01H9SKET879NE7N9RJRRDS98J3/thumbnail.png',
            'view_count': int,
            'like_count': int,
            'categories': ['Just Chatting'],
            'age_limit': 0,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5',
        'info_dict': {
            'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5',
            'ext': 'mp4',
            'title': 'KLJASLDJKLJKASDLJKDAS',
            'channel': 'spreen',
            'channel_id': '5312671',
            'uploader': 'AnormalBarraBaja',
            'uploader_id': '26518262',
            'duration': 43.0,
            'upload_date': '20240927',
            'timestamp': 1727399987,
            'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp',
            'view_count': int,
            'like_count': int,
            'categories': ['Minecraft'],
            'age_limit': 0,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        clip_id = self._match_id(url)
        clip_data = self._call_api(f'v2/clips/{clip_id}/play', clip_id)['clip']
        media_url = clip_data['clip_url']

        # An HLS playlist is expanded into its variants; any other URL is used as one direct format
        is_hls = determine_ext(media_url) == 'm3u8'
        formats = self._extract_m3u8_formats(media_url, clip_id, 'mp4') if is_hls else [{'url': media_url}]

        metadata = traverse_obj(clip_data, {
            'title': ('title', {str}),
            'channel': ('channel', 'slug', {str}),
            'channel_id': ('channel', 'id', {int}, {str_or_none}),
            'uploader': ('creator', 'username', {str}),
            'uploader_id': ('creator', 'id', {int}, {str_or_none}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'duration': ('duration', {float_or_none}),
            # The API exposes a single category; `all` wraps it into a list
            'categories': ('category', 'name', {str}, all),
            'timestamp': ('created_at', {parse_iso8601}),
            'view_count': ('views', {int_or_none}),
            'like_count': ('likes', {int_or_none}),
            'age_limit': ('is_mature', {bool}, {lambda x: 18 if x else 0}),
        })

        return {
            'id': clip_id,
            'formats': formats,
            **metadata,
        }

126
yt_dlp/extractor/kika.py Normal file
View File

@@ -0,0 +1,126 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
parse_duration,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class KikaIE(InfoExtractor):
    IE_DESC = 'KiKA.de'
    _VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P<id>[a-z-]+\d+)'
    _GEO_COUNTRIES = ['DE']

    _TESTS = [{
        'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
        'md5': 'fbfc8da483719ef06f396e5e5b938c69',
        'info_dict': {
            'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
            'ext': 'mp4',
            'upload_date': '20240831',
            'timestamp': 1725126600,
            'season_number': 2024,
            'modified_date': '20240831',
            'episode': 'Episode 476',
            'episode_number': 476,
            'season': 'Season 2024',
            'duration': 634,
            'title': 'logo! vom Samstag, 31. August 2024',
            'modified_timestamp': 1725129983,
        },
    }, {
        'url': 'https://www.kika.de/kaltstart/videos/video92498',
        'md5': '710ece827e5055094afeb474beacb7aa',
        'info_dict': {
            'id': 'video92498',
            'ext': 'mp4',
            'title': '7. Wo ist Leo?',
            'description': 'md5:fb48396a5b75068bcac1df74f1524920',
            'duration': 436,
            'timestamp': 1702926876,
            'upload_date': '20231218',
            'episode_number': 7,
            'modified_date': '20240319',
            'modified_timestamp': 1710880610,
            'episode': 'Episode 7',
            'season_number': 1,
            'season': 'Season 1',
        },
    }, {
        'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
        'md5': 'ffd1b700d7de0a6616a1d08544c77294',
        'info_dict': {
            'id': 'video90088',
            'ext': 'mp4',
            'upload_date': '20221102',
            'timestamp': 1667390580,
            'duration': 197,
            'modified_timestamp': 1711093771,
            'episode_number': 8,
            'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
            'modified_date': '20240322',
            'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
            'season_number': 1,
            'episode': 'Episode 8',
            'season': 'Season 1',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Two requests: the video document holds the metadata and links to the assets JSON
        doc = self._download_json(f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id)
        video_assets = self._download_json(doc['assets']['url'], video_id)

        # Both subtitle flavours are filed under 'de', TTML first and WebVTT second
        subtitles = {}
        for asset_key, sub_ext in (('videoSubtitle', 'ttml'), ('webvttUrl', 'vtt')):
            sub_url = url_or_none(video_assets.get(asset_key))
            if sub_url:
                subtitles.setdefault('de', []).append({
                    'url': sub_url,
                    'ext': sub_ext,
                })

        metadata = traverse_obj(doc, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('date', {parse_iso8601}),
            'modified_timestamp': ('modificationDate', {parse_iso8601}),
            # Prefer the numeric field, fall back to parsing the textual duration
            'duration': ((
                ('durationInSeconds', {int_or_none}),
                ('duration', {parse_duration})), any),
            'episode_number': ('episodeNumber', {int_or_none}),
            'season_number': ('season', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': list(self._extract_formats(video_assets, video_id)),
            'subtitles': subtitles,
            **metadata,
        }

    def _extract_formats(self, media_info, video_id):
        # Generator over every asset that carries a usable URL
        usable_assets = traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url'])))
        for media in usable_assets:
            stream_url = media['url']
            ext = determine_ext(stream_url)
            if ext == 'm3u8':
                yield from self._extract_m3u8_formats(
                    stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                continue

            # Progressive file: describe it from the asset's own fields
            yield {
                'url': stream_url,
                'format_id': ext,
                **traverse_obj(media, {
                    'width': ('frameWidth', {int_or_none}),
                    'height': ('frameHeight', {int_or_none}),
                    # NB: filesize is 0 if unknown, bitrate is -1 if unknown
                    'filesize': ('fileSize', {int_or_none}, {lambda x: x or None}),
                    'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
                    'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
                }),
            }

View File

@@ -0,0 +1,78 @@
import functools
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_id,
join_nonempty,
parse_duration,
unified_timestamp,
)
from ..utils.traversal import traverse_obj
class LearningOnScreenIE(InfoExtractor):
    _VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013',
        'info_dict': {
            'id': '005D81B2',
            'ext': 'mp4',
            'title': 'Planet Earth',
            'duration': 3600.0,
            'timestamp': 1164567600.0,
            'upload_date': '20061126',
            'thumbnail': 'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg',
        },
    }]

    def _real_initialize(self):
        # Extraction is refused up front unless the site's session cookie is present
        session_cookie = self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE')
        if session_cookie:
            return
        self.raise_login_required(
            'Use --cookies for authentication. See '
            ' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
            'for how to manually pass cookies', method=None)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Traversal paths applied to the HTML of the "programme-details" element
        title_path = ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html})
        timestamp_path = (
            {functools.partial(get_element_by_class, 'broadcast-date')},
            {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp})
        duration_path = (
            {functools.partial(get_element_by_class, 'prog-running-time')},
            {clean_html}, {parse_duration})

        details = traverse_obj(webpage, (
            {functools.partial(get_element_html_by_id, 'programme-details')}, {
                'title': title_path,
                'timestamp': timestamp_path,
                'duration': duration_path,
            }))

        # The title is handled separately from the remaining details; when the
        # heading yields nothing, use the playlist widget's data attribute instead
        title = details.pop('title', None)
        if not title:
            title = traverse_obj(webpage, (
                {functools.partial(get_element_html_by_id, 'add-to-existing-playlist')},
                {extract_attributes}, 'data-record-title', {clean_html}))

        media_entries = self._parse_html5_media_entries(
            'https://stream.learningonscreen.ac.uk', webpage, video_id, m3u8_id='hls', mpd_id='dash',
            _headers={'Origin': 'https://learningonscreen.ac.uk', 'Referer': 'https://learningonscreen.ac.uk/'})
        if not media_entries:
            raise ExtractorError('No video found')

        if len(media_entries) == 1:
            return {
                **media_entries[0],
                **details,
                'id': video_id,
                'title': title,
            }

        # Several media elements: return a playlist. The scraped duration is
        # attached to the playlist itself rather than to the numbered entries.
        duration = details.pop('duration', None)
        for part_number, entry in enumerate(media_entries, start=1):
            entry.update(details)
            entry['id'] = join_nonempty(video_id, part_number)
            entry['title'] = join_nonempty(title, part_number)

        return self.playlist_result(media_entries, video_id, title, duration=duration)

View File

@@ -1,86 +1,11 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
format_field,
int_or_none,
parse_iso8601,
unified_strdate,
)
class LnkGoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
    _TESTS = [{
        'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
        'info_dict': {
            'id': '10809',
            'ext': 'mp4',
            'title': "Put'ka: Trys Klausimai",
            'upload_date': '20161216',
            'description': 'Seniai matytas Putka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
            'age_limit': 18,
            'duration': 117,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1481904000,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }, {
        'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
        'info_dict': {
            'id': '10467',
            'ext': 'mp4',
            'title': 'Nėrdas: Kompiuterio Valymas',
            'upload_date': '20150113',
            'description': 'md5:7352d113a242a808676ff17e69db6a69',
            'age_limit': 18,
            'duration': 346,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1421164800,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }, {
        'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
        'only_matching': True,
    }]
    # Maps the API's pgRating value to an age limit; unlisted ratings fall back to 0
    _AGE_LIMITS = {
        'N-7': 7,
        'N-14': 14,
        'S': 18,
    }
    _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'

    def _real_extract(self, url):
        # The second regex group (episode id) is optional; '0' is sent when it is absent
        display_id, episode_id = self._match_valid_url(url).groups()

        api_url = 'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, episode_id or '0')
        video_info = self._download_json(api_url, display_id)['videoConfig']['videoInfo']

        video_id = str(video_info['id'])
        title = video_info['title']

        # The playlist prefix depends on the isQualityChangeAvailable flag
        if video_info.get('isQualityChangeAvailable'):
            prefix = 'smil'
        else:
            prefix = 'mp4'
        token_params = video_info.get('secureTokenParams') or ''
        m3u8_url = self._M3U8_TEMPL % (prefix, video_info['videoUrl'], token_params)
        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
            'duration': int_or_none(video_info.get('duration')),
            'description': clean_html(video_info.get('htmlDescription')),
            'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
            'timestamp': parse_iso8601(video_info.get('airDate')),
            'view_count': int_or_none(video_info.get('viewsCount')),
        }
class LnkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'

View File

@@ -92,9 +92,9 @@ class LoomIE(InfoExtractor):
},
'params': {'videopassword': 'seniorinfants2'},
}, {
# embed, transcoded-url endpoint sends empty JSON response
# embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
'md5': '8488817242a0db1cb2ad0ea522553cf6',
'md5': 'b321d261656848c184a94e3b93eae28d',
'info_dict': {
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
'ext': 'mp4',
@@ -104,6 +104,7 @@ class LoomIE(InfoExtractor):
'timestamp': 1657216459,
'duration': 181,
},
'params': {'format': 'bestvideo'}, # Test video-only fixup
'expected_warnings': ['Failed to parse JSON'],
}]
_WEBPAGE_TESTS = [{
@@ -293,7 +294,11 @@ class LoomIE(InfoExtractor):
format_url = format_url.replace('-split.m3u8', '.m3u8')
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
# Sometimes only split video/audio formats are available, need to fixup video-only formats
is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec'))
for fmt in m3u8_formats:
if is_not_premerged and fmt.get('vcodec') != 'none':
fmt['acodec'] = 'none'
yield {
**fmt,
'url': update_url(fmt['url'], query=query),

View File

@@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
video_data = None
# fix meta_url if missing the host address
if re.match(r'^\/\+\/', meta_url):
if re.match(r'\/\+\/', meta_url):
meta_url = urljoin('https://my.mail.ru', meta_url)
if meta_url:

View File

@@ -13,8 +13,8 @@ from ..utils import (
class MDRIE(InfoExtractor):
IE_DESC = 'MDR.DE and KiKA'
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
IE_DESC = 'MDR.DE'
_VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
_GEO_COUNTRIES = ['DE']
@@ -34,30 +34,6 @@ class MDRIE(InfoExtractor):
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
},
'skip': '404 not found',
}, {
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
'md5': '4930515e36b06c111213e80d1e4aad0e',
'info_dict': {
'id': '19636',
'ext': 'mp4',
'title': 'Baumhaus vom 30. Oktober 2015',
'duration': 134,
'uploader': 'KIKA',
},
'skip': '404 not found',
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
'info_dict': {
'id': '8182',
'ext': 'mp4',
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
'timestamp': 1482541200,
'upload_date': '20161224',
'duration': 4628,
'uploader': 'KIKA',
},
}, {
# audio with alternative playerURL pattern
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
@@ -68,28 +44,7 @@ class MDRIE(InfoExtractor):
'duration': 3239,
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
},
}, {
# empty bitrateVideo and bitrateAudio
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
'info_dict': {
'id': '128372',
'ext': 'mp4',
'title': 'Der kleine Wichtel kehrt zurück',
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
'duration': 4876,
'timestamp': 1607823300,
'upload_date': '20201213',
'uploader': 'ZDF',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
'only_matching': True,
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
'only_matching': True,
'skip': '404 not found',
}, {
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
'only_matching': True,

View File

@@ -16,6 +16,15 @@ class MediaKlikkIE(InfoExtractor):
(?P<id>[^/#?_]+)'''
_TESTS = [{
'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/',
'info_dict': {
'id': '668177',
'title': 'Az ajtó',
'display_id': 'az-ajto',
'ext': 'mp4',
'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg',
},
}, {
# (old) mediaklikk. date in html.
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
'info_dict': {
@@ -37,6 +46,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230903',
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
},
'skip': 'Webpage redirects to 404 page',
}, {
# (old) m4sport
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
@@ -59,6 +69,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230908',
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
},
'skip': 'Webpage redirects to 404 page',
}, {
# m4sport with *video/ url and no date
'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
@@ -69,6 +80,7 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4',
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
},
'skip': 'Webpage redirects to 404 page',
}, {
# (old) hirado
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
@@ -90,6 +102,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230911',
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
},
'skip': 'Webpage redirects to video list page',
}, {
# (old) petofilive
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
@@ -112,6 +125,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230909',
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
},
'skip': 'Webpage redirects to video list page',
}]
def _real_extract(self, url):
@@ -133,7 +147,9 @@ class MediaKlikkIE(InfoExtractor):
r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
player_data['video'] = player_data.pop('token')
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
player_page = self._download_webpage(
'https://player.mediaklikk.hu/playernew/player.php', video_id,
query=player_data, headers={'Referer': url})
player_json = self._search_json(
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
playlist_url = traverse_obj(
@@ -141,14 +157,14 @@ class MediaKlikkIE(InfoExtractor):
if not playlist_url:
raise ExtractorError('Unable to extract playlist url')
formats = self._extract_wowza_formats(
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id)
return {
'id': video_id,
'title': title,
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
'upload_date': upload_date,
'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
}

View File

@@ -16,7 +16,7 @@ from ..utils import (
class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV'
IE_NAME = 'MangoTV'

View File

@@ -65,7 +65,7 @@ class TechTVMITIE(InfoExtractor):
class OCWMITIE(InfoExtractor):
IE_NAME = 'ocw.mit.edu'
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
_VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
_BASE_URL = 'http://ocw.mit.edu/'
_TESTS = [

View File

@@ -1,16 +1,21 @@
import json
import re
import urllib.parse
import time
import uuid
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
join_nonempty,
jwt_decode_hs256,
parse_duration,
parse_iso8601,
try_get,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
@@ -276,81 +281,225 @@ class MLBVideoIE(MLBBaseIE):
class MLBTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
_NETRC_MACHINE = 'mlb'
_TESTS = [{
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
'info_dict': {
'id': '661581',
'ext': 'mp4',
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
'release_date': '20220702',
'release_timestamp': 1656792300,
},
'params': {
'skip_download': True,
'params': {'skip_download': 'm3u8'},
}, {
# makeup game: has multiple dates, need to avoid games with 'rescheduleDate'
'url': 'https://www.mlb.com/tv/g747039/vd22541c4-5a29-45f7-822b-635ec041cf5e',
'info_dict': {
'id': '747039',
'ext': 'mp4',
'title': '2024-07-29 - Toronto Blue Jays @ Baltimore Orioles',
'release_date': '20240729',
'release_timestamp': 1722280200,
},
'params': {'skip_download': 'm3u8'},
}]
_GRAPHQL_INIT_QUERY = '''\
mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) {
initSession(device: $device, clientType: $clientType, experience: $experience) {
deviceId
sessionId
entitlements {
code
}
location {
countryCode
regionName
zipCode
latitude
longitude
}
clientExperience
features
}
}'''
_GRAPHQL_PLAYBACK_QUERY = '''\
mutation initPlaybackSession(
$adCapabilities: [AdExperienceType]
$mediaId: String!
$deviceId: String!
$sessionId: String!
$quality: PlaybackQuality
) {
initPlaybackSession(
adCapabilities: $adCapabilities
mediaId: $mediaId
deviceId: $deviceId
sessionId: $sessionId
quality: $quality
) {
playbackSessionId
playback {
url
token
expiration
cdn
}
}
}'''
_APP_VERSION = '7.8.2'
_device_id = None
_session_id = None
_access_token = None
_token_expiry = 0
@property
def _api_headers(self):
if (self._token_expiry - 120) <= time.time():
self.write_debug('Access token has expired; re-logging in')
self._perform_login(*self._get_login_info())
return {'Authorization': f'Bearer {self._access_token}'}
def _real_initialize(self):
if not self._access_token:
self.raise_login_required(
'All videos are only available to registered users', method='password')
def _set_device_id(self, username):
if not self._device_id:
self._device_id = self.cache.load(
self._NETRC_MACHINE, 'device_ids', default={}).get(username)
if self._device_id:
return
self._device_id = str(uuid.uuid4())
self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
def _perform_login(self, username, password):
data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
access_token = self._download_json(
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
headers={
'User-Agent': 'okhttp/3.12.1',
'Content-Type': 'application/x-www-form-urlencoded',
}, data=data.encode())['access_token']
try:
self._access_token = self._download_json(
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
'Logging in', 'Unable to log in', headers={
'User-Agent': 'okhttp/3.12.1',
'Content-Type': 'application/x-www-form-urlencoded',
}, data=urlencode_postdata({
'grant_type': 'password',
'username': username,
'password': password,
'scope': 'openid offline_access',
'client_id': '0oa3e1nutA1HLzAKG356',
}))['access_token']
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 400:
raise ExtractorError('Invalid username or password', expected=True)
raise
entitlement = self._download_webpage(
f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None,
headers={
'User-Agent': 'okhttp/3.12.1',
'Authorization': f'Bearer {access_token}',
})
self._token_expiry = traverse_obj(self._access_token, ({jwt_decode_hs256}, 'exp', {int})) or 0
self._set_device_id(username)
data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
self._access_token = self._download_json(
'https://us.edge.bamgrid.com/token', None,
self._session_id = self._call_api({
'operationName': 'initSession',
'query': self._GRAPHQL_INIT_QUERY,
'variables': {
'device': {
'appVersion': self._APP_VERSION,
'deviceFamily': 'desktop',
'knownDeviceId': self._device_id,
'languagePreference': 'ENGLISH',
'manufacturer': '',
'model': '',
'os': '',
'osVersion': '',
},
'clientType': 'WEB',
},
}, None, 'session ID')['data']['initSession']['sessionId']
def _call_api(self, data, video_id, description='GraphQL JSON', fatal=True):
return self._download_json(
'https://media-gateway.mlb.com/graphql', video_id,
f'Downloading {description}', f'Unable to download {description}', fatal=fatal,
headers={
**self._api_headers,
'Accept': 'application/json',
'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
'Content-Type': 'application/x-www-form-urlencoded',
}, data=data.encode())['access_token']
'Content-Type': 'application/json',
'x-client-name': 'WEB',
'x-client-version': self._APP_VERSION,
}, data=json.dumps(data, separators=(',', ':')).encode())
def _extract_formats_and_subtitles(self, broadcast, video_id):
    # Returns a (formats, subtitles) pair for one broadcast; ([], {}) when no stream is playable
    feed = traverse_obj(broadcast, ('homeAway', {str.title}))
    medium = traverse_obj(broadcast, ('type', {str}))
    language = traverse_obj(broadcast, ('language', {str.lower}))
    format_id = join_nonempty(feed, medium, language)

    playback_request = {
        'operationName': 'initPlaybackSession',
        'query': self._GRAPHQL_PLAYBACK_QUERY,
        'variables': {
            'adCapabilities': ['GOOGLE_STANDALONE_AD_PODS'],
            'deviceId': self._device_id,
            'mediaId': broadcast['mediaId'],
            'quality': 'PLACEHOLDER',
            'sessionId': self._session_id,
        },
    }
    response = self._call_api(
        playback_request, video_id, f'{format_id} broadcast JSON', fatal=False)

    playback = traverse_obj(response, ('data', 'initPlaybackSession', 'playback', {dict}))
    m3u8_url = traverse_obj(playback, ('url', {url_or_none}))
    token = traverse_obj(playback, ('token', {str}))

    if not m3u8_url or not token:
        errors = '; '.join(traverse_obj(response, ('errors', ..., 'message', {str})))
        if 'not entitled' in errors:
            raise ExtractorError(errors, expected=True)
        if errors:  # Only warn when 'blacked out' since radio formats are available
            warning = f'API returned errors for {format_id}: {errors}'
        else:
            warning = f'No formats available for {format_id} broadcast; skipping'
        self.report_warning(warning)
        return [], {}

    # The playlist is requested with the token removed from the URL path and sent as a header
    cdn_headers = {'x-cdn-token': token}
    tokenless_url = m3u8_url.replace(f'/{token}/', '/')
    formats, subtitles = self._extract_m3u8_formats_and_subtitles(
        tokenless_url, video_id, 'mp4',
        m3u8_id=format_id, fatal=False, headers=cdn_headers)

    format_note = join_nonempty(feed, medium, delim=' ')
    for fmt in formats:
        fmt['http_headers'] = cdn_headers
        fmt.setdefault('format_note', format_note)
        fmt.setdefault('language', language)
        # Give English audio-only formats a higher source preference
        if fmt.get('vcodec') == 'none' and fmt['language'] == 'en':
            fmt['source_preference'] = 10

    return formats, subtitles
def _real_extract(self, url):
video_id = self._match_id(url)
airings = self._download_json(
f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
video_id)['data']['Airings']
data = self._download_json(
'https://statsapi.mlb.com/api/v1/schedule', video_id, query={
'gamePk': video_id,
'hydrate': 'broadcasts(all),statusFlags',
})
metadata = traverse_obj(data, (
'dates', ..., 'games',
lambda _, v: str(v['gamePk']) == video_id and not v.get('rescheduleDate'), any))
broadcasts = traverse_obj(metadata, (
'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF'))
formats, subtitles = [], {}
for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']):
format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing)
m3u8_url = traverse_obj(self._download_json(
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
note=f'Downloading {format_id} stream info JSON',
errnote=f'Failed to download {format_id} stream info, skipping',
fatal=False, headers={
'Authorization': self._access_token,
'Accept': 'application/vnd.media-service+json; version=2',
}), ('stream', 'complete', {url_or_none}))
if not m3u8_url:
continue
f, s = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
formats.extend(f)
self._merge_subtitles(s, target=subtitles)
for broadcast in broadcasts:
fmts, subs = self._extract_formats_and_subtitles(broadcast, video_id)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return {
'id': video_id,
'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE',
'title': join_nonempty(
traverse_obj(metadata, ('officialDate', {str})),
traverse_obj(metadata, ('teams', ('away', 'home'), 'team', 'name', {str}, all, {' @ '.join})),
delim=' - '),
'is_live': traverse_obj(broadcasts, (..., 'mediaState', 'mediaStateCode', {str}, any)) == 'MEDIA_ON',
'release_timestamp': traverse_obj(metadata, ('gameDate', {parse_iso8601})),
'formats': formats,
'subtitles': subtitles,
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
}

View File

@@ -0,0 +1,121 @@
from .common import InfoExtractor
from ..utils import js_to_json, remove_end, update_url_query
class MojevideoIE(InfoExtractor):
    # Extractor for video pages on mojevideo.sk. The URL carries both the
    # video id and a slug that is exposed as display_id.
    IE_DESC = 'mojevideo.sk'
    _VALID_URL = r'https?://(?:www\.)?mojevideo\.sk/video/(?P<id>\w+)/(?P<display_id>[\w()]+?)\.html'
    _TESTS = [{
        'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html',
        'md5': '384a4628bd2bbd261c5206cf77c38c17',
        'info_dict': {
            'id': '3d17c',
            'ext': 'mp4',
            'title': 'Chlapci dobetónovali sme, máme hotovo!',
            'display_id': 'chlapci_dobetonovali_sme_mame_hotovo',
            'description': 'md5:a0822126044050d304a9ef58c92ddb34',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/250236.jpg',
            'duration': 21.0,
            'upload_date': '20230919',
            'timestamp': 1695129706,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/14677/den_blbec.html',
        'md5': '517c3e111c53a67d10b429c1f344ba2f',
        'info_dict': {
            'id': '14677',
            'ext': 'mp4',
            'title': 'Deň blbec?',
            'display_id': 'den_blbec',
            'description': 'I maličkosť vám môže zmeniť celý deň. Nikdy nezahadzujte žuvačky na zem!',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/83575.jpg',
            'duration': 100.0,
            'upload_date': '20120515',
            'timestamp': 1337076481,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/2feb2/band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd).html',
        'md5': '64599a23d3ac31cf2fe069e4353d8162',
        'info_dict': {
            'id': '2feb2',
            'ext': 'mp4',
            'title': 'BAND-MAID - onset (Instrumental) Live - Zepp Tokyo (Full HD)',
            'display_id': 'band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd)',
            'description': 'Výborná inštrumentálna skladba od skupiny BAND-MAID.',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/196274.jpg',
            'duration': 240.0,
            'upload_date': '20190708',
            'timestamp': 1562576592,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/358c8/dva_nissany_skyline_strielaju_v_londyne.html',
        'only_matching': True,
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/2455d/gopro_hero4_session_nova_sportova_vodotesna_kamera.html',
        'only_matching': True,
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/352ee/amd_rx_6800_xt_vs_nvidia_rtx_3080_(test_v_9_hrach).html',
        'only_matching': True,
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/2cbeb/trailer_z_avengers_infinity_war.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, video_id)

        # Prefer the decimal id embedded in the page (`vId`); when it is not
        # found, fall back to reading the URL id as a hexadecimal number.
        video_id_dec = self._search_regex(
            r'\bvId\s*=\s*(\d+)', webpage, 'video id', fatal=False)
        if not video_id_dec:
            video_id_dec = str(int(video_id, 16))

        video_exp = self._search_regex(r'\bvEx\s*=\s*["\'](\d+)', webpage, 'video expiry')
        video_hashes = self._search_json(
            r'\bvHash\s*=', webpage, 'video hashes', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)

        # (file-name suffix, quality rank, human-readable note). The page's
        # hash array is paired with these positionally; zip() stops at the
        # shorter of the two sequences.
        variants = (
            ('', 1, 'normálna kvalita'),
            ('_lq', 0, 'nízka kvalita'),
            ('_hd', 2, 'HD-720p'),
            ('_fhd', 3, 'FULL HD-1080p'),
            ('_2k', 4, '2K-1440p'),
        )
        formats = [{
            'format_id': f'mp4-{quality}',
            'quality': quality,
            'format_note': format_note,
            'url': update_url_query(
                f'https://cache01.mojevideo.sk/securevideos69/{video_id_dec}{suffix}.mp4', {
                    'md5': video_hash,
                    'expires': video_exp,
                }),
        } for video_hash, (suffix, quality, format_note) in zip(video_hashes, variants)]

        # OpenGraph title first; otherwise the HTML <title> minus the site suffix
        title = self._og_search_title(webpage, default=None)
        if not title:
            title = remove_end(self._html_extract_title(webpage, 'title'), ' - Mojevideo')
        description = self._og_search_description(webpage)
        json_ld_info = self._search_json_ld(webpage, video_id, default={})

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'description': description,
            # JSON-LD fields are unpacked last, so they override the keys above
            **json_ld_info,
        }

View File

@@ -40,7 +40,6 @@ class NiconicoIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215',
'md5': 'd1a75c0823e2f629128c43e1212760f9',
'info_dict': {
'id': 'sm22312215',
'ext': 'mp4',
@@ -56,8 +55,8 @@ class NiconicoIE(InfoExtractor):
'comment_count': int,
'genres': ['未設定'],
'tags': [],
'expected_protocol': str,
},
'params': {'skip_download': 'm3u8'},
}, {
# File downloaded with and without credentials are different, so omit
# the md5 field
@@ -77,8 +76,8 @@ class NiconicoIE(InfoExtractor):
'view_count': int,
'genres': ['音楽・サウンド'],
'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
'expected_protocol': str,
},
'params': {'skip_download': 'm3u8'},
}, {
# 'video exists but is marked as "deleted"
# md5 is unstable
@@ -112,7 +111,6 @@ class NiconicoIE(InfoExtractor):
}, {
# video not available via `getflv`; "old" HTML5 video
'url': 'http://www.nicovideo.jp/watch/sm1151009',
'md5': 'f95a3d259172667b293530cc2e41ebda',
'info_dict': {
'id': 'sm1151009',
'ext': 'mp4',
@@ -128,11 +126,10 @@ class NiconicoIE(InfoExtractor):
'comment_count': int,
'genres': ['ゲーム'],
'tags': [],
'expected_protocol': str,
},
'params': {'skip_download': 'm3u8'},
}, {
# "New" HTML5 video
# md5 is unstable
'url': 'http://www.nicovideo.jp/watch/sm31464864',
'info_dict': {
'id': 'sm31464864',
@@ -149,12 +146,11 @@ class NiconicoIE(InfoExtractor):
'comment_count': int,
'genres': ['アニメ'],
'tags': [],
'expected_protocol': str,
},
'params': {'skip_download': 'm3u8'},
}, {
# Video without owner
'url': 'http://www.nicovideo.jp/watch/sm18238488',
'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
'info_dict': {
'id': 'sm18238488',
'ext': 'mp4',
@@ -168,8 +164,8 @@ class NiconicoIE(InfoExtractor):
'comment_count': int,
'genres': ['エンターテイメント'],
'tags': [],
'expected_protocol': str,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True,
@@ -424,7 +420,7 @@ class NiconicoIE(InfoExtractor):
'x-request-with': 'https://www.nicovideo.jp',
})['data']['contentUrl']
# Getting all audio formats results in duplicate video formats which we filter out later
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
@@ -436,7 +432,6 @@ class NiconicoIE(InfoExtractor):
'asr': ('samplingRate', {int_or_none}),
}), get_all=False),
'acodec': 'aac',
'ext': 'm4a',
}
# Sort before removing dupes to keep the format dicts with the lowest tbr
@@ -458,9 +453,11 @@ class NiconicoIE(InfoExtractor):
if video_id.startswith('so'):
video_id = self._match_id(handle.url)
api_data = self._parse_json(self._html_search_regex(
'data-api-data="([^"]+)"', webpage,
'API data', default='{}'), video_id)
api_data = traverse_obj(
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
('data', 'response', {dict}))
if not api_data:
raise ExtractorError('Server response data not found')
except ExtractorError as e:
try:
api_data = self._download_json(

View File

@@ -10,7 +10,7 @@ from ..utils import (
class NZOnScreenIE(InfoExtractor):
_VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
'info_dict': {

View File

@@ -1,9 +1,6 @@
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
)
class NZZIE(InfoExtractor):
@@ -22,19 +19,14 @@ class NZZIE(InfoExtractor):
'playlist_count': 1,
}]
def _entries(self, webpage, page_id):
    # Yield one entry per JW Player <script> block found in the page whose
    # `var settings = {...}` object can be parsed into a non-empty result.
    player_scripts = re.findall(
        r'(?s)<script[^>]* data-hid="jw-video-jw[^>]+>(.+?)</script>', webpage)
    for script_body in player_scripts:
        jw_settings = self._search_json(
            r'var\s+settings\s*=[^{]*', script_body, 'settings', page_id, fatal=False)
        parsed = self._parse_jwplayer_data(jw_settings, page_id)
        if parsed:
            yield parsed
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
entries = []
for player_element in re.findall(
r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
player_params = extract_attributes(player_element)
if player_params.get('data-type') not in ('kaltura_singleArticle',):
self.report_warning('Unsupported player type')
continue
entry_id = player_params['data-id']
entries.append(self.url_result(
'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
return self.playlist_result(entries, page_id)
return self.playlist_result(self._entries(webpage, page_id), page_id)

View File

@@ -1,9 +1,19 @@
from .common import InfoExtractor
from ..utils import int_or_none, try_get
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
parse_qs,
try_get,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class OlympicsReplayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P<id>[^/#&?]+)'
_VALID_URL = r'https?://(?:www\.)?olympics\.com/[a-z]{2}/(?:paris-2024/)?(?:replay|videos?|original-series/episode)/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays',
'info_dict': {
@@ -11,26 +21,105 @@ class OlympicsReplayIE(InfoExtractor):
'ext': 'mp4',
'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020',
'upload_date': '20210801',
'timestamp': 1627783200,
'timestamp': 1627797600,
'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3',
'uploader': 'International Olympic Committee',
},
'params': {
'skip_download': True,
'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/nua4o7zwyaznoaejpbk2',
'duration': 7017.0,
},
}, {
'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp',
'only_matching': True,
'url': 'https://olympics.com/en/original-series/episode/b-boys-and-b-girls-take-the-spotlight-breaking-life-road-to-paris-2024',
'info_dict': {
'id': '32633650-c5ee-4280-8b94-fb6defb6a9b5',
'ext': 'mp4',
'title': 'B-girl Nicka - Breaking Life, Road to Paris 2024 | Episode 1',
'upload_date': '20240517',
'timestamp': 1715948200,
'description': 'md5:f63d728a41270ec628f6ac33ce471bb1',
'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/a3j96l7j6so3vyfijby1',
'duration': 1321.0,
},
}, {
'url': 'https://olympics.com/en/paris-2024/videos/men-s-preliminaries-gbr-esp-ned-rsa-hockey-olympic-games-paris-2024',
'info_dict': {
'id': '3d96db23-8eee-4b7c-8ef5-488a0361026c',
'ext': 'mp4',
'title': 'Men\'s Preliminaries GBR-ESP & NED-RSA | Hockey | Olympic Games Paris 2024',
'upload_date': '20240727',
'timestamp': 1722066600,
},
'skip': 'Geo-restricted to RU, BR, BT, NP, TM, BD, TL',
}, {
'url': 'https://olympics.com/en/paris-2024/videos/dnp-suni-lee-i-have-goals-and-i-have-expectations-for-myself-but-i-also-am-trying-to-give-myself-grace',
'info_dict': {
'id': 'a42f37ab-8a74-41d0-a7d9-af27b7b02a90',
'ext': 'mp4',
'title': 'md5:c7cfbc9918636a98e66400a812e4d407',
'upload_date': '20240729',
'timestamp': 1722288600,
},
}]
_GEO_BYPASS = False
def _extract_from_nextjs_data(self, webpage, video_id):
    # Build the info dict from the page's Next.js data. Returns None when the
    # page has no 'videoPlaylist' item with a 'currentVideo' dict, so the
    # caller can try another extraction path.
    nextjs_data = self._search_nextjs_data(webpage, video_id, default={})
    video = traverse_obj(nextjs_data, (
        'props', 'pageProps', 'page', 'items',
        lambda _, v: v['name'] == 'videoPlaylist', 'data', 'currentVideo', {dict}, any))
    if not video:
        return None

    allowed_countries = traverse_obj(video, ('countries', ..., {str}))
    if traverse_obj(video, ('geoRestrictedVideo', {bool})):
        self.raise_geo_restricted(countries=allowed_countries)

    is_live = traverse_obj(video, ('streamingStatus', {str})) == 'LIVE'

    # 'videoUrl' is preferred; 'streamUrl' is the mandatory fallback
    m3u8_url = traverse_obj(video, ('videoUrl', {url_or_none}))
    if not m3u8_url:
        m3u8_url = video['streamUrl']
    tokenized_url = self._tokenize_url(m3u8_url, video['jwtToken'], is_live, video_id)

    try:
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            tokenized_url, video_id, 'mp4', m3u8_id='hls')
    except ExtractorError as error:
        # An HTTP error whose message mentions 'georestricted' is reported
        # as a geo restriction; anything else is re-raised unchanged.
        if isinstance(error.cause, HTTPError) and 'georestricted' in error.cause.msg:
            self.raise_geo_restricted(countries=allowed_countries)
        raise

    metadata = traverse_obj(video, {
        'id': ('videoID', {str}),
        'title': ('title', {str}),
        'timestamp': ('contentDate', {parse_iso8601}),
    })
    return {
        'formats': formats,
        'subtitles': subtitles,
        'is_live': is_live,
        **metadata,
    }
def _tokenize_url(self, url, token, is_live, video_id):
    # Exchange a manifest URL plus the page's token for a tokenized URL.
    # The original URL's query parameters are forwarded alongside the
    # query-less URL itself; the explicit keys below come after the unpacked
    # ones, so they win on any name clash.
    token_query = {
        **parse_qs(url),
        'url': update_url(url, query=None),
        'service-id': 'live' if is_live else 'vod',
        'user-auth': token,
    }
    response = self._download_json(
        'https://metering.olympics.com/tokengenerator', video_id,
        'Downloading tokenized m3u8 url', query=token_query)
    return response['data']['url']
def _legacy_tokenize_url(self, url, video_id):
    # Older token endpoint: the JSON response itself is returned as-is.
    legacy_query = {'url': url}
    tokenized = self._download_json(
        'https://olympics.com/tokenGenerator', video_id,
        'Downloading legacy tokenized m3u8 url', query=legacy_query)
    return tokenized
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if info := self._extract_from_nextjs_data(webpage, video_id):
return info
title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage)
uuid = self._html_search_meta('episode_uid', webpage)
video_uuid = self._html_search_meta('episode_uid', webpage)
m3u8_url = self._html_search_meta('video_url', webpage)
json_ld = self._search_json_ld(webpage, uuid)
json_ld = self._search_json_ld(webpage, video_uuid)
thumbnails_list = json_ld.get('image')
if not thumbnails_list:
thumbnails_list = self._html_search_regex(
@@ -48,12 +137,12 @@ class OlympicsReplayIE(InfoExtractor):
'width': width,
'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)),
})
m3u8_url = self._download_json(
f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
self._legacy_tokenize_url(m3u8_url, video_uuid), video_uuid, 'mp4', m3u8_id='hls')
return {
'id': uuid,
'id': video_uuid,
'title': title,
'thumbnails': thumbnails,
'formats': formats,

View File

@@ -420,7 +420,7 @@ class PatreonIE(PatreonBaseIE):
class PatreonCampaignIE(PatreonBaseIE):
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m/(?P<campaign_id>\d+))|(?P<vanity>[-\w]+))'
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m|api/campaigns)/(?P<campaign_id>\d+)|(?P<vanity>[-\w]+))'
_TESTS = [{
'url': 'https://www.patreon.com/dissonancepod/',
'info_dict': {
@@ -442,25 +442,44 @@ class PatreonCampaignIE(PatreonBaseIE):
'url': 'https://www.patreon.com/m/4767637/posts',
'info_dict': {
'title': 'Not Just Bikes',
'channel_follower_count': int,
'id': '4767637',
'channel_id': '4767637',
'channel_url': 'https://www.patreon.com/notjustbikes',
'description': 'md5:595c6e7dca76ae615b1d38c298a287a1',
'description': 'md5:9f4b70051216c4d5c58afe580ffc8d0f',
'age_limit': 0,
'channel': 'Not Just Bikes',
'uploader_url': 'https://www.patreon.com/notjustbikes',
'uploader': 'Not Just Bikes',
'uploader': 'Jason',
'uploader_id': '37306634',
'thumbnail': r're:^https?://.*$',
},
'playlist_mincount': 71,
}, {
'url': 'https://www.patreon.com/api/campaigns/4243769/posts',
'info_dict': {
'title': 'Second Thought',
'channel_follower_count': int,
'id': '4243769',
'channel_id': '4243769',
'channel_url': 'https://www.patreon.com/secondthought',
'description': 'md5:69c89a3aba43efdb76e85eb023e8de8b',
'age_limit': 0,
'channel': 'Second Thought',
'uploader_url': 'https://www.patreon.com/secondthought',
'uploader': 'JT Chapman',
'uploader_id': '32718287',
'thumbnail': r're:^https?://.*$',
},
'playlist_mincount': 201,
}, {
'url': 'https://www.patreon.com/dissonancepod/posts',
'only_matching': True,
}, {
'url': 'https://www.patreon.com/m/5932659',
'only_matching': True,
}, {
'url': 'https://www.patreon.com/api/campaigns/4243769',
'only_matching': True,
}]
@classmethod

View File

@@ -109,7 +109,7 @@ class PinterestBaseIE(InfoExtractor):
class PinterestIE(PinterestBaseIE):
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?P<id>\d+)'
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?:[\w-]+--)?(?P<id>\d+)'
_TESTS = [{
# formats found in data['videos']
'url': 'https://www.pinterest.com/pin/664281013778109217/',
@@ -174,6 +174,25 @@ class PinterestIE(PinterestBaseIE):
}, {
'url': 'https://co.pinterest.com/pin/824721750502199491/',
'only_matching': True,
},
{
'url': 'https://pinterest.com/pin/dive-into-serenity-blue-lagoon-pedi-nails-for-a-tranquil-and-refreshing-spa-experience-video-in-2024--2885187256207927',
'info_dict': {
'id': '2885187256207927',
'ext': 'mp4',
'title': 'Dive into Serenity: Blue Lagoon Pedi Nails for a Tranquil and Refreshing Spa Experience! 💙💅',
'description': 'md5:5da41c767d2317e42e49b663b0b2150f',
'uploader': 'Glamour Artistry |Everyday Outfits, Luxury Fashion & Nail Designs',
'uploader_id': '1142999717836434688',
'upload_date': '20240702',
'timestamp': 1719939156,
'duration': 7.967,
'comment_count': int,
'repost_count': int,
'categories': 'count:9',
'tags': ['#BlueLagoonPediNails', '#SpaExperience'],
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
},
}]
def _real_extract(self, url):

View File

@@ -628,8 +628,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
page_entries = self._extract_entries(webpage, host)
if not page_entries:
break
for e in page_entries:
yield e
yield from page_entries
if not self._has_more(webpage):
break

View File

@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
join_nonempty,
time_seconds,
try_call,
unified_timestamp,
@@ -167,7 +168,7 @@ class RadikoBaseIE(InfoExtractor):
class RadikoIE(RadikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<timestring>\d+)'
_TESTS = [{
# QRR (文化放送) station provides <desc>
@@ -183,8 +184,9 @@ class RadikoIE(RadikoBaseIE):
}]
def _real_extract(self, url):
station, video_id = self._match_valid_url(url).groups()
vid_int = unified_timestamp(video_id, False)
station, timestring = self._match_valid_url(url).group('station', 'timestring')
video_id = join_nonempty(station, timestring)
vid_int = unified_timestamp(timestring, False)
prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
auth_token, area_id = self._auth_client()
@@ -207,7 +209,7 @@ class RadikoIE(RadikoBaseIE):
'ft': radio_begin,
'end_at': radio_end,
'to': radio_end,
'seek': video_id,
'seek': timestring,
},
),
}

View File

@@ -16,7 +16,7 @@ from ..utils import (
class RadioFranceIE(InfoExtractor):
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
_VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
IE_NAME = 'radiofrance'
_TEST = {

View File

@@ -6,7 +6,7 @@ from ..utils import (
class ReverbNationIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_TESTS = [{
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',

View File

@@ -8,7 +8,7 @@ from ..utils import js_to_json
class RTPIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:(?:estudoemcasa|palco|zigzag)/)?p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
'md5': 'e736ce0c665e459ddb818546220b4ef8',
@@ -19,9 +19,25 @@ class RTPIE(InfoExtractor):
'description': 'As paixões musicais de António Cartaxo e António Macedo',
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'https://www.rtp.pt/play/zigzag/p13166/e757904/25-curiosidades-25-de-abril',
'md5': '9a81ed53f2b2197cfa7ed455b12f8ade',
'info_dict': {
'id': 'e757904',
'ext': 'mp4',
'title': '25 Curiosidades, 25 de Abril',
'description': 'Estudar ou não estudar - Em cada um dos episódios descobrimos uma curiosidade acerca de como era viver em Portugal antes da revolução do 25 de abr',
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True,
}, {
'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/portugues-1-ano',
'only_matching': True,
}, {
'url': 'https://www.rtp.pt/play/palco/p13785/l7nnon',
'only_matching': True,
}]
_RX_OBFUSCATION = re.compile(r'''(?xs)
@@ -49,17 +65,17 @@ class RTPIE(InfoExtractor):
f, config = self._search_regex(
r'''(?sx)
var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
(?:var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*)?
var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
''', webpage,
'player config', group=('f', 'config'))
f = self._parse_json(
f, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
config = self._parse_json(
config, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
f = config['file'] if not f else self._parse_json(
f, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
formats = []
if isinstance(f, dict):

View File

@@ -8,14 +8,17 @@ from ..utils import (
UnsupportedError,
clean_html,
determine_ext,
extract_attributes,
format_field,
get_element_by_class,
get_elements_html_by_class,
int_or_none,
join_nonempty,
parse_count,
parse_iso8601,
traverse_obj,
unescapeHTML,
urljoin,
)
@@ -382,8 +385,10 @@ class RumbleChannelIE(InfoExtractor):
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
break
raise
for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage):
yield self.url_result('https://rumble.com' + video_url)
for video_url in traverse_obj(
get_elements_html_by_class('videostream__link', webpage), (..., {extract_attributes}, 'href'),
):
yield self.url_result(urljoin('https://rumble.com', video_url))
def _real_extract(self, url):
url, playlist_id = self._match_valid_url(url).groups()

View File

@@ -6,6 +6,7 @@ from ..utils import (
determine_ext,
int_or_none,
parse_qs,
traverse_obj,
try_get,
unified_timestamp,
url_or_none,
@@ -80,6 +81,8 @@ class RutubeBaseIE(InfoExtractor):
'url': format_url,
'format_id': format_id,
})
for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False))
return formats
def _download_and_extract_formats(self, video_id, query=None):
@@ -90,7 +93,7 @@ class RutubeBaseIE(InfoExtractor):
class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_VALID_URL = r'https?://rutube\.ru/(?:(?:live/)?video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
_TESTS = [{
@@ -164,6 +167,29 @@ class RutubeIE(RutubeBaseIE):
'uploader': 'Стас Быков',
},
'expected_warnings': ['Unable to download f4m'],
}, {
'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
'info_dict': {
'id': 'c58f502c7bb34a8fcdd976b221fca292',
'ext': 'mp4',
'categories': ['Телепередачи'],
'description': '',
'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
'live_status': 'is_live',
'age_limit': 0,
'uploader_id': '23460655',
'timestamp': 1652972968,
'view_count': int,
'upload_date': '20220519',
'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'uploader': 'Первый канал',
},
}, {
'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
'only_matching': True,
}, {
'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/',
'only_matching': True,
}]
@classmethod

View File

@@ -36,7 +36,7 @@ class SampleFocusIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, display_id, impersonate=True)
sample_id = self._search_regex(
r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
@@ -82,7 +82,15 @@ class SampleFocusIE(InfoExtractor):
return {
'id': sample_id,
'title': title,
'url': mp3_url,
'formats': [{
'url': mp3_url,
'ext': 'mp3',
'vcodec': 'none',
'acodec': 'mp3',
'http_headers': {
'Referer': url,
},
}],
'display_id': display_id,
'thumbnail': thumbnail,
'uploader': uploader,

View File

@@ -0,0 +1,33 @@
from .common import InfoExtractor
class ScreenRecIE(InfoExtractor):
    # Extractor for screenrec.com share links (10-character share ids).
    _VALID_URL = r'https?://(?:www\.)?screenrec\.com/share/(?P<id>\w{10})'
    _TESTS = [{
        'url': 'https://screenrec.com/share/DasLtbknYo',
        'info_dict': {
            'id': 'DasLtbknYo',
            'ext': 'mp4',
            'title': '02.05.2024_03.01.25_REC',
            'description': 'Recorded with ScreenRec',
            'thumbnail': r're:^https?://.*\.gif$',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The manifest URL is the quoted value of `customUrl:` in the page;
        # the backreference makes the closing quote match the opening one.
        m3u8_url = self._search_regex(
            r'customUrl\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 URL', group='url')

        # OpenGraph title first, then the HTML <title>
        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_extract_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }

36
yt_dlp/extractor/sen.py Normal file
View File

@@ -0,0 +1,36 @@
from .common import InfoExtractor
from ..utils import url_or_none
from ..utils.traversal import traverse_obj
class SenIE(InfoExtractor):
    # Extractor for sen.com video pages; metadata comes from the public
    # content API and the stream is an HLS manifest.
    _VALID_URL = r'https?://(?:www\.)?sen\.com/video/(?P<id>[0-9a-f-]+)'
    _TEST = {
        'url': 'https://www.sen.com/video/eef46eb1-4d79-4e28-be9d-bd937767f8c4',
        'md5': 'ff615aca9691053c94f8f10d96cd7884',
        'info_dict': {
            'id': 'eef46eb1-4d79-4e28-be9d-bd937767f8c4',
            'ext': 'mp4',
            'description': 'Florida, 28 Sep 2022',
            'title': 'Hurricane Ian',
            'tags': ['North America', 'Storm', 'Weather'],
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        api_data = self._download_json(
            f'https://api.sen.com/content/public/video/{video_id}', video_id)

        # Manifest URL from the 'player' node; when the API does not provide
        # one, fall back to the URL derived from the video id.
        m3u8_url = traverse_obj(api_data, (
            'data', 'nodes', lambda _, v: v['id'] == 'player', 'video', 'url', {url_or_none}, any))
        if not m3u8_url:
            m3u8_url = f'https://vod.sen.com/videos/{video_id}/manifest.m3u8'

        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')

        # Title, description and tags are read from the 'details' node
        details = traverse_obj(api_data, (
            'data', 'nodes', lambda _, v: v['id'] == 'details', any, 'content', {
                'title': ('title', 'text', {str}),
                'description': ('descriptions', 0, 'text', {str}),
                'tags': ('badges', ..., 'text', {str}),
            }))

        return {
            'id': video_id,
            'formats': formats,
            **details,
        }

View File

@@ -27,7 +27,7 @@ class ServusIE(InfoExtractor):
'info_dict': {
'id': 'AA-28BYCQNH92111',
'ext': 'mp4',
'title': 'Klettersteige in den Alpen',
'title': 'Vie Ferrate - Klettersteige in den Alpen',
'description': 'md5:25e47ddd83a009a0f9789ba18f2850ce',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2823,
@@ -38,6 +38,7 @@ class ServusIE(InfoExtractor):
'season_number': 11,
'episode': 'Episode 8 - Vie Ferrate Klettersteige in den Alpen',
'episode_number': 8,
'categories': ['Bergwelten'],
},
'params': {'skip_download': 'm3u8'},
}, {
@@ -71,8 +72,11 @@ class ServusIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url).upper()
webpage = self._download_webpage(url, video_id)
next_data = self._search_nextjs_data(webpage, video_id, fatal=False)
video = self._download_json(
'https://api-player.redbull.com/stv/servus-tv?timeZone=Europe/Berlin',
'https://api-player.redbull.com/stv/servus-tv-playnet',
video_id, 'Downloading video JSON', query={'videoId': video_id})
if not video.get('videoUrl'):
self._report_errors(video)
@@ -89,7 +93,7 @@ class ServusIE(InfoExtractor):
return {
'id': video_id,
'title': video.get('title'),
'description': self._get_description(video_id) or video.get('description'),
'description': self._get_description(next_data) or video.get('description'),
'thumbnail': video.get('poster'),
'duration': float_or_none(video.get('duration')),
'timestamp': unified_timestamp(video.get('currentSunrise')),
@@ -100,16 +104,19 @@ class ServusIE(InfoExtractor):
'episode_number': episode_number,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(next_data, ('props', 'pageProps', 'data', {
'title': ('title', 'rendered', {str}),
'timestamp': ('stv_date', 'raw', {int}),
'duration': ('stv_duration', {float_or_none}),
'categories': ('category_names', ..., {str}),
})),
}
def _get_description(self, video_id):
info = self._download_json(
f'https://backend.servustv.com/wp-json/rbmh/v2/media_asset/aa_id/{video_id}?fieldset=page',
video_id, fatal=False)
return join_nonempty(*traverse_obj(info, (
('stv_short_description', 'stv_long_description'),
{lambda x: unescapeHTML(x.replace('\n\n', '\n'))})), delim='\n\n')
def _get_description(self, next_data):
    # Join the short and long descriptions from the Next.js page data with a
    # blank line between them. In each part, double newlines are collapsed
    # to single ones before HTML entities are unescaped.
    description_parts = traverse_obj(next_data, (
        'props', 'pageProps', 'data',
        ('stv_short_description', 'stv_long_description'), {str},
        {lambda x: x.replace('\n\n', '\n')}, {unescapeHTML}))
    return join_nonempty(*description_parts, delim='\n\n')
def _report_errors(self, video):
playability_errors = traverse_obj(video, ('playabilityErrors', ...))

View File

@@ -0,0 +1,76 @@
from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, url_or_none
from ..utils.traversal import traverse_obj
class SnapchatSpotlightIE(InfoExtractor):
    # Extractor for Snapchat Spotlight pages; everything is read from the
    # Next.js page props embedded in the HTML.
    _VALID_URL = r'https?://(?:www\.)?snapchat\.com/spotlight/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.snapchat.com/spotlight/W7_EDlXWTBiXAEEniNoMPwAAYYWtidGhudGZpAX1TKn0JAX1TKnXJAAAAAA',
        'md5': '46c580f63592d0cbb76e974d2f9f0fcc',
        'info_dict': {
            'id': 'W7_EDlXWTBiXAEEniNoMPwAAYYWtidGhudGZpAX1TKn0JAX1TKnXJAAAAAA',
            'ext': 'mp4',
            'title': 'Views 💕',
            'description': '',
            'thumbnail': r're:https://cf-st\.sc-cdn\.net/d/kKJHIR1QAznRKK9jgYYDq\.256\.IRZXSOY',
            'duration': 4.665,
            'timestamp': 1637777831.369,
            'upload_date': '20211124',
            'repost_count': int,
            'uploader': 'shreypatel57',
            'uploader_url': 'https://www.snapchat.com/add/shreypatel57',
        },
    }, {
        'url': 'https://www.snapchat.com/spotlight/W7_EDlXWTBiXAEEniNoMPwAAYcnVjYWdwcGV1AZEaIYn5AZEaIYnrAAAAAQ',
        'md5': '4cd9626458c1a0e3e6dbe72c544a9ec2',
        'info_dict': {
            'id': 'W7_EDlXWTBiXAEEniNoMPwAAYcnVjYWdwcGV1AZEaIYn5AZEaIYnrAAAAAQ',
            'ext': 'mp4',
            'title': 'Spotlight Snap',
            'description': 'How he flirt her teacher🤭🤭🤩😍 #kdrama#cdrama #dramaclips #dramaspotlight',
            'thumbnail': r're:https://cf-st\.sc-cdn\.net/i/ztfr6xFs0FOcFhwVczWfj\.256\.IRZXSOY',
            'duration': 10.91,
            'timestamp': 1722720291.307,
            'upload_date': '20240803',
            'view_count': int,
            'repost_count': int,
            'uploader': 'ganda0535',
            'uploader_url': 'https://www.snapchat.com/add/ganda0535',
            'tags': ['#dramaspotlight', '#dramaclips', '#cdrama', '#kdrama'],
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        page_props = self._search_nextjs_data(webpage, video_id)['props']['pageProps']

        def ms_to_seconds(value):
            # Millisecond values are scaled down by 1000
            return float_or_none(value, 1000)

        def none_if_minus_one(value):
            # A count of -1 is reported as missing rather than as a number
            return None if value == -1 else value

        # Metadata of the story whose id equals the id from the URL.
        # NOTE(review): the trailing `None` is passed to traverse_obj as an
        # alternative path; presumably it falls back to page_props itself
        # when no story matches - confirm against traverse_obj's docs.
        story_metadata_path = (
            'spotlightFeed', 'spotlightStories',
            lambda _, v: v['story']['storyId']['value'] == video_id, 'metadata', any)
        video_data = traverse_obj(page_props, story_metadata_path, None)

        video_metadata = traverse_obj(video_data, ('videoMetadata', {
            'title': ('name', {str}),
            'description': ('description', {str}),
            'timestamp': ('uploadDateMs', {ms_to_seconds}),
            'view_count': ('viewCount', {int_or_none}, {none_if_minus_one}),
            'repost_count': ('shareCount', {int_or_none}),
            'url': ('contentUrl', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
            'duration': ('durationMs', {ms_to_seconds}),
            'thumbnail': ('thumbnailUrl', {url_or_none}),
            'uploader': ('creator', 'personCreator', 'username', {str}),
            'uploader_url': ('creator', 'personCreator', 'url', {url_or_none}),
        }))
        story_metadata = traverse_obj(video_data, {
            'description': ('description', {str}),
            'tags': ('hashtags', ..., {str}),
            'view_count': ('engagementStats', 'viewCount', {int_or_none}, {none_if_minus_one}),
            'repost_count': ('engagementStats', 'shareCount', {int_or_none}),
        })

        return {
            'id': video_id,
            'ext': 'mp4',
            **video_metadata,
            # Unpacked last, so story-level values override videoMetadata ones
            **story_metadata,
        }

View File

@@ -472,7 +472,7 @@ class SVTPageIE(SVTBaseIE):
title = self._og_search_title(webpage)
urql_state = self._search_json(
r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)
r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}

View File

@@ -8,7 +8,7 @@ from ..utils import (
class Tele13IE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
_VALID_URL = r'https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
_TESTS = [
{
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',

View File

@@ -1,33 +1,31 @@
import base64
import datetime as dt
import functools
import itertools
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import int_or_none, traverse_obj, urlencode_postdata, urljoin
from ..utils import int_or_none, traverse_obj, url_or_none, urljoin
class TenPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
_NETRC_MACHINE = '10play'
_TESTS = [{
'url': 'https://10play.com.au/neighbours/web-extras/season-39/nathan-borg-is-the-first-aussie-actor-with-a-cochlear-implant-to-join-neighbours/tpv210128qupwd',
'url': 'https://10play.com.au/neighbours/web-extras/season-41/heres-a-first-look-at-mischa-bartons-neighbours-debut/tpv230911hyxnz',
'info_dict': {
'id': '6226844312001',
'id': '6336940246112',
'ext': 'mp4',
'title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43',
'duration': 186,
'season': 'Season 39',
'season_number': 39,
'title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
'alt_title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
'description': 'Neighbours Premieres Monday, September 18 At 4:30pm On 10 And 10 Play And 6:30pm On 10 Peach',
'duration': 74,
'season': 'Season 41',
'season_number': 41,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'uploader': 'Channel 10',
'age_limit': 15,
'timestamp': 1611810000,
'upload_date': '20210128',
'timestamp': 1694386800,
'upload_date': '20230910',
'uploader_id': '2199827728001',
},
'params': {
@@ -35,21 +33,30 @@ class TenPlayIE(InfoExtractor):
},
'skip': 'Only available in Australia',
}, {
'url': 'https://10play.com.au/todd-sampsons-body-hack/episodes/season-4/episode-7/tpv200921kvngh',
'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp',
'info_dict': {
'id': '6192880312001',
'id': '9000000000091177',
'ext': 'mp4',
'title': "Todd Sampson's Body Hack - S4 Ep. 2",
'description': 'md5:fa278820ad90f08ea187f9458316ac74',
'title': 'Neighbours - S42 Ep. 9107',
'alt_title': 'Thu 05 Sep',
'description': 'md5:37a1f4271be34b9ee2b533426a5fbaef',
'duration': 1388,
'episode': 'Episode 9107',
'episode_number': 9107,
'season': 'Season 42',
'season_number': 42,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'age_limit': 15,
'timestamp': 1600770600,
'upload_date': '20200922',
'timestamp': 1725517860,
'upload_date': '20240905',
'uploader': 'Channel 10',
'uploader_id': '2199827728001',
},
'params': {
'skip_download': True,
},
'skip': 'Only available in Australia',
}, {
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
'only_matching': True,
@@ -66,55 +73,42 @@ class TenPlayIE(InfoExtractor):
'X': 18,
}
def _get_bearer_token(self, video_id):
username, password = self._get_login_info()
if username is None or password is None:
self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
_timestamp = dt.datetime.now().strftime('%Y%m%d000000')
_auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
'X-Network-Ten-Auth': _auth_header,
}, data=urlencode_postdata({
'email': username,
'password': password,
}))
return 'Bearer ' + data['jwt']['accessToken']
def _real_extract(self, url):
content_id = self._match_id(url)
data = self._download_json(
'https://10play.com.au/api/v1/videos/' + content_id, content_id)
headers = {}
if data.get('memberGated') is True:
_token = self._get_bearer_token(content_id)
headers = {'Authorization': _token}
_video_url = self._download_json(
data.get('playbackApiEndpoint'), content_id, 'Downloading video JSON',
headers=headers).get('source')
m3u8_url = self._request_webpage(HEADRequest(
_video_url), content_id).url
video_data = self._download_json(
f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}',
content_id, 'Downloading video JSON')
m3u8_url = self._request_webpage(
HEADRequest(video_data['items'][0]['HLSURL']),
content_id, 'Checking stream URL').url
if '10play-not-in-oz' in m3u8_url:
self.raise_geo_restricted(countries=['AU'])
# Attempt to get a higher quality stream
m3u8_url = m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000')
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
return {
'id': content_id,
'formats': formats,
'subtitles': {'en': [{'url': data.get('captionUrl')}]} if data.get('captionUrl') else None,
'id': data.get('altId') or content_id,
'duration': data.get('duration'),
'title': data.get('subtitle'),
'alt_title': data.get('title'),
'description': data.get('description'),
'age_limit': self._AUS_AGES.get(data.get('classification')),
'series': data.get('tvShow'),
'season_number': int_or_none(data.get('season')),
'episode_number': int_or_none(data.get('episode')),
'timestamp': data.get('published'),
'thumbnail': data.get('imageUrl'),
'subtitles': {'en': [{'url': data['captionUrl']}]} if url_or_none(data.get('captionUrl')) else None,
'uploader': 'Channel 10',
'uploader_id': '2199827728001',
**traverse_obj(data, {
'id': ('altId', {str}),
'duration': ('duration', {int_or_none}),
'title': ('subtitle', {str}),
'alt_title': ('title', {str}),
'description': ('description', {str}),
'age_limit': ('classification', {self._AUS_AGES.get}),
'series': ('tvShow', {str}),
'season_number': ('season', {int_or_none}),
'episode_number': ('episode', {int_or_none}),
'timestamp': ('published', {int_or_none}),
'thumbnail': ('imageUrl', {url_or_none}),
}),
}

View File

@@ -23,7 +23,6 @@ from ..utils import (
mimetype2ext,
parse_qs,
qualities,
remove_start,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
@@ -254,7 +253,16 @@ class TikTokBaseIE(InfoExtractor):
def _get_subtitles(self, aweme_detail, aweme_id, user_name):
# TODO: Extract text positioning info
EXT_MAP = { # From lowest to highest preference
'creator_caption': 'json',
'srt': 'srt',
'webvtt': 'vtt',
}
preference = qualities(tuple(EXT_MAP.values()))
subtitles = {}
# aweme/detail endpoint subs
captions_info = traverse_obj(
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
@@ -278,8 +286,8 @@ class TikTokBaseIE(InfoExtractor):
if not caption.get('url'):
continue
subtitles.setdefault(caption.get('lang') or 'en', []).append({
'ext': remove_start(caption.get('caption_format'), 'web'),
'url': caption['url'],
'ext': EXT_MAP.get(caption.get('Format')),
})
# webpage subs
if not subtitles:
@@ -288,9 +296,14 @@ class TikTokBaseIE(InfoExtractor):
self._create_url(user_name, aweme_id), aweme_id, fatal=False)
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
'ext': remove_start(caption.get('Format'), 'web'),
'url': caption['Url'],
'ext': EXT_MAP.get(caption.get('Format')),
})
# Deprioritize creator_caption json since it can't be embedded or used by media players
for lang, subs_list in subtitles.items():
subtitles[lang] = sorted(subs_list, key=lambda x: preference(x['ext']))
return subtitles
def _parse_url_key(self, url_key):
@@ -529,16 +542,12 @@ class TikTokBaseIE(InfoExtractor):
**COMMON_FORMAT_INFO,
'format_id': 'download',
'url': self._proto_relative_url(download_url),
'format_note': 'watermarked',
'preference': -2,
})
self._remove_duplicate_formats(formats)
for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']):
f.update({
'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '),
'preference': f.get('preference') or -2,
})
# Is it a slideshow with only audio for download?
if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})):
audio_url = aweme_detail['music']['playUrl']
@@ -552,7 +561,8 @@ class TikTokBaseIE(InfoExtractor):
'vcodec': 'none',
})
return formats
# Filter out broken formats, see https://github.com/yt-dlp/yt-dlp/issues/11034
return [f for f in formats if urllib.parse.urlparse(f['url']).hostname != 'www.tiktok.com']
def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False):
author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), {

View File

@@ -1,60 +1,29 @@
import functools
import re
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
from ..utils.traversal import traverse_obj
class TVAIE(InfoExtractor):
_VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'
IE_NAME = 'tvaplus'
IE_DESC = 'TVA+'
_VALID_URL = r'https?://(?:www\.)?tvaplus\.ca/(?:[^/?#]+/)*[\w-]+-(?P<id>\d+)(?:$|[#?])'
_TESTS = [{
'url': 'https://videos.tva.ca/details/_5596811470001',
'info_dict': {
'id': '5596811470001',
'ext': 'mp4',
'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
'uploader_id': '5481942443001',
'upload_date': '20171003',
'timestamp': 1507064617,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'HTTP Error 404: Not Found',
}, {
'url': 'https://video.tva.ca/details/_5596811470001',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url_transparent',
'id': video_id,
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
'ie_key': 'BrightcoveNew',
}
class QubIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
'url': 'https://www.tvaplus.ca/tva/alerte-amber/saison-1/episode-01-1000036619',
'md5': '949490fd0e7aee11d0543777611fbd53',
'info_dict': {
'id': '6084352463001',
'ext': 'mp4',
'title': 'Ép 01. Mon dernier jour',
'title': 'Mon dernier jour',
'uploader_id': '5481942443001',
'upload_date': '20190907',
'timestamp': 1567899756,
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
'thumbnail': r're:https://.+\.jpg',
'episode': 'Ép 01. Mon dernier jour',
'episode': 'Mon dernier jour',
'episode_number': 1,
'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
'duration': 2625.963,
@@ -64,23 +33,36 @@ class QubIE(InfoExtractor):
'channel': 'TVA',
},
}, {
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
'only_matching': True,
'url': 'https://www.tvaplus.ca/tva/le-baiser-du-barbu/le-baiser-du-barbu-886644190',
'info_dict': {
'id': '6354448043112',
'ext': 'mp4',
'title': 'Le Baiser du barbu',
'uploader_id': '5481942443001',
'upload_date': '20240606',
'timestamp': 1717694023,
'description': 'md5:025b1219086c1cbf4bc27e4e034e8b57',
'thumbnail': r're:https://.+\.jpg',
'episode': 'Le Baiser du barbu',
'tags': ['fullepisode', 'films'],
'duration': 6053.504,
'series': 'Le Baiser du barbu',
'channel': 'TVA',
},
}]
# reference_id also works with old account_id(5481942443001)
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'
_BC_URL_TMPL = 'https://players.brightcove.net/5481942443001/default_default/index.html?videoId={}'
def _real_extract(self, url):
entity_id = self._match_id(url)
webpage = self._download_webpage(url, entity_id)
entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData']
entity = self._search_nextjs_data(webpage, entity_id)['props']['pageProps']['staticEntity']
video_id = entity['videoId']
episode = strip_or_none(entity.get('name'))
return {
'_type': 'url_transparent',
'url': f'https://videos.tva.ca/details/_{video_id}',
'ie_key': TVAIE.ie_key(),
'url': smuggle_url(self._BC_URL_TMPL.format(video_id), {'geo_countries': ['CA']}),
'ie_key': BrightcoveNewIE.ie_key(),
'id': video_id,
'title': episode,
'episode': episode,

View File

@@ -10,7 +10,7 @@ from ..utils import (
class TVerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)'
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'skip': 'videos are only available for 7 days',
'url': 'https://tver.jp/episodes/ep83nf3w4p',
@@ -23,6 +23,20 @@ class TVerIE(InfoExtractor):
'channel': 'テレビ朝日',
},
'add_ie': ['BrightcoveNew'],
}, {
'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/',
'info_dict': {
'id': '6359578055112',
'ext': 'mp4',
'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」',
'timestamp': 1722279928,
'upload_date': '20240729',
'tags': ['20240729', 'japanese', 'japanmedal', 'paris'],
'uploader_id': '4774017240001',
'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg',
'duration': 670.571,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://tver.jp/corner/f0103888',
'only_matching': True,
@@ -47,7 +61,15 @@ class TVerIE(InfoExtractor):
def _real_extract(self, url):
video_id, video_type = self._match_valid_url(url).group('id', 'type')
if video_type not in {'series', 'episodes'}:
if video_type == 'olympic/paris2024/video':
# Player ID is taken from .content.brightcove.E200.pro.pc.account_id:
# https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d=
return self.url_result(smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id),
{'geo_countries': ['JP']}), 'BrightcoveNew')
elif video_type not in {'series', 'episodes'}:
webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
video_id = self._match_id(self._search_regex(
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),

View File

@@ -8,7 +8,7 @@ from ..utils import (
class TVN24IE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:(?!eurosport)[^/]+\.)?tvn24(?:bis)?\.pl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
'md5': 'fbdec753d7bc29d96036808275f2130c',

View File

@@ -270,7 +270,7 @@ class TwitCastingLiveIE(InfoExtractor):
class TwitCastingUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)'
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
'info_dict': {

View File

@@ -1764,7 +1764,7 @@ class TwitterSpacesIE(TwitterBaseIE):
'release_timestamp': 1659904215,
'release_date': '20220807',
},
'params': {'skip_download': 'm3u8'},
'skip': 'No longer available',
}, {
# post_live/TimedOut but downloadable
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
@@ -1780,6 +1780,8 @@ class TwitterSpacesIE(TwitterBaseIE):
'upload_date': '20230413',
'release_timestamp': 1681839000,
'release_date': '20230418',
'protocol': 'm3u8', # ffmpeg is forced
'container': 'm4a_dash', # audio-only format fixup is applied
},
'params': {'skip_download': 'm3u8'},
}, {
@@ -1790,11 +1792,31 @@ class TwitterSpacesIE(TwitterBaseIE):
'ext': 'm4a',
'title': '',
'description': 'Twitter Space participated by nobody yet',
'uploader': '息根とめる🔪Twitchで復活',
'uploader': '息根とめる',
'uploader_id': 'tomeru_ikinone',
'live_status': 'was_live',
'timestamp': 1685617198,
'upload_date': '20230601',
'protocol': 'm3u8', # ffmpeg is forced
'container': 'm4a_dash', # audio-only format fixup is applied
},
'params': {'skip_download': 'm3u8'},
}, {
# Video Space
'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
'info_dict': {
'id': '1DXGydznBYWKM',
'ext': 'mp4',
'title': 'America and Israels “special relationship”',
'description': 'Twitter Space participated by nobody yet',
'uploader': 'Candace Owens',
'uploader_id': 'RealCandaceO',
'live_status': 'was_live',
'timestamp': 1723931351,
'upload_date': '20240817',
'release_timestamp': 1723932000,
'release_date': '20240817',
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
},
'params': {'skip_download': 'm3u8'},
}]
@@ -1854,13 +1876,17 @@ class TwitterSpacesIE(TwitterBaseIE):
source = traverse_obj(
self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
formats = self._extract_m3u8_formats( # XXX: Some Spaces need ffmpeg as downloader
source, metadata['media_key'], 'm4a', entry_protocol='m3u8', live=is_live,
headers=headers, fatal=False) if source else []
for fmt in formats:
fmt.update({'vcodec': 'none', 'acodec': 'aac'})
if not is_live:
fmt['container'] = 'm4a_dash'
is_audio_space = source and 'audio-space' in source
formats = self._extract_m3u8_formats(
source, metadata['media_key'], 'm4a' if is_audio_space else 'mp4',
# XXX: Some audio-only Spaces need ffmpeg as downloader
entry_protocol='m3u8' if is_audio_space else 'm3u8_native',
live=is_live, headers=headers, fatal=False) if source else []
if is_audio_space:
for fmt in formats:
fmt.update({'vcodec': 'none', 'acodec': 'aac'})
if not is_live:
fmt['container'] = 'm4a_dash'
participants = ', '.join(traverse_obj(
space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'

View File

@@ -49,6 +49,7 @@ class KnownDRMIE(UnsupportedInfoExtractor):
r'amazon\.(?:\w{2}\.)?\w+/gp/video',
r'music\.amazon\.(?:\w{2}\.)?\w+',
r'(?:watch|front)\.njpwworld\.com',
r'qub\.ca/vrai',
)
_TESTS = [{
@@ -149,6 +150,9 @@ class KnownDRMIE(UnsupportedInfoExtractor):
}, {
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
'only_matching': True,
}, {
'url': 'https://www.qub.ca/vrai/l-effet-bocuse-d-or/saison-1/l-effet-bocuse-d-or-saison-1-bande-annonce-1098225063',
'only_matching': True,
}]
def _real_extract(self, url):

148
yt_dlp/extractor/vidflex.py Normal file
View File

@@ -0,0 +1,148 @@
import base64
import json
from .common import InfoExtractor
from ..utils import (
int_or_none,
join_nonempty,
mimetype2ext,
url_or_none,
)
from ..utils.traversal import traverse_obj
class VidflexIE(InfoExtractor):
    """Extractor for video pages on the hosts listed in ``_DOMAINS_RE``.

    The page exposes a ``content_api`` URL; the JSON served there carries a
    base64-encoded JSON ``config`` whose ``media.source`` entries are turned
    into formats. Remaining metadata is read from the page's JSON-LD.
    """

    # Host patterns that are joined into `_VALID_URL`
    _DOMAINS_RE = [
        r'[^.]+\.vidflex\.tv',
        r'(?:www\.)?acactv\.ca',
        r'(?:www\.)?albertalacrossetv\.com',
        r'(?:www\.)?cjfltv\.com',
        r'(?:www\.)?figureitoutbaseball\.com',
        r'(?:www\.)?ocaalive\.com',
        r'(?:www\.)?pegasussports\.tv',
        r'(?:www\.)?praxisseries\.ca',
        r'(?:www\.)?silenticetv\.com',
        r'(?:www\.)?tuffhedemantv\.com',
        r'(?:www\.)?watchfuntv\.com',
        r'live\.ofsaa\.on\.ca',
        r'tv\.procoro\.ca',
        r'tv\.realcastmedia\.net',
        r'tv\.fringetheatre\.ca',
        r'video\.haisla\.ca',
        r'video\.hockeycanada\.ca',
        r'video\.huuayaht\.org',
        r'video\.turningpointensemble\.ca',
        r'videos\.livingworks\.net',
        r'videos\.telusworldofscienceedmonton\.ca',
        r'watch\.binghamtonbulldogs\.com',
        r'watch\.rekindle\.tv',
        r'watch\.wpca\.com',
    ]
    # Path shape: /<two-letter code>[-<two-letter code>]/c/<slug>.<numeric id>
    _VALID_URL = rf'https?://(?:{"|".join(_DOMAINS_RE)})/[a-z]{{2}}(?:-[a-z]{{2}})?/c/[\w-]+\.(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://video.hockeycanada.ca/en/c/nwt-micd-up-with-jamie-lee-rattray.107486',
        'only_matching': True,
    }, {
        # m3u8 + https
        'url': 'https://video.hockeycanada.ca/en-us/c/nwt-micd-up-with-jamie-lee-rattray.107486',
        'info_dict': {
            'id': '107486',
            'title': 'NWT: Micd up with Jamie Lee Rattray',
            'ext': 'mp4',
            'duration': 115,
            'timestamp': 1634310409,
            'upload_date': '20211015',
            'tags': ['English', '2021', "National Women's Team"],
            'description': 'md5:efb1cf6165b48cc3f5555c4262dd5b23',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://video.hockeycanada.ca/en/c/mwc-remembering-the-wild-ride-in-riga.112307',
        'info_dict': {
            'id': '112307',
            'title': 'MWC: Remembering the wild ride in Riga',
            'ext': 'mp4',
            'duration': 322,
            'timestamp': 1716235607,
            'upload_date': '20240520',
            'tags': ['English', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
            'description': r're:.+Canadas National Mens Team.+',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        # the same video in French
        'url': 'https://video.hockeycanada.ca/fr/c/cmm-retour-sur-un-parcours-endiable-a-riga.112304',
        'info_dict': {
            'id': '112304',
            'title': 'CMM : Retour sur un parcours endiablé à Riga',
            'ext': 'mp4',
            'duration': 322,
            'timestamp': 1716235545,
            'upload_date': '20240520',
            'tags': ['French', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
            'description': 'md5:cf825222882a3dab1cd62cffcf3b4d1f',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://myfbcgreenville.vidflex.tv/en/c/may-12th-2024.658',
        'only_matching': True,
    }, {
        'url': 'https://www.figureitoutbaseball.com/en/c/fiob-podcast-14-dan-bertolini-ncaa-d1-head-coach-recorded-11-29-2018.1367',
        'only_matching': True,
    }, {
        'url': 'https://videos.telusworldofscienceedmonton.ca/en/c/the-aurora-project-timelapse-4.577',
        'only_matching': True,
    }, {
        'url': 'https://www.tuffhedemantv.com/en/c/2022-tuff-hedeman-tour-hobbs-nm-january-22.227',
        'only_matching': True,
    }, {
        'url': 'https://www.albertalacrossetv.com/en/c/up-floor-ground-balls-one-more.3449',
        'only_matching': True,
    }, {
        'url': 'https://www.silenticetv.com/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
        'only_matching': True,
    }, {
        'url': 'https://jphl.vidflex.tv/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page and its content API, then assemble the info dict."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page embeds the API endpoint as a quoted `content_api: "<url>"` value
        content_api_url = self._html_search_regex(
            r'content_api:\s*(["\'])(?P<url>https?://(?:(?!\1).)+)\1', webpage, 'content api url', group='url')
        api_response = self._download_json(content_api_url, video_id)
        # `config` is base64 text wrapping a JSON document; only a dict result is kept
        media_config = traverse_obj(
            api_response, ('config', {base64.b64decode}, {bytes.decode}, {json.loads}, {dict}))

        # Formats are collected before JSON-LD is parsed, as in the dict-literal ordering
        formats = list(self._yield_formats(media_config, video_id))
        # Strip the CDATA comment wrappers from the page before searching for JSON-LD
        cleaned_webpage = webpage.replace('/*<![CDATA[*/', '').replace('/*]]>*/', '')
        json_ld_info = self._search_json_ld(cleaned_webpage, video_id)

        return {
            'id': video_id,
            'formats': formats,
            **json_ld_info,
        }

    def _yield_formats(self, media_config, video_id):
        """Yield format dicts for every `media.source` entry that has a valid `src` URL."""
        sources = traverse_obj(media_config, ('media', 'source', lambda _, v: url_or_none(v['src'])))
        for source in sources:
            source_url = source['src']
            ext = mimetype2ext(source.get('type'))

            if ext == 'm3u8':
                yield from self._extract_m3u8_formats(source_url, video_id, fatal=False, m3u8_id='hls')
                continue

            if ext != 'mp4':
                # Any other type is passed through with whatever extension was detected
                yield {
                    'url': source_url,
                    'ext': ext,
                }
                continue

            # Progressive MP4 URLs may carry the bitrate as a `_<number>k.mp4` suffix
            bitrate = self._search_regex(r'_(\d+)k\.mp4', source_url, 'bitrate', default=None)
            yield {
                'format_id': join_nonempty('http', bitrate),
                'url': source_url,
                'ext': 'mp4',
                'tbr': int_or_none(bitrate),
            }

View File

@@ -21,6 +21,7 @@ from ..utils import (
parse_filesize,
parse_iso8601,
parse_qs,
qualities,
smuggle_url,
str_or_none,
traverse_obj,
@@ -146,6 +147,8 @@ class VimeoBaseInfoExtractor(InfoExtractor):
})
# TODO: fix handling of 308 status code returned for live archive manifest requests
QUALITIES = ('low', 'medium', 'high')
quality = qualities(QUALITIES)
sep_pattern = r'/sep/video/'
for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
@@ -166,6 +169,11 @@ class VimeoBaseInfoExtractor(InfoExtractor):
m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id,
note=f'Downloading {cdn_name} m3u8 information',
fatal=False)
# m3u8 doesn't give audio bitrates; need to prioritize based on GROUP-ID
# See: https://github.com/yt-dlp/yt-dlp/issues/10854
for f in fmts:
if mobj := re.search(rf'audio-({"|".join(QUALITIES)})', f['format_id']):
f['quality'] = quality(mobj.group(1))
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif files_type == 'dash':
@@ -212,16 +220,6 @@ class VimeoBaseInfoExtractor(InfoExtractor):
owner = video_data.get('owner') or {}
video_uploader_url = owner.get('url')
duration = int_or_none(video_data.get('duration'))
chapter_data = try_get(config, lambda x: x['embed']['chapters']) or []
chapters = [{
'title': current_chapter.get('title'),
'start_time': current_chapter.get('timecode'),
'end_time': next_chapter.get('timecode'),
} for current_chapter, next_chapter in zip(chapter_data, chapter_data[1:] + [{'timecode': duration}])]
if chapters and chapters[0]['start_time']: # Chapters may not start from 0
chapters[:0] = [{'title': '<Untitled>', 'start_time': 0, 'end_time': chapters[0]['start_time']}]
return {
'id': str_or_none(video_data.get('id')) or video_id,
'title': video_title,
@@ -229,8 +227,12 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
'uploader_url': video_uploader_url,
'thumbnails': thumbnails,
'duration': duration,
'chapters': chapters or None,
'duration': int_or_none(video_data.get('duration')),
'chapters': sorted(traverse_obj(config, (
'embed', 'chapters', lambda _, v: int(v['timecode']) is not None, {
'title': ('title', {str}),
'start_time': ('timecode', {int_or_none}),
})), key=lambda c: c['start_time']) or None,
'formats': formats,
'subtitles': subtitles,
'live_status': live_status,
@@ -240,13 +242,30 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
}
def _extract_original_format(self, url, video_id, unlisted_hash=None):
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None, **kwargs):
    """Request a fixed set of fields for a video from ``api.vimeo.com``.

    ``unlisted_hash``, when given, is appended to the video ID with a ``:``
    separator. Extra keyword arguments are forwarded to ``_download_json``.
    """
    requested_fields = (
        'config_url', 'created_time', 'description', 'download', 'license',
        'metadata.connections.comments.total', 'metadata.connections.likes.total',
        'release_time', 'stats.plays')
    api_url = join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':')
    request_headers = {
        'Authorization': f'jwt {jwt_token}',
        'Accept': 'application/json',
    }
    request_query = {'fields': ','.join(requested_fields)}

    return self._download_json(
        api_url, video_id, 'Downloading API JSON',
        headers=request_headers, query=request_query, **kwargs)
def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None, api_data=None):
# Original/source formats are only available when logged in
if not self._get_cookies('https://vimeo.com/').get('vimeo'):
return
query = {'action': 'load_download_config'}
if unlisted_hash:
query['unlisted_hash'] = unlisted_hash
download_data = self._download_json(
url, video_id, fatal=False, query=query,
headers={'X-Requested-With': 'XMLHttpRequest'},
url, video_id, 'Loading download config JSON', fatal=False,
query=query, headers={'X-Requested-With': 'XMLHttpRequest'},
expected_status=(403, 404)) or {}
source_file = download_data.get('source_file')
download_url = try_get(source_file, lambda x: x['download_url'])
@@ -267,15 +286,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'quality': 1,
}
jwt_response = self._download_json(
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
if not jwt_response.get('jwt'):
jwt = jwt or traverse_obj(self._download_json(
'https://vimeo.com/_rv/viewer', video_id, 'Downloading jwt token', fatal=False), ('jwt', {str}))
if not jwt:
return
headers = {'Authorization': 'jwt {}'.format(jwt_response['jwt']), 'Accept': 'application/json'}
original_response = self._download_json(
f'https://api.vimeo.com/videos/{video_id}', video_id,
headers=headers, fatal=False, expected_status=(403, 404)) or {}
for download_data in original_response.get('download') or []:
original_response = api_data or self._call_videos_api(
video_id, jwt, unlisted_hash, fatal=False, expected_status=(403, 404))
for download_data in traverse_obj(original_response, ('download', ..., {dict})):
download_url = download_data.get('link')
if not download_url or download_data.get('quality') != 'source':
continue
@@ -360,7 +377,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'skip': 'No longer available',
},
{
'url': 'http://player.vimeo.com/video/54469442',
'url': 'https://player.vimeo.com/video/54469442',
'md5': '619b811a4417aa4abe78dc653becf511',
'note': 'Videos that embed the url in the player page',
'info_dict': {
@@ -376,6 +393,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': {
'format': 'best[protocol=https]',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'http://vimeo.com/68375962',
@@ -385,22 +403,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
'id': '68375962',
'ext': 'mp4',
'title': 'youtube-dl password protected test video',
'timestamp': 1371200155,
'timestamp': 1371214555,
'upload_date': '20130614',
'release_timestamp': 1371214555,
'release_date': '20130614',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10,
'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
'view_count': int,
'comment_count': int,
'like_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
},
'params': {
'format': 'best[protocol=https]',
'videopassword': 'youtube-dl',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'http://vimeo.com/channels/keypeele/75629013',
@@ -424,29 +443,38 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': int,
},
'params': {'format': 'http-1080p'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'http://vimeo.com/76979871',
'note': 'Video with subtitles',
'info_dict': {
'id': '76979871',
'ext': 'mov',
'ext': 'mp4',
'title': 'The New Vimeo Player (You Know, For Videos)',
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
'timestamp': 1381846109,
'description': str, # FIXME: Dynamic SEO spam description
'timestamp': 1381860509,
'upload_date': '20131015',
'release_timestamp': 1381860509,
'release_date': '20131015',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
'uploader_id': 'staff',
'uploader': 'Vimeo Staff',
'uploader': 'Vimeo',
'duration': 62,
'comment_count': int,
'like_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/452001751-8216e0571c251a09d7a8387550942d89f7f86f6398f8ed886e639b0dd50d3c90-d_1280',
'subtitles': {
'de': [{'ext': 'vtt'}],
'en': [{'ext': 'vtt'}],
'es': [{'ext': 'vtt'}],
'fr': [{'ext': 'vtt'}],
'de': 'count:3',
'en': 'count:3',
'es': 'count:3',
'fr': 'count:3',
},
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'expected_warnings': [
'Ignoring subtitle tracks found in the HLS manifest',
'Failed to parse XML: not well-formed',
],
},
{
# from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
@@ -462,11 +490,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 118,
'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# contains original format
# contains Original format
'url': 'https://vimeo.com/33951933',
'md5': '53c688fa95a55bf4b7293d37a89c5c53',
# 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
'info_dict': {
'id': '33951933',
'ext': 'mp4',
@@ -482,15 +511,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280',
'like_count': int,
'tags': 'count:11',
},
# 'params': {'format': 'Original'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'note': 'Contains original format not accessible in webpage',
'note': 'Contains source format not accessible in webpage',
'url': 'https://vimeo.com/393756517',
'md5': 'c464af248b592190a5ffbb5d33f382b0',
# 'md5': 'c464af248b592190a5ffbb5d33f382b0',
'info_dict': {
'id': '393756517',
'ext': 'mov',
# 'ext': 'mov',
'ext': 'mp4',
'timestamp': 1582642091,
'uploader_id': 'frameworkla',
'title': 'Straight To Hell - Sabrina: Netflix',
@@ -501,6 +534,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280',
'uploader_url': 'https://vimeo.com/frameworkla',
},
# 'params': {'format': 'source'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# only available via https://vimeo.com/channels/tributes/6213729 and
@@ -517,16 +552,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
'channel_id': 'tributes',
'timestamp': 1250886430,
'upload_date': '20090821',
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
'description': str, # FIXME: Dynamic SEO spam description
'duration': 321,
'comment_count': int,
'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280',
'like_count': int,
'tags': ['[the shining', 'vimeohq', 'cv', 'vimeo tribute]'],
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# redirects to ondemand extractor and should be passed through it
@@ -549,28 +586,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
'skip': 'this page is no longer available.',
},
{
'url': 'http://player.vimeo.com/video/68375962',
'url': 'https://player.vimeo.com/video/68375962',
'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
'info_dict': {
'id': '68375962',
'ext': 'mp4',
'title': 'youtube-dl password protected test video',
'timestamp': 1371200155,
'upload_date': '20130614',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10,
'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
'view_count': int,
'comment_count': int,
'like_count': int,
},
'params': {
'format': 'best[protocol=https]',
'videopassword': 'youtube-dl',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
@@ -598,7 +630,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
'uploader': 'Philipp Hagemeister',
'uploader_id': 'user20132939',
'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b',
'description': str, # FIXME: Dynamic SEO spam description
'upload_date': '20150209',
'timestamp': 1423518307,
'thumbnail': 'https://i.vimeocdn.com/video/default_1280',
@@ -612,6 +644,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'format': 'best[protocol=https]',
'videopassword': 'youtube-dl',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# source file returns 403: Forbidden
@@ -639,11 +672,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'release_date': '20160329',
},
'params': {'skip_download': True},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'https://vimeo.com/138909882',
'info_dict': {
'id': '138909882',
# 'ext': 'm4v',
'ext': 'mp4',
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
'description': 'md5:5967e090768a831488f6e74b7821b3c1',
@@ -651,11 +686,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'Firework Champions',
'upload_date': '20150910',
'timestamp': 1441901895,
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d_1280',
'uploader_url': 'https://vimeo.com/fireworkchampions',
'tags': 'count:6',
'duration': 229,
'view_count': int,
'like_count': int,
'comment_count': int,
},
'params': {
'skip_download': True,
'format': 'Original',
# 'format': 'source',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'https://vimeo.com/channels/staffpicks/143603739',
@@ -676,8 +719,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': int,
'uploader_url': 'https://vimeo.com/karimhd',
'channel_url': 'https://vimeo.com/channels/staffpicks',
'tags': 'count:6',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# requires passing unlisted_hash(a52724358e) to load_download_config request
@@ -707,6 +752,82 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': {
'skip_download': True,
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# chapters must be sorted, see: https://github.com/yt-dlp/yt-dlp/issues/5308
'url': 'https://player.vimeo.com/video/756714419',
'info_dict': {
'id': '756714419',
'ext': 'mp4',
'title': 'Dr Arielle Schwartz - Therapeutic yoga for optimum sleep',
'uploader': 'Alex Howard',
'uploader_id': 'user54729178',
'uploader_url': 'https://vimeo.com/user54729178',
'thumbnail': r're:https://i\.vimeocdn\.com/video/1520099929-[\da-f]+-d_1280',
'duration': 2636,
'chapters': [
{'start_time': 0, 'end_time': 10, 'title': '<Untitled Chapter 1>'},
{'start_time': 10, 'end_time': 106, 'title': 'Welcoming Dr Arielle Schwartz'},
{'start_time': 106, 'end_time': 305, 'title': 'What is therapeutic yoga?'},
{'start_time': 305, 'end_time': 594, 'title': 'Vagal toning practices'},
{'start_time': 594, 'end_time': 888, 'title': 'Trauma and difficulty letting go'},
{'start_time': 888, 'end_time': 1059, 'title': "Dr Schwartz' insomnia experience"},
{'start_time': 1059, 'end_time': 1471, 'title': 'A strategy for helping sleep issues'},
{'start_time': 1471, 'end_time': 1667, 'title': 'Yoga nidra'},
{'start_time': 1667, 'end_time': 2121, 'title': 'Wisdom in stillness'},
{'start_time': 2121, 'end_time': 2386, 'title': 'What helps us be more able to let go?'},
{'start_time': 2386, 'end_time': 2510, 'title': 'Practical tips to help ourselves'},
{'start_time': 2510, 'end_time': 2636, 'title': 'Where to find out more'},
],
},
'params': {
'http_headers': {'Referer': 'https://sleepsuperconference.com'},
'skip_download': 'm3u8',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# vimeo.com URL with unlisted hash and Original format
'url': 'https://vimeo.com/144579403/ec02229140',
# 'md5': '6b662c2884e0373183fbde2a0d15cb78',
'info_dict': {
'id': '144579403',
'ext': 'mp4',
'title': 'SALESMANSHIP',
'description': 'md5:4338302f347a1ff8841b4a3aecaa09f0',
'uploader': 'Off the Picture Pictures',
'uploader_id': 'offthepicturepictures',
'uploader_url': 'https://vimeo.com/offthepicturepictures',
'duration': 669,
'upload_date': '20151104',
'timestamp': 1446607180,
'release_date': '20151104',
'release_timestamp': 1446607180,
'like_count': int,
'view_count': int,
'comment_count': int,
'thumbnail': r're:https://i\.vimeocdn\.com/video/1018638656-[\da-f]+-d_1280',
},
# 'params': {'format': 'Original'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# player.vimeo.com URL with source format
'url': 'https://player.vimeo.com/video/859028877',
# 'md5': '19ca3d2463441dee2d2f0671ac2916a2',
'info_dict': {
'id': '859028877',
'ext': 'mp4',
'title': 'Ariana Grande - Honeymoon Avenue (Live from London)',
'uploader': 'Raja Virdi',
'uploader_id': 'rajavirdi',
'uploader_url': 'https://vimeo.com/rajavirdi',
'duration': 309,
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d_1280',
},
# 'params': {'format': 'source'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# user playlist alias -> https://vimeo.com/258705797
@@ -741,16 +862,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise ExtractorError('Wrong video password', expected=True)
return checked
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None):
    """Download the API JSON for a video from ``api.vimeo.com/videos``.

    The request URL is built by joining ``https://api.vimeo.com/videos/<video_id>``
    and ``unlisted_hash`` with ``:`` through ``join_nonempty``. The JWT is sent
    in the ``Authorization`` header and only the listed ``fields`` are requested.
    Returns whatever ``_download_json`` returns for that request.
    """
    api_url = join_nonempty(
        f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':')
    request_headers = {
        'Authorization': f'jwt {jwt_token}',
        'Accept': 'application/json',
    }
    requested_fields = 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays'
    return self._download_json(
        api_url, video_id, 'Downloading API JSON',
        headers=request_headers, query={'fields': requested_fields})
def _extract_from_api(self, video_id, unlisted_hash=None):
viewer = self._download_json(
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
@@ -771,6 +882,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
info = self._parse_config(self._download_json(
video['config_url'], video_id), video_id)
source_format = self._extract_original_format(
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, jwt=viewer['jwt'], api_data=video)
if source_format:
info['formats'].append(source_format)
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
info.update({
'description': video.get('description'),
@@ -872,7 +988,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
if config.get('view') == 4:
config = self._verify_player_video_password(
redirect_url, video_id, headers)
return self._parse_config(config, video_id)
info = self._parse_config(config, video_id)
source_format = self._extract_original_format(
f'https://vimeo.com/{video_id}', video_id, unlisted_hash)
if source_format:
info['formats'].append(source_format)
return info
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config:
@@ -1240,8 +1361,22 @@ class VimeoGroupsIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
class VimeoReviewIE(VimeoBaseInfoExtractor):
IE_NAME = 'vimeo:review'
IE_DESC = 'Review pages on vimeo'
_VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
_VALID_URL = r'https?://vimeo\.com/(?P<user>[^/?#]+)/review/(?P<id>\d+)/(?P<hash>[\da-f]{10})'
_TESTS = [{
'url': 'https://vimeo.com/user170863801/review/996447483/a316d6ed8d',
'info_dict': {
'id': '996447483',
'ext': 'mp4',
'title': 'Rodeo day 1-_2',
'uploader': 'BROADKAST',
'uploader_id': 'user170863801',
'uploader_url': 'https://vimeo.com/user170863801',
'duration': 30,
'thumbnail': 'https://i.vimeocdn.com/video/1912612821-09a43bd2e75c203d503aed89de7534f28fc4474a48f59c51999716931a246af5-d_1280',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML'],
}, {
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253',
'info_dict': {
@@ -1255,6 +1390,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'thumbnail': 'https://i.vimeocdn.com/video/450115033-43303819d9ebe24c2630352e18b7056d25197d09b3ae901abdac4c4f1d68de71-d_1280',
'uploader_url': 'https://vimeo.com/user21297594',
},
'skip': '404 Not Found',
}, {
'note': 'video player needs Referer',
'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
@@ -1286,26 +1422,23 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
}]
def _real_extract(self, url):
page_url, video_id = self._match_valid_url(url).groups()
data = self._download_json(
page_url.replace('/review/', '/review/data/'), video_id)
user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash')
data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}'
data = self._download_json(data_url, video_id)
viewer = {}
if data.get('isLocked') is True:
video_password = self._get_video_password()
viewer = self._download_json(
'https://vimeo.com/_rv/viewer', video_id)
webpage = self._verify_video_password(video_id, video_password, viewer['xsrft'])
clip_page_config = self._parse_json(self._search_regex(
r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
webpage, 'clip page config'), video_id)
config_url = clip_page_config['player']['config_url']
clip_data = clip_page_config.get('clip') or {}
else:
clip_data = data['clipData']
config_url = clip_data['configUrl']
self._verify_video_password(video_id, video_password, viewer['xsrft'])
data = self._download_json(data_url, video_id)
clip_data = data['clipData']
config_url = clip_data['configUrl']
config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id)
source_format = self._extract_original_format(
page_url + '/action', video_id)
f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action',
video_id, unlisted_hash=clip_data.get('unlistedHash'), jwt=viewer.get('jwt'))
if source_format:
info_dict['formats'].append(source_format)
info_dict['description'] = clean_html(clip_data.get('description'))

View File

@@ -90,7 +90,7 @@ class ViuIE(ViuBaseIE):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
for key, value in video_data.items():
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
mobj = re.match(r'subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
if not mobj:
continue
subtitles.setdefault(mobj.group('lang'), []).append({

View File

@@ -8,6 +8,7 @@ from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
filter_dict,
float_or_none,
int_or_none,
parse_qs,
@@ -25,16 +26,25 @@ class WistiaBaseIE(InfoExtractor):
def _download_embed_config(self, config_type, config_id, referer):
    """Download the embed config JSON for ``config_type``/``config_id`` and validate it.

    The ``videopassword`` param, when set, is forwarded as the ``password``
    query parameter (``filter_dict`` is applied to the query dict first).

    Raises ExtractorError (expected) when the config carries an ``error``
    value, or when the config still reports ``passwordProtectedVideo`` as
    ``'true'``: "Invalid video password" if a password was supplied,
    otherwise a hint to use ``--video-password``.
    """
    base_url = self._EMBED_BASE_URL + f'{config_type}/{config_id}'
    video_password = self.get_param('videopassword')
    # A single call: the headers dict is followed by the query argument.
    embed_config = self._download_json(
        base_url + '.json', config_id, headers={
            'Referer': referer if referer.startswith('http') else base_url,  # Some videos require this.
        }, query=filter_dict({'password': video_password}))

    error = traverse_obj(embed_config, 'error')
    if error:
        raise ExtractorError(
            f'Error while getting the playlist: {error}', expected=True)

    # The flag may live under either 'embed_options' or 'embedOptions'
    if traverse_obj(embed_config, (
            'media', ('embed_options', 'embedOptions'), 'plugin',
            'passwordProtectedVideo', 'on', any)) == 'true':
        if video_password:
            raise ExtractorError('Invalid video password', expected=True)
        raise ExtractorError(
            'This content is password-protected. Use the --video-password option', expected=True)

    return embed_config
def _get_real_ext(self, url):

View File

@@ -1,7 +1,17 @@
import base64
import math
import time
from .common import InfoExtractor
from ..utils import InAdvancePagedList, str_or_none, traverse_obj, try_call
from .videa import VideaIE
from ..utils import (
InAdvancePagedList,
int_or_none,
str_or_none,
traverse_obj,
try_call,
update_url_query,
)
class XimalayaBaseIE(InfoExtractor):
@@ -11,7 +21,7 @@ class XimalayaBaseIE(InfoExtractor):
class XimalayaIE(XimalayaBaseIE):
IE_NAME = 'ximalaya'
IE_DESC = '喜马拉雅FM'
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
_TESTS = [
{
'url': 'http://www.ximalaya.com/sound/47740352/',
@@ -71,23 +81,92 @@ class XimalayaIE(XimalayaBaseIE):
'like_count': int,
},
},
{
# VIP-restricted audio
'url': 'https://www.ximalaya.com/sound/562111701',
'only_matching': True,
},
]
@staticmethod
def _decrypt_filename(file_id, seed):
cgstr = ''
key = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890'
for _ in key:
seed = float(int(211 * seed + 30031) % 65536)
r = int(seed / 65536 * len(key))
cgstr += key[r]
key = key.replace(key[r], '')
parts = file_id.split('*')
filename = ''.join(cgstr[int(part)] for part in parts if part.isdecimal())
if not filename.startswith('/'):
filename = '/' + filename
return filename
@staticmethod
def _decrypt_url_params(encrypted_params):
    """Decode the base64 ``encrypted_params`` blob and return ``(sign, token, timestamp)``.

    The decoded bytes are passed to ``VideaIE.rc4`` with a fixed key; the
    result is split on ``-`` and fields 1 to 3 are returned in that order.
    """
    decoded = base64.b64decode(encrypted_params)
    fields = VideaIE.rc4(decoded, 'xkt3a41psizxrh9l').split('-')
    sign, token, timestamp = fields[1], fields[2], fields[3]
    return sign, token, timestamp
def _real_extract(self, url):
scheme = 'https' if url.startswith('https') else 'http'
audio_id = self._match_id(url)
audio_info_file = f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json'
audio_info = self._download_json(
audio_info_file, audio_id,
f'Downloading info json {audio_info_file}', 'Unable to download info file')
f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json', audio_id,
'Downloading info json', 'Unable to download info file')
formats = [{
formats = []
# NOTE: VIP-restricted audio
if audio_info.get('is_paid'):
ts = int(time.time())
vip_info = self._download_json(
f'{scheme}://mpay.ximalaya.com/mobile/track/pay/{audio_id}/{ts}',
audio_id, 'Downloading VIP info json', 'Unable to download VIP info file',
query={'device': 'pc', 'isBackend': 'true', '_': ts})
filename = self._decrypt_filename(vip_info['fileId'], vip_info['seed'])
sign, token, timestamp = self._decrypt_url_params(vip_info['ep'])
vip_url = update_url_query(
f'{vip_info["domain"]}/download/{vip_info["apiVersion"]}{filename}', {
'sign': sign,
'token': token,
'timestamp': timestamp,
'buy_key': vip_info['buyKey'],
'duration': vip_info['duration'],
})
fmt = {
'format_id': 'vip',
'url': vip_url,
'vcodec': 'none',
}
if '_preview_' in vip_url:
self.report_warning(
f'This tracks requires a VIP account. Using a sample instead. {self._login_hint()}')
fmt.update({
'format_note': 'Sample',
'preference': -10,
**traverse_obj(vip_info, {
'filesize': ('sampleLength', {int_or_none}),
'duration': ('sampleDuration', {int_or_none}),
}),
})
else:
fmt.update(traverse_obj(vip_info, {
'filesize': ('totalLength', {int_or_none}),
'duration': ('duration', {int_or_none}),
}))
fmt['abr'] = try_call(lambda: fmt['filesize'] * 8 / fmt['duration'] / 1024)
formats.append(fmt)
formats.extend([{
'format_id': f'{bps}k',
'url': audio_info[k],
'abr': bps,
'vcodec': 'none',
} for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)]
} for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)])
thumbnails = []
for k in audio_info:

View File

@@ -3,16 +3,13 @@ from ..utils import (
int_or_none,
str_or_none,
try_get,
update_url_query,
url_or_none,
)
class XinpianchangIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://www\.xinpianchang\.com/(?P<id>[^/]+?)(?:\D|$)'
IE_NAME = 'xinpianchang'
IE_DESC = 'xinpianchang.com'
_VALID_URL = r'https?://(www\.)?xinpianchang\.com/(?P<id>a\d+)'
IE_DESC = '新片场'
_TESTS = [{
'url': 'https://www.xinpianchang.com/a11766551',
'info_dict': {
@@ -49,11 +46,11 @@ class XinpianchangIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id=video_id)
domain = self.find_value_with_regex(var='requireNewDomain', webpage=webpage)
vid = self.find_value_with_regex(var='vid', webpage=webpage)
app_key = self.find_value_with_regex(var='modeServerAppKey', webpage=webpage)
api = update_url_query(f'{domain}/mod/api/v2/media/{vid}', {'appKey': app_key})
data = self._download_json(api, video_id=video_id)['data']
video_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['detail']['video']
data = self._download_json(
f'https://mod-api.xinpianchang.com/mod/api/v2/media/{video_data["vid"]}', video_id,
query={'appKey': video_data['appKey']})['data']
formats, subtitles = [], {}
for k, v in data.get('resource').items():
if k in ('dash', 'hls'):
@@ -72,6 +69,10 @@ class XinpianchangIE(InfoExtractor):
'width': int_or_none(prog.get('width')),
'height': int_or_none(prog.get('height')),
'ext': 'mp4',
'http_headers': {
# NB: Server returns 403 without the Range header
'Range': 'bytes=0-',
},
} for prog in v if prog.get('url') or []])
return {
@@ -87,6 +88,3 @@ class XinpianchangIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
}
def find_value_with_regex(self, var, webpage):
    """Search ``webpage`` for a ``var <var> = "<value>"`` assignment via ``_search_regex``.

    ``var`` is interpolated into the pattern as-is and is also used as the
    name passed to ``_search_regex``.
    """
    assignment_re = rf'var\s{var}\s=\s\"(?P<vid>[^\"]+)\"'
    return self._search_regex(assignment_re, webpage, name=var)

View File

@@ -10,7 +10,7 @@ from ..utils import (
class YleAreenaIE(InfoExtractor):
_VALID_URL = r'https?://areena\.yle\.fi/(?P<id>[\d-]+)'
_VALID_URL = r'https?://areena\.yle\.fi/(?P<podcast>podcastit/)?(?P<id>[\d-]+)'
_GEO_COUNTRIES = ['FI']
_TESTS = [
{
@@ -77,7 +77,7 @@ class YleAreenaIE(InfoExtractor):
]
def _real_extract(self, url):
video_id = self._match_id(url)
video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
video_data = self._download_json(
f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
@@ -103,8 +103,11 @@ class YleAreenaIE(InfoExtractor):
'name': sub.get('kind'),
})
kaltura_id = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id'), expected_type=str)
if kaltura_id:
if is_podcast:
info_dict = {
'url': video_data['data']['ongoing_ondemand']['media_url'],
}
elif kaltura_id := traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id', {str})):
info_dict = {
'_type': 'url_transparent',
'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
@@ -114,13 +117,11 @@ class YleAreenaIE(InfoExtractor):
formats, subs = self._extract_m3u8_formats_and_subtitles(
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
self._merge_subtitles(subs, target=subtitles)
info_dict = {
'id': video_id,
'formats': formats,
}
info_dict = {'formats': formats}
return {
**info_dict,
'id': video_id,
'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
or episode or info.get('title')),
'description': description,

View File

@@ -136,7 +136,7 @@ class YoukuIE(InfoExtractor):
# request basic data
basic_data_params = {
'vid': video_id,
'ccode': '0524',
'ccode': '0564',
'client_ip': '192.168.1.1',
'utid': cna,
'client_ts': time.time() / 1000,

View File

@@ -69,136 +69,179 @@ from ..utils import (
)
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
STREAMING_DATA_PO_TOKEN = '__yt_dlp_po_token'
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
'web': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20220801.00.00',
'clientVersion': '2.20240726.00.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
'REQUIRE_PO_TOKEN': True,
},
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
'web_safari': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20240726.00.00',
'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
'REQUIRE_PO_TOKEN': True,
},
'web_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_EMBEDDED_PLAYER',
'clientVersion': '1.20220731.00.00',
'clientVersion': '1.20240723.01.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
},
'web_music': {
'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
'INNERTUBE_HOST': 'music.youtube.com',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_REMIX',
'clientVersion': '1.20220727.01.00',
'clientVersion': '1.20240724.00.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
},
'web_creator': {
'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_CREATOR',
'clientVersion': '1.20220726.00.00',
'clientVersion': '1.20240723.03.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
},
'android': {
'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID',
'clientVersion': '19.09.37',
'clientVersion': '19.29.37',
'androidSdkVersion': 30,
'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
'userAgent': 'com.google.android.youtube/19.29.37 (Linux; U; Android 11) gzip',
'osName': 'Android',
'osVersion': '11',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
'REQUIRE_JS_PLAYER': False,
},
'android_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_EMBEDDED_PLAYER',
'clientVersion': '19.09.37',
'androidSdkVersion': 30,
'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True,
},
'android_music': {
'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_MUSIC',
'clientVersion': '6.42.52',
'clientVersion': '7.11.50',
'androidSdkVersion': 30,
'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip',
'userAgent': 'com.google.android.apps.youtube.music/7.11.50 (Linux; U; Android 11) gzip',
'osName': 'Android',
'osVersion': '11',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True,
},
'android_creator': {
'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_CREATOR',
'clientVersion': '22.30.100',
'clientVersion': '24.30.100',
'androidSdkVersion': 30,
'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
'userAgent': 'com.google.android.apps.youtube.creator/24.30.100 (Linux; U; Android 11) gzip',
'osName': 'Android',
'osVersion': '11',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True,
},
# YouTube Kids videos aren't returned on this client for some reason
'android_vr': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_VR',
'clientVersion': '1.57.29',
'deviceMake': 'Oculus',
'deviceModel': 'Quest 3',
'androidSdkVersion': 32,
'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.57.29 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
'osName': 'Android',
'osVersion': '12L',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
'REQUIRE_JS_PLAYER': False,
},
'android_testsuite': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_TESTSUITE',
'clientVersion': '1.9',
'androidSdkVersion': 30,
'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 11) gzip',
'osName': 'Android',
'osVersion': '11',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 30,
'REQUIRE_JS_PLAYER': False,
'PLAYER_PARAMS': '2AMB',
},
# This client only has legacy formats and storyboards
'android_producer': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_PRODUCER',
'clientVersion': '0.111.1',
'androidSdkVersion': 30,
'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip',
'osName': 'Android',
'osVersion': '11',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 91,
'REQUIRE_JS_PLAYER': False,
},
# iOS clients have HLS live streams. Setting device model to get 60fps formats.
# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
'ios': {
'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS',
'clientVersion': '19.09.3',
'deviceModel': 'iPhone14,3',
'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
'clientVersion': '19.29.1',
'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
'osName': 'iPhone',
'osVersion': '17.5.1.21F90',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
'REQUIRE_JS_PLAYER': False,
},
'ios_embedded': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MESSAGES_EXTENSION',
'clientVersion': '19.09.3',
'deviceModel': 'iPhone14,3',
'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
'REQUIRE_JS_PLAYER': False,
},
'ios_music': {
'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MUSIC',
'clientVersion': '6.33.3',
'deviceModel': 'iPhone14,3',
'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
'clientVersion': '7.08.2',
'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtubemusic/7.08.2 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
'osName': 'iPhone',
'osVersion': '17.5.1.21F90',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
@@ -208,9 +251,12 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_CREATOR',
'clientVersion': '22.33.101',
'deviceModel': 'iPhone14,3',
'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
'clientVersion': '24.30.100',
'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.ytcreator/24.30.100 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
'osName': 'iPhone',
'osVersion': '17.5.1.21F90',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
@@ -219,19 +265,26 @@ INNERTUBE_CLIENTS = {
# mweb has 'ultralow' formats
# See: https://github.com/yt-dlp/yt-dlp/pull/557
'mweb': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'MWEB',
'clientVersion': '2.20220801.00.00',
'clientVersion': '2.20240726.01.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
},
'tv': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'TVHTML5',
'clientVersion': '7.20240724.13.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
},
# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
# See: https://github.com/zerodytrash/YouTube-Internal-Clients
'tv_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
@@ -249,6 +302,7 @@ INNERTUBE_CLIENTS = {
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
'REQUIRE_JS_PLAYER': False,
},
}
@@ -262,7 +316,7 @@ def _split_innertube_client(client_name):
def short_client_name(client_name):
    """Build a short upper-case label for an innertube client name.

    The first element returned by ``_split_innertube_client`` is split on
    ``_``. The label is the first four characters of the leading piece joined
    (via ``join_nonempty``) with the initials of the remaining pieces,
    upper-cased.
    """
    # Only one unpacking is needed; an earlier assignment to the same names
    # would be overwritten before being read.
    main, *parts = _split_innertube_client(client_name)[0].split('_')
    return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper()
@@ -274,23 +328,19 @@ def build_innertube_clients():
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg.setdefault('REQUIRE_PO_TOKEN', False)
ytcfg.setdefault('PLAYER_PARAMS', None)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
_, base_client, variant = _split_innertube_client(client)
ytcfg['priority'] = 10 * priority(base_client)
if not variant:
INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
embedscreen['priority'] -= 3
elif variant == 'embedded':
if variant == 'embedded':
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
ytcfg['priority'] -= 2
else:
elif variant:
ytcfg['priority'] -= 3
@@ -566,9 +616,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
or req_api_hostname or self._get_innertube_host(default_client or 'web'))
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Look up ``INNERTUBE_API_KEY`` through ``_ytcfg_get_safe``, expecting a ``str``."""
    def read_api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_api_key, str, default_client)
def _extract_context(self, ytcfg=None, default_client='web'):
context = get_first(
(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
@@ -614,13 +661,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
real_headers.update({'content-type': 'application/json'})
if headers:
real_headers.update(headers)
api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
or api_key or self._extract_api_key(default_client=default_client))
return self._download_json(
f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
data=json.dumps(data).encode('utf8'), headers=real_headers,
query={'key': api_key, 'prettyPrint': 'false'})
query=filter_dict({
'key': self._configuration_arg(
'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0],
'prettyPrint': 'false',
}, cndn=lambda _, v: v))
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
@@ -647,31 +696,46 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
'identity token', default=None, fatal=False)
@staticmethod
def _extract_account_syncid(*args):
def _data_sync_id_to_delegated_session_id(self, data_sync_id):
    """
    Derive the delegated session ID (channel_syncid) from a dataSyncId

    A dataSyncId has the form "channel_syncid||user_syncid" for a secondary channel
    and just "user_syncid||" for the primary channel.
    @param data_sync_id  dataSyncId string (may be empty or None)
    @returns             The part before "||" when something follows the separator,
                         otherwise None
    """
    if data_sync_id:
        leading_sid, _, trailing_sid = data_sync_id.partition('||')
        # Only the secondary-channel form has a non-empty part after "||";
        # in that form the leading part is the channel_syncid we want
        if trailing_sid:
            return leading_sid
    return None
def _extract_account_syncid(self, *args):
"""
Extract syncId required to download private playlists of secondary channels
Extract current session ID required to download private playlists of secondary channels
@params response and/or ytcfg
"""
for data in args:
# ytcfg includes channel_syncid if on secondary channel
delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
if delegated_sid:
return delegated_sid
sync_ids = (try_get(
data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
lambda x: x['DATASYNC_ID']), str) or '').split('||')
if len(sync_ids) >= 2 and sync_ids[1]:
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
return sync_ids[0]
# ytcfg includes channel_syncid if on secondary channel
if delegated_sid := traverse_obj(args, (..., 'DELEGATED_SESSION_ID', {str}, any)):
return delegated_sid
@staticmethod
def _extract_visitor_data(*args):
data_sync_id = self._extract_data_sync_id(*args)
return self._data_sync_id_to_delegated_session_id(data_sync_id)
def _extract_data_sync_id(self, *args):
    """
    Extract the dataSyncId of the current account.
    Its format is DELEGATED_SESSION_ID||USER_SESSION_ID or USER_SESSION_ID||
    A value given through the "data_sync_id" extractor argument takes precedence.
    @params response and/or ytcfg
    """
    configured_id = self._configuration_arg('data_sync_id', [None], ie_key=YoutubeIE, casesense=True)[0]
    if configured_id:
        return configured_id

    # ytcfg keeps the value under DATASYNC_ID; API responses nest it in the response context
    candidate_paths = ('DATASYNC_ID', ('responseContext', 'mainAppWebResponseContext', 'datasyncId'))
    return traverse_obj(args, (..., candidate_paths, {str}, any))
def _extract_visitor_data(self, *args):
"""
Extracts visitorData from an API response or ytcfg
Appears to be used to track session state
"""
if visitor_data := self._configuration_arg('visitor_data', [None], ie_key=YoutubeIE, casesense=True)[0]:
return visitor_data
return get_first(
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
expected_type=str)
@@ -972,7 +1036,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
ep=ep, fatal=True, headers=headers,
video_id=item_id, query=query, note=note,
context=self._extract_context(ytcfg, default_client),
api_key=self._extract_api_key(ytcfg, default_client),
api_hostname=api_hostname, default_client=default_client)
except ExtractorError as e:
if not isinstance(e.cause, network_exceptions):
@@ -1294,7 +1357,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_POTOKEN_EXPERIMENTS = ('51217476', '51217102')
_DEFAULT_CLIENTS = ('ios', 'web_creator')
_GEO_BYPASS = False
@@ -3129,12 +3192,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.write_debug(f'Decrypted nsig {s} => {ret}')
return ret
def _extract_n_function_name(self, jscode):
def _extract_n_function_name(self, jscode, player_url=None):
# Examples (with placeholders nfunc, narray, idx):
# * .get("n"))&&(b=nfunc(b)
# * .get("n"))&&(b=narray[idx](b)
# * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
# * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
# * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
funcname, idx = self._search_regex(
r'''(?x)(?:\.get\("n"\)\)&&\(b=|b=String\.fromCharCode\(110\),c=a\.get\(b\)\)&&\(c=)
(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''',
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
if not idx:
r'''(?x)
(?:
\.get\("n"\)\)&&\(b=|
(?:
b=String\.fromCharCode\(110\)|
(?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
)
(?:
,[a-zA-Z0-9_$]+\(a\))?,c=a\.
(?:
get\(b\)|
[a-zA-Z0-9_$]+\[b\]\|\|null
)\)&&\(c=|
\b(?P<var>[a-zA-Z0-9_$]+)=
)(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
(?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''',
jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
if not funcname:
self.report_warning(join_nonempty(
'Falling back to generic n function search',
player_url and f' player = {player_url}', delim='\n'))
return self._search_regex(
r'''(?xs)
;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
\s*\{(?:(?!};).)+?["']enhanced_except_''',
jscode, 'Initial JS player n function name', group='name')
elif not idx:
return funcname
return json.loads(js_to_json(self._search_regex(
@@ -3150,7 +3243,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if func_code:
return jsi, player_id, func_code
func_name = self._extract_n_function_name(jscode)
func_name = self._extract_n_function_name(jscode, player_url=player_url)
func_code = jsi.extract_function_code(func_name)
@@ -3626,6 +3719,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
**cls._get_checkok_params(),
}
def _get_config_po_token(self, client):
    """
    Look up a PO Token for `client` in the "po_token" extractor argument

    Each configured value is expected to look like "client+po_token";
    values without a "+" are reported once and ignored.
    @param client  Client name to match against the part before the first "+"
    @returns       The token part of the first matching value, or None
    """
    configured = self._configuration_arg('po_token', [], ie_key=YoutubeIE, casesense=True)
    for entry in configured:
        entry_client, plus, token = entry.partition('+')
        if not plus:
            self.report_warning(
                f'Invalid po_token configuration format. Expected "client+po_token", got "{entry}"', only_once=True)
        elif entry_client == client:
            return token
    return None
def fetch_po_token(self, client='web', visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
    """
    Return a PO Token for the given client, or None if one cannot be obtained

    A token configured through the "po_token" extractor argument takes precedence;
    otherwise the request is delegated to self._fetch_po_token().

    @param client        Client name the token is requested for
    @param visitor_data  Visitor Data the token is bound to when logged out
    @param data_sync_id  Data Sync ID the token is bound to when logged in
    @param player_url    Player URL; when falsy, the missing-identifier checks below are skipped
    @param kwargs        Passed through unchanged to self._fetch_po_token()
    @returns             The PO Token, or None
    """
    # PO Token is bound to visitor_data / Visitor ID when logged out. Must have visitor_data for it to function.
    if not visitor_data and not self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Visitor Data. '
            'You may need to pass Visitor Data with --extractor-args "youtube:visitor_data=XXX"')
        return None

    config_po_token = self._get_config_po_token(client)
    if config_po_token:
        # PO token is bound to data_sync_id / account Session ID when logged in. However, for the config po_token,
        # if using first channel in an account then we don't need the data_sync_id anymore...
        # so only warn here and still return the configured token
        if not data_sync_id and self.is_authenticated and player_url:
            self.report_warning(
                f'Got a PO Token for {client} client, but missing Data Sync ID for account. Formats may not work. '
                'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')

        return config_po_token

    # Require a Data Sync ID if logged in for external fetching
    if not data_sync_id and self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Data Sync ID for account. '
            'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
        return None

    return self._fetch_po_token(
        client=client,
        visitor_data=visitor_data,
        data_sync_id=data_sync_id,
        player_url=player_url,
        **kwargs,
    )
def _fetch_po_token(self, client, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
    """
    External PO Token fetch stub

    Called by fetch_po_token() with keyword arguments only, after its own checks
    have passed and no token was found in the "po_token" extractor argument.
    This implementation has no body, so it always returns None.
    NOTE(review): presumably a hook for an external token provider to override - confirm
    """
@staticmethod
def _is_agegated(player_response):
if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
@@ -3642,22 +3783,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token):
headers = self.generate_api_headers(
ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
ytcfg=player_ytcfg,
default_client=client,
visitor_data=visitor_data,
session_index=self._extract_session_index(master_ytcfg, player_ytcfg),
account_syncid=(
self._data_sync_id_to_delegated_session_id(data_sync_id)
or self._extract_account_syncid(master_ytcfg, initial_pr, player_ytcfg)
),
)
yt_query = {
'videoId': video_id,
}
pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
if pp_arg:
yt_query['params'] = pp_arg
default_pp = traverse_obj(
INNERTUBE_CLIENTS, (_split_innertube_client(client)[0], 'PLAYER_PARAMS', {str}))
if player_params := self._configuration_arg('player_params', [default_pp], casesense=True)[0]:
yt_query['params'] = player_params
if po_token:
yt_query['serviceIntegrityDimensions'] = {'poToken': po_token}
sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
@@ -3668,30 +3818,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
android_clients = []
default = ['ios', 'web']
excluded_clients = []
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
for client in self._configuration_arg('player_client'):
if client == 'default':
requested_clients.extend(default)
requested_clients.extend(self._DEFAULT_CLIENTS)
elif client == 'all':
requested_clients.extend(allowed_clients)
elif client.startswith('-'):
excluded_clients.append(client[1:])
elif client not in allowed_clients:
self.report_warning(f'Skipping unsupported client {client}')
elif client.startswith('android'):
android_clients.append(client)
self.report_warning(f'Skipping unsupported client "{client}"')
else:
requested_clients.append(client)
# Force deprioritization of broken Android clients for format de-duplication
requested_clients.extend(android_clients)
if not requested_clients:
requested_clients = default
requested_clients.extend(self._DEFAULT_CLIENTS)
for excluded_client in excluded_clients:
if excluded_client in requested_clients:
requested_clients.remove(excluded_client)
if not requested_clients:
raise ExtractorError('No player clients have been requested', expected=True)
if smuggled_data.get('is_music_url') or self.is_music_url(url):
requested_clients.extend(
f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
for requested_client in requested_clients:
_, base_client, variant = _split_innertube_client(requested_client)
music_client = f'{base_client}_music'
if variant != 'music' and music_client in INNERTUBE_CLIENTS:
requested_clients.append(music_client)
return orderedSet(requested_clients)
@@ -3702,19 +3857,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return pr_id
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
initial_pr = ignore_initial_response = None
initial_pr = None
if webpage:
if 'web' in clients:
experiments = traverse_obj(master_ytcfg, (
'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...))
if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
self.report_warning(
'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response')
ignore_initial_response = True
initial_pr = self._search_json(
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
prs = []
deprioritized_prs = []
if initial_pr and not self._invalid_player_response(initial_pr, video_id):
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
@@ -3736,14 +3886,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return
tried_iframe_fallback = False
player_url = None
player_url = visitor_data = data_sync_id = None
skipped_clients = {}
while clients:
deprioritize_pr = False
client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = {}
if client == 'web':
player_ytcfg = self._get_default_ytcfg() if ignore_initial_response else master_ytcfg
elif 'configs' not in self._configuration_arg('player_skip'):
player_ytcfg = master_ytcfg if client == 'web' else {}
if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
@@ -3756,43 +3905,77 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_url = self._download_player_url(video_id)
tried_iframe_fallback = True
pr = initial_pr if client == 'web' and not ignore_initial_response else None
for retry in self.RetryManager(fatal=False):
try:
pr = pr or self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e:
self.report_warning(e)
break
experiments = traverse_obj(pr, (
'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK',
'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...))
if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
pr = None
retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True)
if not pr:
visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)
po_token = self.fetch_po_token(
client=client, visitor_data=visitor_data,
data_sync_id=data_sync_id if self.is_authenticated else None,
player_url=player_url if require_js_player else None,
)
require_po_token = self._get_default_ytcfg(client).get('REQUIRE_PO_TOKEN')
if not po_token and require_po_token:
self.report_warning(
f'No PO Token provided for {client} client, '
f'which is required for working {client} formats. '
f'You can manually pass a PO Token for this client with '
f'--extractor-args "youtube:po_token={client}+XXX"',
only_once=True)
deprioritize_pr = True
pr = initial_pr if client == 'web' else None
try:
pr = pr or self._extract_player_response(
client, video_id,
master_ytcfg=player_ytcfg or master_ytcfg,
player_ytcfg=player_ytcfg,
player_url=player_url,
initial_pr=initial_pr,
visitor_data=visitor_data,
data_sync_id=data_sync_id,
po_token=po_token)
except ExtractorError as e:
self.report_warning(e)
continue
if pr_id := self._invalid_player_response(pr, video_id):
skipped_clients[client] = pr_id
elif pr:
# Save client name for introspection later
name = short_client_name(client)
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
sd[STREAMING_DATA_CLIENT_NAME] = name
sd[STREAMING_DATA_CLIENT_NAME] = client
sd[STREAMING_DATA_PO_TOKEN] = po_token
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = name
prs.append(pr)
f[STREAMING_DATA_CLIENT_NAME] = client
f[STREAMING_DATA_PO_TOKEN] = po_token
if deprioritize_pr:
deprioritized_prs.append(pr)
else:
prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
append_client(f'{base_client}_creator')
elif self._is_agegated(pr):
if variant == 'tv_embedded':
append_client(f'{base_client}_embedded')
elif not variant:
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
# tv_embedded can work around age-gate and age-verification IF the video is embeddable
if self._is_agegated(pr) and variant != 'tv_embedded':
append_client(f'tv_embedded.{base_client}')
# Unauthenticated users will only get tv_embedded client formats if age-gated
if self._is_agegated(pr) and not self.is_authenticated:
self.to_screen(
f'{video_id}: This video is age-restricted; some formats may be missing '
f'without authentication. {self._login_hint()}', only_once=True)
# EU countries require age-verification for accounts to access age-restricted videos
# If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
# If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded
embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr)
if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
self.to_screen(
f'{video_id}: This video is age-restricted and YouTube is requiring '
'account age-verification; some formats may be missing', only_once=True)
# web_creator and mediaconnect can work around the age-verification requirement
# _producer, _testsuite, & _vr variants can also work around age-verification
append_client('web_creator', 'mediaconnect')
prs.extend(deprioritized_prs)
if skipped_clients:
self.report_warning(
@@ -3927,14 +4110,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.report_warning(
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
# Android client formats are broken due to integrity check enforcement
# Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
is_broken = client_name and client_name.startswith(short_client_name('android'))
client_name = fmt[STREAMING_DATA_CLIENT_NAME]
po_token = fmt.get(STREAMING_DATA_PO_TOKEN)
if po_token:
fmt_url = update_url_query(fmt_url, {'pot': po_token})
# Clients that require PO Token return videoplayback URLs that may return 403
is_broken = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN'))
if is_broken:
self.report_warning(
f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
'They will be deprioritized', only_once=True)
f'{video_id}: {client_name} client formats require a PO Token which was not provided. '
'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
fps = int_or_none(fmt.get('fps')) or 0
@@ -3948,7 +4135,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
is_damaged and 'DAMAGED', is_broken and 'BROKEN',
(self.get_param('verbose') or all_formats) and client_name,
(self.get_param('verbose') or all_formats) and short_client_name(client_name),
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
@@ -4010,12 +4197,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
skip_manifests.add('dash')
def process_manifest_format(f, proto, client_name, itag):
def process_manifest_format(f, proto, client_name, itag, po_token):
key = (proto, f.get('language'))
if not all_formats and key in itags[itag]:
return False
itags[itag].add(key)
if f.get('source_preference') is None:
f['source_preference'] = -1
# Clients that require PO Token return videoplayback URLs that may return 403
# hls does not currently require PO Token
if (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) and proto != 'hls':
self.report_warning(
f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. '
'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
f['format_note'] = join_nonempty(f.get('format_note'), 'BROKEN', delim=' ')
f['source_preference'] -= 20
if itag and all_formats:
f['format_id'] = f'{itag}-{proto}'
elif any(p != proto for p, _ in itags[itag]):
@@ -4027,9 +4226,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
f['language_preference'] = PREFERRED_LANG_VALUE
if f.get('source_preference') is None:
f['source_preference'] = -1
if itag in ('616', '235'):
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
f['source_preference'] += 100
@@ -4038,7 +4234,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if f['quality'] == -1 and f.get('height'):
f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
if self.get_param('verbose') or all_formats:
f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
f['format_note'] = join_nonempty(
f.get('format_note'), short_client_name(client_name), delim=', ')
if f.get('fps') and f['fps'] <= 1:
del f['fps']
@@ -4049,24 +4246,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
subtitles = {}
for sd in streaming_data:
client_name = sd.get(STREAMING_DATA_CLIENT_NAME)
client_name = sd[STREAMING_DATA_CLIENT_NAME]
po_token = sd.get(STREAMING_DATA_PO_TOKEN)
hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
if hls_manifest_url:
if po_token:
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
fmts, subs = self._extract_m3u8_formats_and_subtitles(
hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
subtitles = self._merge_subtitles(subs, subtitles)
for f in fmts:
if process_manifest_format(f, 'hls', client_name, self._search_regex(
r'/itag/(\d+)', f['url'], 'itag', default=None)):
r'/itag/(\d+)', f['url'], 'itag', default=None), po_token):
yield f
dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
if dash_manifest_url:
if po_token:
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
for f in formats:
if process_manifest_format(f, 'dash', client_name, f['format_id']):
if process_manifest_format(f, 'dash', client_name, f['format_id'], po_token):
f['filesize'] = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
if needs_live_processing:
@@ -4888,7 +5089,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _rich_entries(self, rich_grid_renderer):
renderer = traverse_obj(
rich_grid_renderer,
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any)) or {}
video_id = renderer.get('videoId')
if video_id:
yield self._extract_video(renderer)
@@ -4900,6 +5101,21 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=self._get_text(renderer, 'title'))
return
# shortsLockupViewModel extraction
entity_id = renderer.get('entityId')
if entity_id:
video_id = traverse_obj(renderer, ('onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId', {str}))
if not video_id:
return
yield self.url_result(
f'https://www.youtube.com/shorts/{video_id}',
ie=YoutubeIE, video_id=video_id,
**traverse_obj(renderer, ('overlayMetadata', {
'title': ('primaryText', 'content', {str}),
'view_count': ('secondaryText', 'content', {parse_count}),
})),
thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
return
def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId')
@@ -7439,6 +7655,8 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
'id': clip_id,
'section_start': int(clip_data['startTimeMs']) / 1000,
'section_end': int(clip_data['endTimeMs']) / 1000,
'_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility
'proto:https', 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang'),
}

View File

@@ -709,9 +709,9 @@ class JSInterpreter:
obj.reverse()
return obj
elif member == 'slice':
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(len(argvals) == 1, 'takes exactly one argument')
return obj[argvals[0]:]
assertion(isinstance(obj, (list, str)), 'must be applied on a list or string')
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
return obj[slice(*argvals, None)]
elif member == 'splice':
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments')

View File

@@ -31,9 +31,9 @@ if curl_cffi is None:
curl_cffi_version = tuple(map(int, re.split(r'[^\d]+', curl_cffi.__version__)[:3]))
if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 8, 0)):
if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 7, 2)):
curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)'
raise ImportError('Only curl_cffi versions 0.5.10, 0.7.X are supported')
raise ImportError('Only curl_cffi versions 0.5.10, 0.7.0 and 0.7.1 are supported')
import curl_cffi.requests
from curl_cffi.const import CurlECode, CurlOpt

View File

@@ -10,7 +10,7 @@ import typing
import urllib.parse
import urllib.request
from .exceptions import RequestError, UnsupportedRequest
from .exceptions import RequestError
from ..dependencies import certifi
from ..socks import ProxyType, sockssocket
from ..utils import format_field, traverse_obj
@@ -206,7 +206,7 @@ def wrap_request_errors(func):
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except UnsupportedRequest as e:
except RequestError as e:
if e.handler is None:
e.handler = self
raise

View File

@@ -33,8 +33,8 @@ if not websockets:
import websockets.version
websockets_version = tuple(map(int_or_none, websockets.version.version.split('.')))
if websockets_version < (12, 0):
raise ImportError('Only websockets>=12.0 is supported')
if websockets_version < (13, 0):
raise ImportError('Only websockets>=13.0 is supported')
import websockets.sync.client
from websockets.uri import parse_uri
@@ -47,10 +47,7 @@ from websockets.uri import parse_uri
# 2: "AttributeError: 'ClientConnection' object has no attribute 'recv_events_exc'. Did you mean: 'recv_events'?"
import websockets.sync.connection # isort: split
with contextlib.suppress(Exception):
# > 12.0
websockets.sync.connection.Connection.recv_exc = None
# 12.0
websockets.sync.connection.Connection.recv_events_exc = None
class WebsocketsResponseAdapter(WebSocketResponse):
@@ -162,7 +159,7 @@ class WebsocketsRH(WebSocketRequestHandler):
additional_headers=headers,
open_timeout=timeout,
user_agent_header=None,
ssl_context=ssl_ctx if wsuri.secure else None,
ssl=ssl_ctx if wsuri.secure else None,
close_timeout=0, # not ideal, but prevents yt-dlp hanging
)
return WebsocketsResponseAdapter(conn, url=request.url)

View File

@@ -647,16 +647,16 @@ def create_parser():
'You can also simply specify a field to match if the field is present, '
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
'the filter matches if at least one of the conditions is met. E.g. --match-filter '
'!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
'the filter matches if at least one of the conditions is met. E.g. --match-filters '
'!is_live --match-filters "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
'matches only videos that are not live OR those that have a like count more than 100 '
'(or the like field is not available) and also has a description '
'that contains the phrase "cats & dogs" (caseless). '
'Use "--match-filter -" to interactively ask whether to download each video'))
'Use "--match-filters -" to interactively ask whether to download each video'))
selection.add_option(
'--no-match-filters',
dest='match_filter', action='store_const', const=None,
help='Do not use any --match-filter (default)')
help='Do not use any --match-filters (default)')
selection.add_option(
'--break-match-filters',
metavar='FILTER', dest='breaking_match_filter', action='append',
@@ -704,7 +704,7 @@ def create_parser():
selection.add_option(
'--break-per-input',
action='store_true', dest='break_per_url', default=False,
help='Alters --max-downloads, --break-on-existing, --break-match-filter, and autonumber to reset per input URL')
help='Alters --max-downloads, --break-on-existing, --break-match-filters, and autonumber to reset per input URL')
selection.add_option(
'--no-break-per-input',
action='store_false', dest='break_per_url',
@@ -1725,15 +1725,17 @@ def create_parser():
'--convert-subs', '--convert-sub', '--convert-subtitles',
metavar='FORMAT', dest='convertsubtitles', default=None,
help=(
'Convert the subtitles to another format (currently supported: {}) '
'(Alias: --convert-subtitles)'.format(', '.join(sorted(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)))))
'Convert the subtitles to another format '
f'(currently supported: {", ".join(sorted(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS))}). '
'Use "--convert-subs none" to disable conversion (default) (Alias: --convert-subtitles)'))
postproc.add_option(
'--convert-thumbnails',
metavar='FORMAT', dest='convertthumbnails', default=None,
help=(
'Convert the thumbnails to another format '
f'(currently supported: {", ".join(sorted(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS))}). '
'You can specify multiple rules using similar syntax as --remux-video'))
'You can specify multiple rules using similar syntax as "--remux-video". '
'Use "--convert-thumbnails none" to disable conversion (default)'))
postproc.add_option(
'--split-chapters', '--split-tracks',
dest='split_chapters', action='store_true', default=False,

View File

@@ -33,7 +33,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
FFmpegPostProcessor.__init__(self, downloader)
self._categories = tuple(categories or self.CATEGORIES.keys())
self._API_URL = api if re.match('^https?://', api) else 'https://' + api
self._API_URL = api if re.match('https?://', api) else 'https://' + api
def run(self, info):
extractor = info['extractor_key']

View File

@@ -135,20 +135,42 @@ def _get_binary_name():
def _get_system_deprecation():
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 8), (3, 8)
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 8), (3, 9)
if sys.version_info > MIN_RECOMMENDED:
return None
major, minor = sys.version_info[:2]
if sys.version_info < MIN_SUPPORTED:
msg = f'Python version {major}.{minor} is no longer supported'
else:
msg = (f'Support for Python version {major}.{minor} has been deprecated. '
'\nYou may stop receiving updates on this version at any time')
PYTHON_MSG = f'Please update to Python {".".join(map(str, MIN_RECOMMENDED))} or above'
major, minor = MIN_RECOMMENDED
return f'{msg}! Please update to Python {major}.{minor} or above'
if sys.version_info < MIN_SUPPORTED:
return f'Python version {major}.{minor} is no longer supported! {PYTHON_MSG}'
EXE_MSG_TMPL = ('Support for {} has been deprecated. '
'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}')
STOP_MSG = 'You may stop receiving updates on this version at any time!'
variant = detect_variant()
# Temporary until Windows builds use 3.9, which will drop support for Win7 and 2008ServerR2
if variant in ('win_exe', 'win_x86_exe', 'py2exe'):
platform_name = platform.platform()
if any(platform_name.startswith(f'Windows-{name}') for name in ('7', '2008ServerR2')):
return EXE_MSG_TMPL.format('Windows 7/Server 2008 R2', 'issues/10086', STOP_MSG)
elif variant == 'py2exe':
return EXE_MSG_TMPL.format(
'py2exe builds (yt-dlp_min.exe)', 'issues/10087',
'In a future update you will be migrated to the PyInstaller-bundled executable. '
'This will be done automatically; no action is required on your part')
return None
# Temporary until aarch64/armv7l build flow is bumped to Ubuntu 20.04 and Python 3.9
elif variant in ('linux_aarch64_exe', 'linux_armv7l_exe'):
libc_ver = version_tuple(os.confstr('CS_GNU_LIBC_VERSION').partition(' ')[2])
if libc_ver < (2, 31):
return EXE_MSG_TMPL.format('system glibc version < 2.31', 'pull/8638', STOP_MSG)
return None
return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}'
def _sha256_file(path):

View File

@@ -1217,7 +1217,7 @@ def unified_timestamp(date_str, day_first=True):
return None
date_str = re.sub(r'\s+', ' ', re.sub(
r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str))
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
timezone, date_str = extract_timezone(date_str)
@@ -1954,7 +1954,7 @@ def urljoin(base, path):
path = path.decode()
if not isinstance(path, str) or not path:
return None
if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
if re.match(r'(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
return path
if isinstance(base, bytes):
base = base.decode()
@@ -2007,7 +2007,7 @@ def url_or_none(url):
if not url or not isinstance(url, str):
return None
url = url.strip()
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
@@ -2919,6 +2919,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
'audio/webm': 'webm',
'audio/x-matroska': 'mka',
'audio/x-mpegurl': 'm3u',
'aacp': 'aac',
'midi': 'mid',
'ogg': 'ogg',
'wav': 'wav',
@@ -3112,7 +3113,7 @@ def is_html(first_bytes):
while first_bytes.startswith(bom):
encoding, first_bytes = enc, first_bytes[len(bom):]
return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
return re.match(r'\s*<', first_bytes.decode(encoding, 'replace'))
def determine_protocol(info_dict):
@@ -5280,7 +5281,7 @@ class FormatSorter:
settings = {
'vcodec': {'type': 'ordered', 'regex': True,
'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
'order': ['av0?1', 'vp0?9.0?2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
'acodec': {'type': 'ordered', 'regex': True,
'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',

View File

@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
__version__ = '2024.07.16'
__version__ = '2024.09.27'
RELEASE_GIT_HEAD = '89a161e8c62569a662deda1c948664152efcb6b4'
RELEASE_GIT_HEAD = 'c6387abc1af9842bb0541288a5610abba9b1ab51'
VARIANT = None
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
ORIGIN = 'yt-dlp/yt-dlp'
_pkg_version = '2024.07.16'
_pkg_version = '2024.09.27'