mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-14 12:15:23 +00:00
Merged master
This commit is contained in:
@@ -76,6 +76,7 @@ from .aenetworks import (
|
||||
)
|
||||
from .aeonco import AeonCoIE
|
||||
from .afreecatv import (
|
||||
AfreecaTVCatchStoryIE,
|
||||
AfreecaTVIE,
|
||||
AfreecaTVLiveIE,
|
||||
AfreecaTVUserIE,
|
||||
@@ -216,9 +217,8 @@ from .bbc import (
|
||||
BBCCoUkIPlayerGroupIE,
|
||||
BBCCoUkPlaylistIE,
|
||||
)
|
||||
from .bbcmaestro import (
|
||||
BBCMaestroComIE,
|
||||
)
|
||||
from .bbcmaestro import BBCMaestroComIE
|
||||
from .beacon import BeaconTvIE
|
||||
from .beatbump import (
|
||||
BeatBumpPlaylistIE,
|
||||
BeatBumpVideoIE,
|
||||
@@ -279,6 +279,7 @@ from .bleacherreport import (
|
||||
from .blerp import BlerpIE
|
||||
from .blogger import BloggerIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bluesky import BlueskyIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .boosty import BoostyIE
|
||||
@@ -364,7 +365,10 @@ from .ccc import (
|
||||
)
|
||||
from .ccma import CCMAIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .cda import (
|
||||
CDAIE,
|
||||
CDAFolderIE,
|
||||
)
|
||||
from .cellebrite import CellebriteIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .cgtn import CGTNIE
|
||||
@@ -399,8 +403,6 @@ from .cmt import CMTIE
|
||||
from .cnbc import CNBCVideoIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNArticleIE,
|
||||
CNNBlogsIE,
|
||||
CNNIndonesiaIE,
|
||||
)
|
||||
from .comedycentral import (
|
||||
@@ -506,7 +508,6 @@ from .dhm import DHMIE
|
||||
from .digitalconcerthall import DigitalConcertHallIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .discogs import DiscogsReleasePlaylistIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .disney import DisneyIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
from .dlf import (
|
||||
@@ -534,16 +535,12 @@ from .dplay import (
|
||||
DiscoveryPlusIndiaShowIE,
|
||||
DiscoveryPlusItalyIE,
|
||||
DiscoveryPlusItalyShowIE,
|
||||
DIYNetworkIE,
|
||||
DPlayIE,
|
||||
FoodNetworkIE,
|
||||
GlobalCyclingNetworkPlusIE,
|
||||
GoDiscoveryIE,
|
||||
HGTVDeIE,
|
||||
HGTVUsaIE,
|
||||
InvestigationDiscoveryIE,
|
||||
MotorTrendIE,
|
||||
MotorTrendOnDemandIE,
|
||||
ScienceChannelIE,
|
||||
TravelChannelIE,
|
||||
)
|
||||
@@ -736,6 +733,7 @@ from .genius import (
|
||||
GeniusIE,
|
||||
GeniusLyricsIE,
|
||||
)
|
||||
from .germanupa import GermanupaIE
|
||||
from .getcourseru import (
|
||||
GetCourseRuIE,
|
||||
GetCourseRuPlayerIE,
|
||||
@@ -782,6 +780,7 @@ from .gopro import GoProIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .gotostage import GoToStageIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .graspop import GraspopIE
|
||||
from .gronkh import (
|
||||
GronkhFeedIE,
|
||||
GronkhIE,
|
||||
@@ -828,7 +827,10 @@ from .hungama import (
|
||||
HungamaIE,
|
||||
HungamaSongIE,
|
||||
)
|
||||
from .huya import HuyaLiveIE
|
||||
from .huya import (
|
||||
HuyaLiveIE,
|
||||
HuyaVideoIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .hypergryph import MonsterSirenHypergryphMusicIE
|
||||
from .hytale import HytaleIE
|
||||
@@ -945,11 +947,13 @@ from .khanacademy import (
|
||||
KhanAcademyUnitIE,
|
||||
)
|
||||
from .kick import (
|
||||
KickClipIE,
|
||||
KickIE,
|
||||
KickVODIE,
|
||||
)
|
||||
from .kicker import KickerIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kika import KikaIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .kommunetv import KommunetvIE
|
||||
@@ -972,6 +976,10 @@ from .la7 import (
|
||||
LA7PodcastEpisodeIE,
|
||||
LA7PodcastIE,
|
||||
)
|
||||
from .laracasts import (
|
||||
LaracastsIE,
|
||||
LaracastsPlaylistIE,
|
||||
)
|
||||
from .lastfm import (
|
||||
LastFMIE,
|
||||
LastFMPlaylistIE,
|
||||
@@ -988,6 +996,7 @@ from .lcp import (
|
||||
LcpIE,
|
||||
LcpPlayIE,
|
||||
)
|
||||
from .learningonscreen import LearningOnScreenIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lecturio import (
|
||||
LecturioCourseIE,
|
||||
@@ -1036,10 +1045,7 @@ from .livestream import (
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .livestreamfails import LivestreamfailsIE
|
||||
from .lnkgo import (
|
||||
LnkGoIE,
|
||||
LnkIE,
|
||||
)
|
||||
from .lnk import LnkIE
|
||||
from .loom import (
|
||||
LoomFolderIE,
|
||||
LoomIE,
|
||||
@@ -1116,12 +1122,15 @@ from .meipai import MeipaiIE
|
||||
from .melonvod import MelonVODIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgtv import MGTVIE
|
||||
from .microsoftembed import MicrosoftEmbedIE
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .microsoftvirtualacademy import (
|
||||
MicrosoftVirtualAcademyCourseIE,
|
||||
MicrosoftVirtualAcademyIE,
|
||||
from .microsoftembed import (
|
||||
MicrosoftBuildIE,
|
||||
MicrosoftEmbedIE,
|
||||
MicrosoftLearnEpisodeIE,
|
||||
MicrosoftLearnPlaylistIE,
|
||||
MicrosoftLearnSessionIE,
|
||||
MicrosoftMediusIE,
|
||||
)
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .mildom import (
|
||||
MildomClipIE,
|
||||
MildomIE,
|
||||
@@ -1161,6 +1170,7 @@ from .mlb import (
|
||||
)
|
||||
from .mlssoccer import MLSSoccerIE
|
||||
from .mocha import MochaVideoIE
|
||||
from .mojevideo import MojevideoIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .motherless import (
|
||||
@@ -1606,6 +1616,7 @@ from .qqmusic import (
|
||||
QQMusicPlaylistIE,
|
||||
QQMusicSingerIE,
|
||||
QQMusicToplistIE,
|
||||
QQMusicVideoIE,
|
||||
)
|
||||
from .r7 import (
|
||||
R7IE,
|
||||
@@ -1758,7 +1769,10 @@ from .rtve import (
|
||||
RTVETelevisionIE,
|
||||
)
|
||||
from .rtvs import RTVSIE
|
||||
from .rtvslo import RTVSLOIE
|
||||
from .rtvslo import (
|
||||
RTVSLOIE,
|
||||
RTVSLOShowIE,
|
||||
)
|
||||
from .rudovideo import RudoVideoIE
|
||||
from .rule34video import Rule34VideoIE
|
||||
from .rumble import (
|
||||
@@ -1803,6 +1817,7 @@ from .screen9 import Screen9IE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastify import ScreencastifyIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenrec import ScreenRecIE
|
||||
from .scrippsnetworks import (
|
||||
ScrippsNetworksIE,
|
||||
ScrippsNetworksWatchIE,
|
||||
@@ -1813,6 +1828,7 @@ from .scte import (
|
||||
SCTECourseIE,
|
||||
)
|
||||
from .sejmpl import SejmIE
|
||||
from .sen import SenIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import (
|
||||
SenateGovIE,
|
||||
@@ -1868,6 +1884,7 @@ from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotrim import SmotrimIE
|
||||
from .snapchat import SnapchatSpotlightIE
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import (
|
||||
SohuIE,
|
||||
@@ -1928,6 +1945,10 @@ from .spreaker import (
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .sproutvideo import (
|
||||
SproutVideoIE,
|
||||
VidsIoIE,
|
||||
)
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
SRGSSRPlayIE,
|
||||
@@ -2160,10 +2181,7 @@ from .tv5unis import (
|
||||
TV5UnisVideoIE,
|
||||
)
|
||||
from .tv24ua import TV24UAVideoIE
|
||||
from .tva import (
|
||||
TVAIE,
|
||||
QubIE,
|
||||
)
|
||||
from .tva import TVAIE
|
||||
from .tvanouvelles import (
|
||||
TVANouvellesArticleIE,
|
||||
TVANouvellesIE,
|
||||
@@ -2303,6 +2321,7 @@ from .videomore import (
|
||||
VideomoreVideoIE,
|
||||
)
|
||||
from .videopress import VideoPressIE
|
||||
from .vidflex import VidflexIE
|
||||
from .vidio import (
|
||||
VidioIE,
|
||||
VidioLiveIE,
|
||||
@@ -2310,6 +2329,7 @@ from .vidio import (
|
||||
)
|
||||
from .vidlii import VidLiiIE
|
||||
from .vidly import VidlyIE
|
||||
from .vidyard import VidyardIE
|
||||
from .viewlift import (
|
||||
ViewLiftEmbedIE,
|
||||
ViewLiftIE,
|
||||
@@ -2375,6 +2395,10 @@ from .vrt import (
|
||||
VrtNUIE,
|
||||
)
|
||||
from .vtm import VTMIE
|
||||
from .vtv import (
|
||||
VTVIE,
|
||||
VTVGoIE,
|
||||
)
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import (
|
||||
VVVVIDIE,
|
||||
|
||||
@@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
'skip': 'This program is not currently available in ABC iview',
|
||||
}, {
|
||||
'url': 'https://iview.abc.net.au/show/inbestigators',
|
||||
'info_dict': {
|
||||
'id': '175343-1',
|
||||
'title': 'Series 1',
|
||||
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
|
||||
'series': 'The Inbestigators',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
webpage_data = self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
|
||||
webpage, 'initial state')
|
||||
video_data = self._parse_json(
|
||||
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
video_data = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
|
||||
transform_source=lambda x: x.encode().decode('unicode_escape'),
|
||||
end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
|
||||
|
||||
@@ -9,12 +9,12 @@ import re
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..networking import RequestHandler, Response
|
||||
from ..networking.exceptions import TransportError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@@ -26,37 +26,36 @@ from ..utils import (
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
|
||||
def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
    """Attach ``handler`` to the opener of the 'Urllib' request handler.

    Nothing happens when 'abematv-license' is already listed in that request
    handler's supported URL schemes. Otherwise ``handler`` is added to the
    opener obtained from the request handler and the scheme is appended to
    the supported list.
    """
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    license_scheme = 'abematv-license'
    urllib_rh = ydl._request_director.handlers['Urllib']
    if license_scheme in urllib_rh._SUPPORTED_URL_SCHEMES:
        # Scheme already registered; do not add the handler a second time
        return

    header_copy = ydl.params['http_headers'].copy()
    proxy_copy = ydl.proxies.copy()
    # Work on copies so the downloader's own headers/proxies stay untouched
    clean_proxies(proxy_copy, header_copy)

    url_opener = urllib_rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxy_copy)
    assert isinstance(url_opener, urllib.request.OpenerDirector)
    url_opener.add_handler(handler)

    urllib_rh._SUPPORTED_URL_SCHEMES = (*urllib_rh._SUPPORTED_URL_SCHEMES, license_scheme)
|
||||
class AbemaLicenseRH(RequestHandler):
|
||||
_SUPPORTED_URL_SCHEMES = ('abematv-license',)
|
||||
_SUPPORTED_PROXY_SCHEMES = None
|
||||
_SUPPORTED_FEATURES = None
|
||||
RH_NAME = 'abematv_license'
|
||||
|
||||
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
handler_order = 499
|
||||
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
def __init__(self, ie: 'AbemaTVIE'):
|
||||
# the protocol that this should really handle is 'abematv-license://'
|
||||
# abematv_license_open is just a placeholder for development purposes
|
||||
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
|
||||
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None))
|
||||
def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.ie = ie
|
||||
|
||||
def _send(self, request):
    """Serve a license request with the bytes from ``_get_videokey_from_ticket``.

    The ticket is the network-location part of ``request.url``. The bytes
    returned for it become the body of a ``Response`` whose
    ``Content-Length`` header carries their length.

    Raises:
        TransportError: when the key lookup raises ``ExtractorError``
            (its ``cause`` is forwarded) or ``IndexError`` / ``KeyError`` /
            ``TypeError`` (the ``repr`` of the error is used as the cause).
    """
    license_url = request.url
    ticket = urllib.parse.urlparse(license_url).netloc

    try:
        key_bytes = self._get_videokey_from_ticket(ticket)
    except ExtractorError as err:
        raise TransportError(cause=err.cause) from err
    except (IndexError, KeyError, TypeError) as err:
        # NOTE(review): presumably an unexpected license response shape - confirm
        raise TransportError(cause=repr(err)) from err

    body = io.BytesIO(key_bytes)
    length_header = {'Content-Length': str(len(key_bytes))}
    return Response(body, license_url, headers=length_header)
|
||||
|
||||
def _get_videokey_from_ticket(self, ticket):
|
||||
to_show = self.ie.get_param('verbose', False)
|
||||
media_token = self.ie._get_media_token(to_show=to_show)
|
||||
@@ -72,25 +71,17 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
res = decode_base_n(license_response['k'], table=self.STRTABLE)
|
||||
res = decode_base_n(license_response['k'], table=self._STRTABLE)
|
||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self.HKEY),
|
||||
binascii.unhexlify(self._HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
def abematv_license_open(self, url):
    """Open a license URL and return the video key as an ``addinfourl`` response.

    ``url`` may be a plain string or a ``urllib.request.Request``; a Request
    is reduced to its full URL first. The network-location part of the URL is
    used as the ticket for ``_get_videokey_from_ticket``. The response has
    status code 200 and a ``Content-Length`` header matching the key length.
    """
    if isinstance(url, urllib.request.Request):
        url = url.get_full_url()

    ticket = urllib.parse.urlparse(url).netloc
    key_bytes = self._get_videokey_from_ticket(ticket)

    body = io.BytesIO(key_bytes)
    response_headers = {
        'Content-Length': str(len(key_bytes)),
    }
    return urllib.response.addinfourl(body, headers=response_headers, url=url, code=200)
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
@@ -139,7 +130,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||
@@ -368,6 +359,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
info['episode_number'] = epis if epis < 2000 else None
|
||||
|
||||
is_live, m3u8_url = False, None
|
||||
availability = 'public'
|
||||
if video_type == 'now-on-air':
|
||||
is_live = True
|
||||
channel_url = 'https://api.abema.io/v1/channels'
|
||||
@@ -385,10 +377,10 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
|
||||
note='Checking playability',
|
||||
headers=headers)
|
||||
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
|
||||
if 3 not in ondemand_types:
|
||||
if not traverse_obj(api_response, ('label', 'free', {bool})):
|
||||
# cannot acquire decryption key for these streams
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
info.update(traverse_obj(api_response, {
|
||||
'series': ('series', 'title'),
|
||||
'season': ('season', 'name'),
|
||||
@@ -408,6 +400,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
headers=headers)
|
||||
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
|
||||
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
|
||||
else:
|
||||
@@ -425,6 +418,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
'availability': availability,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
_TEST = {
|
||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||
|
||||
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
@@ -48,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
'info_dict': {
|
||||
'id': '9841',
|
||||
@@ -70,10 +71,7 @@ class ADNIE(ADNBaseIE):
|
||||
},
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
|
||||
'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
@@ -166,7 +164,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
'username': username,
|
||||
})) or {}).get('accessToken')
|
||||
if access_token:
|
||||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
self._HEADERS['Authorization'] = f'Bearer {access_token}'
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
@@ -177,6 +175,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
@@ -217,7 +216,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization,
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
@@ -256,6 +254,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
load_balancer_data = self._download_json(
|
||||
load_balancer_url, video_id,
|
||||
f'Downloading {format_id} {quality} JSON metadata',
|
||||
headers=self._HEADERS,
|
||||
fatal=False) or {}
|
||||
m3u8_url = load_balancer_data.get('location')
|
||||
if not m3u8_url:
|
||||
@@ -276,7 +275,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + f'video/{video_id}', video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
return {
|
||||
@@ -298,9 +297,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
|
||||
class ADNSeasonIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
|
||||
'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': '911',
|
||||
@@ -311,24 +310,22 @@ class ADNSeasonIE(ADNBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
show = self._download_json(
|
||||
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
|
||||
'Downloading show JSON metadata', headers=self._HEADERS)['show']
|
||||
show_id = str(show['id'])
|
||||
episodes = self._download_json(
|
||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||
'Downloading episode list', headers={
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'Downloading episode list', headers=self._HEADERS, query={
|
||||
'order': 'asc',
|
||||
'limit': '-1',
|
||||
})
|
||||
|
||||
def entries():
|
||||
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
|
||||
yield self.url_result(
|
||||
f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
|
||||
ADNIE, episode_id)
|
||||
yield self.url_result(join_nonempty(
|
||||
'https://animationdigitalnetwork.com', lang, 'video',
|
||||
video_show_slug, episode_id, delim='/'), ADNIE, episode_id)
|
||||
|
||||
return self.playlist_result(entries(), show_id, show.get('title'))
|
||||
|
||||
@@ -1355,6 +1355,7 @@ MSO_INFO = {
|
||||
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
@@ -1454,7 +1455,11 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
}, headers={
|
||||
# yt-dlp's default user-agent is usually too old for Comcast_SSO
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
|
||||
'User-Agent': self._MODERN_USER_AGENT,
|
||||
} if mso_id == 'Comcast_SSO' else None)
|
||||
elif not self._cookies_passed:
|
||||
raise_mvpd_required()
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@@ -32,21 +33,21 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
'https://login.afreecatv.com/app/LoginAction.php', None,
|
||||
'https://login.sooplive.co.kr/app/LoginAction.php', None,
|
||||
'Logging in', data=urlencode_postdata(login_form))
|
||||
|
||||
_ERRORS = {
|
||||
-4: 'Your account has been suspended due to a violation of our terms and policies.',
|
||||
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
|
||||
-6: 'https://login.afreecatv.com/membership/changeMember.php',
|
||||
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
|
||||
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
|
||||
-12: 'https://member.afreecatv.com/app/user_security.php',
|
||||
-5: 'https://member.sooplive.co.kr/app/user_delete_progress.php',
|
||||
-6: 'https://login.sooplive.co.kr/membership/changeMember.php',
|
||||
-8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.sooplive.co.kr/app/pop_login_block.php',
|
||||
-11: 'https://login.sooplive.co.kr/afreeca/second_login.php',
|
||||
-12: 'https://member.sooplive.co.kr/app/user_security.php',
|
||||
0: 'The username does not exist or you have entered the wrong password.',
|
||||
-1: 'The username does not exist or you have entered the wrong password.',
|
||||
-3: 'You have entered your username/password incorrectly.',
|
||||
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
|
||||
-7: 'You cannot use your Global Soop account to access Korean Soop.',
|
||||
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
|
||||
-32008: 'You have failed to log in. Please contact our Help Center.',
|
||||
}
|
||||
@@ -58,71 +59,42 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
f'Unable to login: {self.IE_NAME} said: {error}',
|
||||
expected=True)
|
||||
|
||||
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
    """Download and return JSON from ``https://api.m.sooplive.co.kr/<endpoint>``.

    ``data``, ``headers`` and ``query`` are handed to the ``Request``
    unchanged, and the request is built with the ``legacy_ssl`` extension
    enabled. ``display_id`` is the ID passed on to ``_download_json``.
    """
    api_request = Request(
        f'https://api.m.sooplive.co.kr/{endpoint}',
        data=data, headers=headers, query=query,
        extensions={'legacy_ssl': True})
    return self._download_json(
        api_request, display_id,
        'Downloading API JSON', 'Unable to download API JSON')
|
||||
|
||||
|
||||
class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv'
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
IE_NAME = 'soop'
|
||||
IE_DESC = 'sooplive.co.kr'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
'url': 'https://vod.sooplive.co.kr/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '36164052',
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'title': '데일리 에이프릴 요정들의 시상식!',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160503',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||
'info_dict': {
|
||||
'id': '36153164',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '36153164_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '36153164_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}],
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||
'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
@@ -134,12 +106,12 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/97267690',
|
||||
'url': 'https://vod.sooplive.co.kr/player/97267690',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
@@ -149,44 +121,25 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The VOD does not exist',
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vod.afreecatv.com/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/70395877',
|
||||
'url': 'https://vod.sooplive.co.kr/player/70395877',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# subscribers only
|
||||
'url': 'https://vod.afreecatv.com/player/104647403',
|
||||
'url': 'https://vod.sooplive.co.kr/player/104647403',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private
|
||||
'url': 'https://vod.afreecatv.com/player/81669846',
|
||||
'url': 'https://vod.sooplive.co.kr/player/81669846',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://api.m.afreecatv.com/station/video/a/view', video_id,
|
||||
headers={'Referer': url}, data=urlencode_postdata({
|
||||
data = self._call_api(
|
||||
'station/video/a/view', video_id, headers={'Referer': url},
|
||||
data=urlencode_postdata({
|
||||
'nTitleNo': video_id,
|
||||
'nApiLevel': 10,
|
||||
}))['data']
|
||||
@@ -201,7 +154,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('bj_id', {str}),
|
||||
'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
})
|
||||
|
||||
@@ -225,7 +178,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
|
||||
'formats': formats,
|
||||
**traverse_obj(file_element, {
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('file_start', {unified_timestamp}),
|
||||
}),
|
||||
})
|
||||
@@ -253,12 +206,49 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:live'
|
||||
IE_DESC = 'afreecatv.com livestreams'
|
||||
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
|
||||
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:catchstory'
|
||||
IE_DESC = 'sooplive.co.kr catch story'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://vod.sooplive.co.kr/player/103247/catchstory',
|
||||
'info_dict': {
|
||||
'id': '103247',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'catchstory/a/view', video_id, headers={'Referer': url},
|
||||
query={'aStoryListIdx': '', 'nStoryIdx': video_id})
|
||||
|
||||
return self.playlist_result(self._entries(data), video_id)
|
||||
|
||||
@staticmethod
|
||||
def _entries(data):
|
||||
# 'files' is always a list with 1 element
|
||||
yield from traverse_obj(data, (
|
||||
'data', lambda _, v: v['story_type'] == 'catch',
|
||||
'catch_list', lambda _, v: v['files'][0]['file'], {
|
||||
'id': ('files', 0, 'file_info_key', {str}),
|
||||
'url': ('files', 0, 'file', {url_or_none}),
|
||||
'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('writer_id', {str}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
'timestamp': ('write_timestamp', {int_or_none}),
|
||||
}))
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:live'
|
||||
IE_DESC = 'sooplive.co.kr livestreams'
|
||||
_VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'info_dict': {
|
||||
'id': '237852185',
|
||||
'ext': 'mp4',
|
||||
@@ -270,30 +260,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
},
|
||||
'skip': 'Livestream has ended',
|
||||
}, {
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.afreecatv.com/pyh3646',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
|
||||
_LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php'
|
||||
_WORKING_CDNS = [
|
||||
'gcp_cdn', # live-global-cdn-v02.afreecatv.com
|
||||
'gs_cdn_pc_app', # pc-app.stream.afreecatv.com
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com
|
||||
'gs_cdn_pc_web', # pc-web.stream.afreecatv.com
|
||||
'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr
|
||||
'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr
|
||||
'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr
|
||||
]
|
||||
_BAD_CDNS = [
|
||||
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
|
||||
'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve)
|
||||
'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400)
|
||||
'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve)
|
||||
'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400)
|
||||
]
|
||||
|
||||
def _extract_formats(self, channel_info, broadcast_no, aid):
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr'
|
||||
|
||||
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
|
||||
default_cdn_ids = orderedSet([
|
||||
@@ -313,7 +303,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
try:
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
|
||||
headers={'Referer': 'https://play.afreecatv.com/'})
|
||||
headers={'Referer': 'https://play.sooplive.co.kr/'})
|
||||
except ExtractorError as e:
|
||||
if attempt == len(cdn_ids):
|
||||
raise
|
||||
@@ -329,7 +319,13 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
broadcaster_id = channel_info.get('BJID') or broadcaster_id
|
||||
broadcast_no = channel_info.get('BNO') or broadcast_no
|
||||
if not broadcast_no:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
result = channel_info.get('RESULT')
|
||||
if result == 0:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
elif result == -6:
|
||||
self.raise_login_required(
|
||||
'This channel is streaming for subscribers only', method='password')
|
||||
raise ExtractorError('Unable to extract broadcast number')
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if channel_info.get('BPWD') == 'Y' and password is None:
|
||||
@@ -358,7 +354,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
formats = self._extract_formats(channel_info, broadcast_no, aid)
|
||||
|
||||
station_info = traverse_obj(self._download_json(
|
||||
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
|
||||
'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no,
|
||||
'Downloading channel metadata', 'Unable to download channel metadata',
|
||||
query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
|
||||
|
||||
@@ -374,11 +370,11 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class AfreecaTVUserIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv:user'
|
||||
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
|
||||
class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:user'
|
||||
_VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -386,7 +382,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 218,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
|
||||
'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'parang1995',
|
||||
@@ -394,7 +390,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 997,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -402,7 +398,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 221,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -414,12 +410,12 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
|
||||
def _fetch_page(self, user_id, user_type, page):
|
||||
page += 1
|
||||
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
|
||||
info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id,
|
||||
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
|
||||
note=f'Downloading {user_type} video page {page}')
|
||||
for item in info['data']:
|
||||
yield self.url_result(
|
||||
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
|
||||
|
||||
@@ -71,7 +71,7 @@ class AllstarBaseIE(InfoExtractor):
|
||||
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
|
||||
'duration': ('clipLength', {int_or_none}),
|
||||
'filesize': ('clipSizeBytes', {int_or_none}),
|
||||
'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('createdDate', {int_or_none(scale=1000)}),
|
||||
'uploader': ('username', {str}),
|
||||
'uploader_id': ('user', '_id', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
|
||||
@@ -33,24 +33,6 @@ class AnvatoIE(InfoExtractor):
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
||||
'info_dict': {
|
||||
'id': '899441',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'NFL',
|
||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
||||
'duration': 157,
|
||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
||||
},
|
||||
}, {
|
||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||
@@ -241,31 +223,6 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||
}
|
||||
|
||||
def _generate_nfl_token(self, anvack, mcp_id):
|
||||
reroute = self._download_json(
|
||||
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
|
||||
headers={'X-Domain-Id': 100}, note='Fetching token info')
|
||||
token_type = reroute.get('token_type') or 'Bearer'
|
||||
auth_token = f'{token_type} {reroute["access_token"]}'
|
||||
response = self._download_json(
|
||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
||||
'query': '''{
|
||||
viewer {
|
||||
mediaToken(anvack: "%s", id: %s) {
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id), # noqa: UP031
|
||||
}).encode(), headers={
|
||||
'Authorization': auth_token,
|
||||
'Content-Type': 'application/json',
|
||||
}, note='Fetching NFL API token')
|
||||
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
|
||||
|
||||
_TOKEN_GENERATORS = {
|
||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
|
||||
}
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
return int_or_none(traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
|
||||
@@ -290,8 +247,6 @@ class AnvatoIE(InfoExtractor):
|
||||
}
|
||||
if extracted_token is not None:
|
||||
api['anvstk2'] = extracted_token
|
||||
elif self._TOKEN_GENERATORS.get(access_key) is not None:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
|
||||
elif self._ANVACK_TABLE.get(access_key) is not None:
|
||||
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
||||
else:
|
||||
|
||||
@@ -1,27 +1,42 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
|
||||
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
|
||||
'info_dict': {
|
||||
'id': '1000665010654',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
|
||||
'upload_date': '20240812',
|
||||
'timestamp': 1723449600,
|
||||
'duration': 3596,
|
||||
'series': 'Ferreck Dawn - To The Break of Dawn',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'episode': '207 - Whitney Webb Returns',
|
||||
'episode_number': 207,
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593932400,
|
||||
'duration': 6454,
|
||||
'duration': 5369,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
@@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
server_data = self._search_json(
|
||||
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
|
||||
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
|
||||
model_data = traverse_obj(server_data, (
|
||||
'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
|
||||
'model', {dict}, any))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': episode.get('name'),
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
'description': description.get('standard') or description.get('short'),
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
**self._json_ld(
|
||||
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
|
||||
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
|
||||
**traverse_obj(model_data, {
|
||||
'title': ('title', {str}),
|
||||
'url': ('streamUrl', {clean_podcast_url}),
|
||||
'timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
@@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
else:
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': f'{stream_type}-{vbr}' if vbr else stream_type,
|
||||
'format_id': join_nonempty(stream_type, vbr),
|
||||
'vbr': vbr,
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
|
||||
@@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARDMediathek'
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
@@ -299,7 +299,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '94834686',
|
||||
'ext': 'mp4',
|
||||
'duration': 2700,
|
||||
'duration': 2670,
|
||||
'episode': '7 Tage ... unter harten Jungs',
|
||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||
'upload_date': '20231005',
|
||||
@@ -307,10 +307,28 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'series': '7 Tage ...',
|
||||
'channel': 'HR',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
|
||||
'title': '7 Tage ... unter harten Jungs',
|
||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'info_dict': {
|
||||
'id': '13847165',
|
||||
'chapters': 'count:8',
|
||||
'ext': 'mp4',
|
||||
'channel': 'WDR',
|
||||
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'series': 'Lokalzeit aus Düsseldorf',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
|
||||
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'upload_date': '20241031',
|
||||
'timestamp': 1730399400,
|
||||
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
|
||||
'duration': 1759,
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'only_matching': True,
|
||||
@@ -455,6 +473,12 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
'age_limit': age_limit,
|
||||
**traverse_obj(media_data, {
|
||||
'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
|
||||
'start_time': ('chapterTime', {int_or_none}),
|
||||
'title': ('chapterTitle', {str}),
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(media_data, ('meta', {
|
||||
'title': 'title',
|
||||
'description': 'synopsis',
|
||||
@@ -470,7 +494,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDMediathekCollectionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/?#]+/)?
|
||||
(?P<playlist>sendung|serie|sammlung)/
|
||||
|
||||
@@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, video_id, mpd_id='dash', fatal=False))
|
||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
href, video_id, ism_id='mss', fatal=False))
|
||||
|
||||
@@ -101,9 +101,10 @@ class AsobiStageIE(InfoExtractor):
|
||||
self._HEADERS['Authorization'] = f'Bearer {token}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
|
||||
webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
|
||||
video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
|
||||
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
event_data = traverse_obj(
|
||||
self._search_nextjs_data(webpage, video_id, default={}),
|
||||
('props', 'pageProps', 'eventCMSData', {
|
||||
|
||||
@@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor):
|
||||
]
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == code:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
@@ -55,7 +47,9 @@ class AtresPlayerIE(InfoExtractor):
|
||||
'password': password,
|
||||
}))['targetUrl']
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 400)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
self._request_webpage(target_url, None, 'Following Target URL')
|
||||
|
||||
@@ -66,7 +60,12 @@ class AtresPlayerIE(InfoExtractor):
|
||||
episode = self._download_json(
|
||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 403)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
title = episode['titulo']
|
||||
|
||||
|
||||
@@ -4,9 +4,13 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
determine_ext,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,6 +34,7 @@ class BanByeBaseIE(InfoExtractor):
|
||||
class BanByeIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
# ['src']['mp4']['levels'] direct mp4 urls only
|
||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||
'info_dict': {
|
||||
@@ -58,6 +63,7 @@ class BanByeIE(BanByeBaseIE):
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
# ['src']['mp4']['levels'] direct mp4 urls only
|
||||
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
|
||||
'info_dict': {
|
||||
'id': 'v_kb6_o1Kyq-CD',
|
||||
@@ -77,6 +83,48 @@ class BanByeIE(BanByeBaseIE):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# ['src']['hls']['levels'] variant m3u8 urls only; master m3u8 is 404
|
||||
'url': 'https://banbye.com/watch/v_a_gPFuC9LoW5',
|
||||
'info_dict': {
|
||||
'id': 'v_a_gPFuC9LoW5',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:183524056bebdfa245fd6d214f63c0fe',
|
||||
'description': 'md5:943ac87287ca98d28d8b8797719827c6',
|
||||
'uploader': 'wRealu24',
|
||||
'channel_id': 'ch_wrealu24',
|
||||
'channel_url': 'https://banbye.com/channel/ch_wrealu24',
|
||||
'upload_date': '20231113',
|
||||
'timestamp': 1699874062,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_a_gPFuC9LoW5/96.webp',
|
||||
'tags': ['jaszczur', 'sejm', 'lewica', 'polska', 'ukrainizacja', 'pierwszeposiedzeniesejmu'],
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8'],
|
||||
}, {
|
||||
# ['src']['hls']['masterPlaylist'] m3u8 only
|
||||
'url': 'https://banbye.com/watch/v_B0rsKWsr-aaa',
|
||||
'info_dict': {
|
||||
'id': 'v_B0rsKWsr-aaa',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:00b254164b82101b3f9e5326037447ed',
|
||||
'description': 'md5:3fd8b48aa81954ba024bc60f5de6e167',
|
||||
'uploader': 'PSTV Piotr Szlachtowicz ',
|
||||
'channel_id': 'ch_KV9EVObkB9wB',
|
||||
'channel_url': 'https://banbye.com/channel/ch_KV9EVObkB9wB',
|
||||
'upload_date': '20240629',
|
||||
'timestamp': 1719646816,
|
||||
'duration': 2377,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_B0rsKWsr-aaa/96.webp',
|
||||
'tags': ['Biden', 'Trump', 'Wybory', 'USA'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -91,11 +139,24 @@ class BanByeIE(BanByeBaseIE):
|
||||
'id': f'{quality}p',
|
||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
|
||||
} for quality in [48, 96, 144, 240, 512, 1080]]
|
||||
formats = [{
|
||||
'format_id': f'http-{quality}p',
|
||||
'quality': quality,
|
||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
|
||||
} for quality in data['quality']]
|
||||
|
||||
formats = []
|
||||
url_data = self._download_json(f'{self._API_BASE}/videos/{video_id}/url', video_id, data=b'')
|
||||
if master_url := traverse_obj(url_data, ('src', 'hls', 'masterPlaylist', {url_or_none})):
|
||||
formats = self._extract_m3u8_formats(master_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
|
||||
for format_id, format_url in traverse_obj(url_data, (
|
||||
'src', ('mp4', 'hls'), 'levels', {dict.items}, lambda _, v: url_or_none(v[1]))):
|
||||
ext = determine_ext(format_url)
|
||||
is_hls = ext == 'm3u8'
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'ext': 'mp4' if is_hls else ext,
|
||||
'format_id': join_nonempty(is_hls and 'hls', format_id),
|
||||
'protocol': 'm3u8_native' if is_hls else 'https',
|
||||
'height': int_or_none(format_id),
|
||||
})
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -6,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
@@ -17,6 +19,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
@@ -40,8 +43,10 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader_url': 'https://youtube-dl.bandcamp.com',
|
||||
'uploader_id': 'youtube-dl',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
||||
'artists': ['youtube-dl "\'/\\ä↭'],
|
||||
'album_artists': ['youtube-dl "\'/\\ä↭'],
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
@@ -266,6 +271,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311756226,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
|
||||
'album_artists': ['Blazo'],
|
||||
'uploader_url': 'https://blazo.bandcamp.com',
|
||||
'release_date': '20110727',
|
||||
'release_timestamp': 1311724800.0,
|
||||
'track': 'Intro',
|
||||
'uploader_id': 'blazo',
|
||||
'track_number': 1,
|
||||
'album': 'Jazz Format Mixtape vol.1',
|
||||
'artists': ['Blazo'],
|
||||
'duration': 19.335,
|
||||
'track_id': '1353101989',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -277,6 +294,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311757238,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
'track': 'Kero One - Keep It Alive (Blazo remix)',
|
||||
'release_date': '20110727',
|
||||
'track_id': '38097443',
|
||||
'track_number': 2,
|
||||
'duration': 181.467,
|
||||
'uploader_url': 'https://blazo.bandcamp.com',
|
||||
'album': 'Jazz Format Mixtape vol.1',
|
||||
'uploader_id': 'blazo',
|
||||
'album_artists': ['Blazo'],
|
||||
'artists': ['Blazo'],
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
|
||||
'release_timestamp': 1311724800.0,
|
||||
},
|
||||
},
|
||||
],
|
||||
@@ -284,6 +313,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'title': 'Jazz Format Mixtape vol.1',
|
||||
'id': 'jazz-format-mixtape-vol-1',
|
||||
'uploader_id': 'blazo',
|
||||
'description': 'md5:38052a93217f3ffdc033cd5dbbce2989',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2,
|
||||
@@ -358,10 +388,10 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://bandcamp.com/?show=224',
|
||||
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
|
||||
'md5': '61acc9a002bed93986b91168aa3ab433',
|
||||
'info_dict': {
|
||||
'id': '224',
|
||||
'ext': 'opus',
|
||||
'ext': 'mp3',
|
||||
'title': 'BC Weekly April 4th 2017 - Magic Moments',
|
||||
'description': 'md5:5d48150916e8e02d030623a48512c874',
|
||||
'duration': 5829.77,
|
||||
@@ -371,7 +401,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'episode_id': '224',
|
||||
},
|
||||
'params': {
|
||||
'format': 'opus-lo',
|
||||
'format': 'mp3-128',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||
@@ -459,7 +489,7 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://coldworldofficial.bandcamp.com/music',
|
||||
'playlist_mincount': 10,
|
||||
'playlist_mincount': 7,
|
||||
'info_dict': {
|
||||
'id': 'coldworldofficial',
|
||||
'title': 'Discography of coldworldofficial',
|
||||
@@ -473,12 +503,19 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _yield_items(self, webpage):
|
||||
yield from (
|
||||
re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
yield from traverse_obj(webpage, (
|
||||
{find_element(id='music-grid', html=True)}, {extract_attributes},
|
||||
'data-client-items', {json.loads}, ..., 'page_url', {str}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader = self._match_id(url)
|
||||
webpage = self._download_webpage(url, uploader)
|
||||
|
||||
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
return self.playlist_from_matches(
|
||||
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
|
||||
self._yield_items(webpage), uploader, f'Discography of {uploader}',
|
||||
getter=urljoin(url))
|
||||
|
||||
@@ -1284,9 +1284,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, filter, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1386,7 +1386,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'ext': ('format', {str}),
|
||||
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
'tbr': ('bitrate', {int_or_none(scale=1000)}),
|
||||
}))
|
||||
if formats:
|
||||
entry = {
|
||||
@@ -1398,7 +1398,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('firstPublished', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
done = True
|
||||
@@ -1428,7 +1428,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
if not entry.get('timestamp'):
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('timestamp'), 'model',
|
||||
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
'timestamp', {int_or_none(scale=1000)}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
68
yt_dlp/extractor/beacon.py
Normal file
68
yt_dlp/extractor/beacon.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BeaconTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beacon\.tv/content/(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://beacon.tv/content/welcome-to-beacon',
|
||||
'md5': 'b3f5932d437f288e662f10f3bfc5bd04',
|
||||
'info_dict': {
|
||||
'id': 'welcome-to-beacon',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240509',
|
||||
'description': 'md5:ea2bd32e71acf3f9fca6937412cc3563',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/I4CkkEvN/poster.jpg?width=720',
|
||||
'title': 'Your home for Critical Role!',
|
||||
'timestamp': 1715227200,
|
||||
'duration': 105.494,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beacon.tv/content/re-slayers-take-trailer',
|
||||
'md5': 'd879b091485dbed2245094c8152afd89',
|
||||
'info_dict': {
|
||||
'id': 're-slayers-take-trailer',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Re-Slayer’s Take | Official Trailer',
|
||||
'timestamp': 1715189040,
|
||||
'upload_date': '20240508',
|
||||
'duration': 53.249,
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/PW5ApIw3/poster.jpg?width=720',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
content_data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'pageProps', '__APOLLO_STATE__',
|
||||
lambda k, v: k.startswith('Content:') and v['slug'] == video_id, any))
|
||||
if not content_data:
|
||||
raise ExtractorError('Failed to extract content data')
|
||||
|
||||
jwplayer_data = traverse_obj(content_data, (
|
||||
(('contentVideo', 'video', 'videoData'),
|
||||
('contentPodcast', 'podcast', 'audioData')), {json.loads}, {dict}, any))
|
||||
if not jwplayer_data:
|
||||
if content_data.get('contentType') not in ('videoPodcast', 'video', 'podcast'):
|
||||
raise ExtractorError('Content is not a video/podcast', expected=True)
|
||||
if traverse_obj(content_data, ('contentTier', '__ref')) != 'MemberTier:65b258d178f89be87b4dc0a4':
|
||||
self.raise_login_required('This video/podcast is for members only')
|
||||
raise ExtractorError('Failed to extract content')
|
||||
|
||||
return {
|
||||
**self._parse_jwplayer_data(jwplayer_data, video_id),
|
||||
**traverse_obj(content_data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('publishedAt', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
@@ -1,18 +1,33 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes
|
||||
from ..utils import ExtractorError, extract_attributes
|
||||
|
||||
|
||||
class BFMTVBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
|
||||
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>.*?</div>)'
|
||||
_VIDEO_ELEMENT_REGEX = r'(<video-js[^>]+>)'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _brightcove_url_result(self, video_id, video_block):
|
||||
account_id = video_block.get('accountid') or '876450612001'
|
||||
player_id = video_block.get('playerid') or 'I2qBTln4u'
|
||||
def _extract_video(self, video_block):
|
||||
video_element = self._search_regex(
|
||||
self._VIDEO_ELEMENT_REGEX, video_block, 'video element', default=None)
|
||||
if video_element:
|
||||
video_element_attrs = extract_attributes(video_element)
|
||||
video_id = video_element_attrs.get('data-video-id')
|
||||
if not video_id:
|
||||
return
|
||||
account_id = video_element_attrs.get('data-account') or '876450610001'
|
||||
player_id = video_element_attrs.get('adjustplayer') or '19dszYXgm'
|
||||
else:
|
||||
video_block_attrs = extract_attributes(video_block)
|
||||
video_id = video_block_attrs.get('videoid')
|
||||
if not video_id:
|
||||
return
|
||||
account_id = video_block_attrs.get('accountid') or '876630703001'
|
||||
player_id = video_block_attrs.get('playerid') or 'KbPwEbuHx'
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||
'BrightcoveNew', video_id)
|
||||
@@ -40,23 +55,25 @@ class BFMTVIE(BFMTVBaseIE):
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
video_block = extract_attributes(self._search_regex(
|
||||
video = self._extract_video(self._search_regex(
|
||||
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||
return self._brightcove_url_result(video_block['videoid'], video_block)
|
||||
if not video:
|
||||
raise ExtractorError('Failed to extract video')
|
||||
return video
|
||||
|
||||
|
||||
class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
class BFMTVLiveIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:live'
|
||||
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bfmtv.com/en-direct/',
|
||||
'info_dict': {
|
||||
'id': '5615950982001',
|
||||
'id': '6346069778112',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'title': r're:^Le Live BFM TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'uploader_id': '876450610001',
|
||||
'upload_date': '20220926',
|
||||
'timestamp': 1664207191,
|
||||
'upload_date': '20240202',
|
||||
'timestamp': 1706887572,
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://.+/image\.jpg',
|
||||
'tags': [],
|
||||
@@ -69,6 +86,15 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
video = self._extract_video(self._search_regex(
|
||||
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||
if not video:
|
||||
raise ExtractorError('Failed to extract video')
|
||||
return video
|
||||
|
||||
|
||||
class BFMTVArticleIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:article'
|
||||
@@ -102,18 +128,16 @@ class BFMTVArticleIE(BFMTVBaseIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _entries(self, webpage):
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video = self._extract_video(video_block_el)
|
||||
if video:
|
||||
yield video
|
||||
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
|
||||
entries = []
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video_block = extract_attributes(video_block_el)
|
||||
video_id = video_block.get('videoid')
|
||||
if not video_id:
|
||||
continue
|
||||
entries.append(self._brightcove_url_result(video_id, video_block))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._entries(webpage), bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._html_search_meta(['og:description', 'description'], webpage))
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -50,7 +49,7 @@ class BibelTVBaseIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('schedulingStart', {parse_iso8601}),
|
||||
'season_number': 'seasonNumber',
|
||||
'episode_number': 'episodeNumber',
|
||||
|
||||
@@ -31,12 +31,12 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_count,
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
smuggle_url,
|
||||
srt_subtitles_timecode,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
@@ -46,7 +46,25 @@ from ..utils import (
|
||||
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
|
||||
_wbi_key_cache = {}
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
|
||||
|
||||
def _check_missing_formats(self, play_info, formats):
|
||||
parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
|
||||
missing_formats = join_nonempty(*[
|
||||
traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
|
||||
for fmt in traverse_obj(play_info, (
|
||||
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
|
||||
if missing_formats:
|
||||
self.to_screen(
|
||||
f'Format(s) {missing_formats} are missing; you have to login or '
|
||||
f'become a premium member to download them. {self._login_hint()}')
|
||||
|
||||
def extract_formats(self, play_info):
|
||||
format_names = {
|
||||
@@ -86,18 +104,75 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'format': format_names.get(video.get('id')),
|
||||
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
||||
|
||||
missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
|
||||
if missing_formats:
|
||||
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
|
||||
f'you have to login or become premium member to download them. {self._login_hint()}')
|
||||
if formats:
|
||||
self._check_missing_formats(play_info, formats)
|
||||
|
||||
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'duration': ('length', {float_or_none(scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}))
|
||||
if fragments:
|
||||
formats.append({
|
||||
'url': fragments[0]['url'],
|
||||
'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
|
||||
**({
|
||||
'fragments': fragments,
|
||||
'protocol': 'http_dash_segments',
|
||||
} if len(fragments) > 1 else {}),
|
||||
**traverse_obj(play_info, {
|
||||
'quality': ('quality', {int_or_none}),
|
||||
'format_id': ('quality', {str_or_none}),
|
||||
'format_note': ('quality', {lambda x: format_names.get(x)}),
|
||||
'duration': ('timelength', {float_or_none(scale=1000)}),
|
||||
}),
|
||||
**parse_resolution(format_names.get(play_info.get('quality'))),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _download_playinfo(self, video_id, cid, headers=None):
|
||||
def _get_wbi_key(self, video_id):
|
||||
if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
|
||||
return self._wbi_key_cache['key']
|
||||
|
||||
session_data = self._download_json(
|
||||
'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
|
||||
|
||||
lookup = ''.join(traverse_obj(session_data, (
|
||||
'data', 'wbi_img', ('img_url', 'sub_url'),
|
||||
{lambda x: x.rpartition('/')[2].partition('.')[0]})))
|
||||
|
||||
# from getMixinKey() in the vendor js
|
||||
mixin_key_enc_tab = [
|
||||
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
|
||||
33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
|
||||
61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
|
||||
36, 20, 34, 44, 52,
|
||||
]
|
||||
|
||||
self._wbi_key_cache.update({
|
||||
'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
|
||||
'ts': time.time(),
|
||||
})
|
||||
return self._wbi_key_cache['key']
|
||||
|
||||
def _sign_wbi(self, params, video_id):
|
||||
params['wts'] = round(time.time())
|
||||
params = {
|
||||
k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
|
||||
for k, v in sorted(params.items())
|
||||
}
|
||||
query = urllib.parse.urlencode(params)
|
||||
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
|
||||
return params
|
||||
|
||||
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
|
||||
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
|
||||
if qn:
|
||||
params['qn'] = qn
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
|
||||
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
|
||||
query=self._sign_wbi(params, bvid), headers=headers,
|
||||
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
@@ -115,15 +190,15 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
}],
|
||||
}
|
||||
|
||||
subtitle_info = traverse_obj(self._download_json(
|
||||
video_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
|
||||
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
|
||||
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
|
||||
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
|
||||
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
|
||||
for s in subs_list:
|
||||
note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
|
||||
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||
self.report_warning(
|
||||
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
|
||||
for s in traverse_obj(video_info, (
|
||||
'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
|
||||
subtitles.setdefault(s['lan'], []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
|
||||
@@ -133,7 +208,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
def _get_chapters(self, aid, cid):
|
||||
chapters = aid and cid and self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
note='Extracting chapters', fatal=False)
|
||||
note='Extracting chapters', fatal=False, headers=self._HEADERS)
|
||||
return traverse_obj(chapters, ('data', 'view_points', ..., {
|
||||
'title': 'content',
|
||||
'start_time': 'from',
|
||||
@@ -203,15 +278,15 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
|
||||
return cid_edges
|
||||
|
||||
def _get_interactive_entries(self, video_id, cid, metainfo):
|
||||
def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
|
||||
graph_version = traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
|
||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
|
||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||
for cid, edges in cid_edges.items():
|
||||
play_info = self._download_playinfo(video_id, cid)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
yield {
|
||||
**metainfo,
|
||||
'id': f'{video_id}_{cid}',
|
||||
@@ -224,7 +299,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
@@ -243,17 +318,17 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'timestamp': 1488353834,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'old av URL version',
|
||||
'url': 'http://www.bilibili.com/video/av1074402/',
|
||||
'info_dict': {
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||
'id': 'BV11x411K7CN',
|
||||
'ext': 'mp4',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
'id': 'BV11x411K7CN',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308.36,
|
||||
'upload_date': '20140420',
|
||||
'timestamp': 1397983878,
|
||||
@@ -262,6 +337,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||
'_old_archive_ids': ['bilibili 1074402_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
@@ -288,6 +365,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||
'duration': 90.314,
|
||||
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
@@ -308,28 +386,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||
'duration': 90.314,
|
||||
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'video has subtitles',
|
||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||
'info_dict': {
|
||||
'id': 'BV12N4y1M7rh',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
||||
'tags': list,
|
||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'subtitles': 'count:2',
|
||||
},
|
||||
'params': {'listsubtitles': True},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/av8903802/',
|
||||
'info_dict': {
|
||||
@@ -347,6 +405,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -370,6 +429,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 463665680_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
@@ -388,8 +448,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 893839363_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'newer festival video',
|
||||
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
||||
@@ -406,8 +466,57 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 778246196_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'legacy flv/mp4 video',
|
||||
'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
|
||||
'info_dict': {
|
||||
'id': 'BV1ms411Q7vw_p4',
|
||||
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||
'timestamp': 1458222815,
|
||||
'upload_date': '20160317',
|
||||
'description': '云南方言快乐生产线出品',
|
||||
'duration': float,
|
||||
'uploader': '一笑颠天',
|
||||
'uploader_id': '3916081',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 4120229_part4'],
|
||||
},
|
||||
'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
|
||||
'playlist_count': 19,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'BV1ms411Q7vw_p4_0',
|
||||
'ext': 'flv',
|
||||
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||
'duration': 399.102,
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'note': 'legacy mp4-only video',
|
||||
'url': 'https://www.bilibili.com/video/BV1nx411u79K',
|
||||
'info_dict': {
|
||||
'id': 'BV1nx411u79K',
|
||||
'ext': 'mp4',
|
||||
'title': '【练习室】201603声乐练习《No Air》with VigoVan',
|
||||
'timestamp': 1508893551,
|
||||
'upload_date': '20171025',
|
||||
'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
|
||||
'duration': 80.384,
|
||||
'uploader': '伯远',
|
||||
'uploader_id': '10584494',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 15700301_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'interactive/split-path video',
|
||||
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
|
||||
@@ -425,6 +534,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||
},
|
||||
'playlist_count': 33,
|
||||
'playlist': [{
|
||||
@@ -443,6 +553,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
@@ -465,6 +576,29 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'upload_date': '20191021',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}, {
|
||||
'note': 'video has subtitles, which requires login',
|
||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||
'info_dict': {
|
||||
'id': 'BV12N4y1M7rh',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
||||
'tags': list,
|
||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'subtitles': 'count:2', # login required for CC subtitle
|
||||
'_old_archive_ids': ['bilibili 898179753_part1'],
|
||||
},
|
||||
'params': {'listsubtitles': True},
|
||||
'skip': 'login required for subtitle',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
|
||||
'info_dict': {
|
||||
@@ -489,6 +623,10 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'skip': 'geo-restricted',
|
||||
}, {
|
||||
'note': 'has - in the last path segment of the url',
|
||||
'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -498,8 +636,9 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
if not self._match_valid_url(urlh.url):
|
||||
return self.url_result(urlh.url)
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
headers['Referer'] = url
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
@@ -548,7 +687,6 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
aid = video_data.get('aid')
|
||||
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
|
||||
|
||||
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
||||
|
||||
festival_info = {}
|
||||
@@ -586,18 +724,65 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
|
||||
if is_interactive:
|
||||
return self.playlist_result(
|
||||
self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
|
||||
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
|
||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
__post_extractor=self.extract_comments(aid))
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'formats': self.extract_formats(play_info),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if not traverse_obj(play_info, ('dash')):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
@@ -837,8 +1022,6 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
|
||||
|
||||
class BilibiliCheeseBaseIE(BilibiliBaseIE):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _extract_episode(self, season_info, ep_id):
|
||||
episode_info = traverse_obj(season_info, (
|
||||
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
|
||||
@@ -968,7 +1151,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
|
||||
}))
|
||||
|
||||
|
||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||
class BilibiliSpaceBaseIE(BilibiliBaseIE):
|
||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||
first_page = fetch_page(0)
|
||||
metadata = get_metadata(first_page)
|
||||
@@ -988,73 +1171,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
'id': '3985676',
|
||||
},
|
||||
'playlist_mincount': 178,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/313580179/video',
|
||||
'info_dict': {
|
||||
'id': '313580179',
|
||||
},
|
||||
'playlist_mincount': 92,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _extract_signature(self, playlist_id):
    """Build the key that is mixed into the request signature.

    The nav endpoint is queried (non-fatally) for the `wbi_img` image URLs.
    The file stem of each URL is used as one half of the session key; when a
    half cannot be extracted, a hardcoded fallback value is used instead.
    The characters of the combined key are then picked in a fixed order and
    the first 32 of them are returned.
    """
    nav_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

    def key_from_url(x):
        # Last path component, cut at its first dot
        return x[x.rfind('/') + 1:].split('.')[0]

    img_key = traverse_obj(nav_data, ('data', 'wbi_img', 'img_url', {key_from_url}))
    sub_key = traverse_obj(nav_data, ('data', 'wbi_img', 'sub_url', {key_from_url}))
    session_key = (
        (img_key or '34478ba821254d9d93542680e3b86100')
        + (sub_key or '7e16a90d190a4355a78fd00b32a38de6'))

    # Fixed order in which characters of the session key are picked
    pick_order = (
        46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
        12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
        57, 62, 11, 36, 20, 34, 44, 52,
    )
    key_length = len(session_key)
    # Positions beyond the end of a shorter-than-expected key are skipped
    picked = ''.join(session_key[index] for index in pick_order if index < key_length)
    return picked[:32]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
||||
if not is_video_url:
|
||||
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
||||
'To download audios, add a "/audio" to the URL')
|
||||
|
||||
signature = self._extract_signature(playlist_id)
|
||||
|
||||
def fetch_page(page_idx):
|
||||
query = {
|
||||
'keyword': '',
|
||||
'mid': playlist_id,
|
||||
'order': 'pubdate',
|
||||
'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
|
||||
'order_avoided': 'true',
|
||||
'platform': 'web',
|
||||
'pn': page_idx + 1,
|
||||
'ps': 30,
|
||||
'tid': 0,
|
||||
'web_location': 1550101,
|
||||
'wts': int(time.time()),
|
||||
}
|
||||
query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
|
||||
|
||||
try:
|
||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query,
|
||||
headers={'referer': url})
|
||||
response = self._download_json(
|
||||
'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
|
||||
query=self._sign_wbi(query, playlist_id),
|
||||
note=f'Downloading space page {page_idx}', headers={'Referer': url})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
raise
|
||||
if response['code'] in (-352, -401):
|
||||
status_code = response['code']
|
||||
if status_code == -401:
|
||||
raise ExtractorError(
|
||||
f'Request is blocked by server ({-response["code"]}), '
|
||||
'please add cookies, wait and try later.', expected=True)
|
||||
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||
elif status_code == -352 and not self.is_logged_in:
|
||||
self.raise_login_required('Request is rejected, you need to login to access playlist')
|
||||
elif status_code != 0:
|
||||
raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
|
||||
return response['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
@@ -1280,7 +1443,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/watchlater/#/list',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'info_dict': {
|
||||
'id': r're:\d+',
|
||||
'title': '稍后再看',
|
||||
},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
@@ -1356,14 +1522,19 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'skip': 'redirect url',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'info_dict': {
|
||||
'id': r're:2_\d+',
|
||||
'title': '稍后再看',
|
||||
'uploader': str,
|
||||
'uploader_id': str,
|
||||
},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
'skip': 'redirect url & login required',
|
||||
}]
|
||||
|
||||
def _extract_medialist(self, query, list_id):
|
||||
@@ -1414,7 +1585,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}, filter),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
@@ -1680,7 +1851,7 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.tv/'}
|
||||
|
||||
def _call_api(self, endpoint, *args, **kwargs):
|
||||
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
|
||||
@@ -1808,7 +1979,8 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
public_key = Cryptodome.RSA.importKey(key_data['key'])
|
||||
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
|
||||
login_post = self._download_json(
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
|
||||
data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': base64.b64encode(password_hash).decode('ascii'),
|
||||
'keep_me': 'true',
|
||||
@@ -2140,7 +2312,8 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
||||
series_info = self._call_api(
|
||||
f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
||||
return self.playlist_result(
|
||||
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
|
||||
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
|
||||
|
||||
@@ -24,7 +24,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
|
||||
@@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@@ -132,7 +135,7 @@ class BitChuteIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||
|
||||
self._raise_if_restricted(webpage)
|
||||
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
|
||||
@@ -171,13 +174,13 @@ class BitChuteIE(InfoExtractor):
|
||||
|
||||
|
||||
class BitChuteChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
'info_dict': {
|
||||
'id': 'bitchute',
|
||||
'title': 'BitChute',
|
||||
'description': 'md5:5329fb3866125afa9446835594a9b138',
|
||||
'description': 'md5:2134c37d64fc3a4846787c402956adac',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
@@ -210,6 +213,9 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
||||
@@ -230,7 +236,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _make_url(playlist_id, playlist_type):
|
||||
return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'
|
||||
return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/'
|
||||
|
||||
def _fetch_page(self, playlist_id, playlist_type, page_num):
|
||||
playlist_url = self._make_url(playlist_id, playlist_type)
|
||||
|
||||
388
yt_dlp/extractor/bluesky.py
Normal file
388
yt_dlp/extractor/bluesky.py
Normal file
@@ -0,0 +1,388 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
format_field,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
truncate_string,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BlueskyIE(InfoExtractor):
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?(?:bsky\.app|main\.bsky\.dev)/profile/(?P<handle>[\w.:%-]+)/post/(?P<id>\w+)',
|
||||
r'at://(?P<handle>[\w.:%-]+)/app\.bsky\.feed\.post/(?P<id>\w+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://bsky.app/profile/blu3blue.bsky.social/post/3l4omssdl632g',
|
||||
'md5': '375539c1930ab05d15585ed772ab54fd',
|
||||
'info_dict': {
|
||||
'id': '3l4omssdl632g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Blu3Blu3Lilith',
|
||||
'uploader_id': 'blu3blue.bsky.social',
|
||||
'uploader_url': 'https://bsky.app/profile/blu3blue.bsky.social',
|
||||
'channel_id': 'did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'OMG WE HAVE VIDEOS NOW',
|
||||
'description': 'OMG WE HAVE VIDEOS NOW',
|
||||
'upload_date': '20240921',
|
||||
'timestamp': 1726940605,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/bsky.app/post/3l3vgf77uco2g',
|
||||
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||
'info_dict': {
|
||||
'id': '3l3vgf77uco2g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Bluesky',
|
||||
'uploader_id': 'bsky.app',
|
||||
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726074716,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://main.bsky.dev/profile/souris.moe/post/3l4qhp7bcs52c',
|
||||
'md5': '5f2df8c200b5633eb7fb2c984d29772f',
|
||||
'info_dict': {
|
||||
'id': '3l4qhp7bcs52c',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'souris',
|
||||
'uploader_id': 'souris.moe',
|
||||
'uploader_url': 'https://bsky.app/profile/souris.moe',
|
||||
'channel_id': 'did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l4qhp7bcs52c',
|
||||
'upload_date': '20240922',
|
||||
'timestamp': 1727003838,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
|
||||
'md5': '1af9c7fda061cf7593bbffca89e43d1c',
|
||||
'info_dict': {
|
||||
'id': '3l3w4tnezek2e',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'clean',
|
||||
'uploader_id': 'de1.pds.tentacle.expert',
|
||||
'uploader_url': 'https://bsky.app/profile/de1.pds.tentacle.expert',
|
||||
'channel_id': 'did:web:de1.tentacle.expert',
|
||||
'channel_url': 'https://bsky.app/profile/did:web:de1.tentacle.expert',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l3w4tnezek2e',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726098823,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/yunayuispink.bsky.social/post/3l7gqcfes742o',
|
||||
'info_dict': {
|
||||
'id': 'XxK3t_5V3ao',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'yunayu',
|
||||
'uploader_id': '@yunayuispink',
|
||||
'uploader_url': 'https://www.youtube.com/@yunayuispink',
|
||||
'channel': 'yunayu',
|
||||
'channel_id': 'UCPLvXnHa7lTyNoR_dGsU14w',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCPLvXnHa7lTyNoR_dGsU14w',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/XxK3t_5V3ao/maxresdefault.webp',
|
||||
'description': r're:Have a good goodx10000day',
|
||||
'title': '5min vs 5hours drawing',
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'upload_date': '20241026',
|
||||
'timestamp': 1729967784,
|
||||
'duration': 321,
|
||||
'age_limit': 0,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int,
|
||||
'categories': ['Entertainment'],
|
||||
'tags': [],
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/endshark.bsky.social/post/3jzxjkcemae2m',
|
||||
'info_dict': {
|
||||
'id': '222792849',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'LASERBAT',
|
||||
'uploader_id': 'laserbatx',
|
||||
'uploader_url': 'https://laserbatx.bandcamp.com',
|
||||
'artists': ['LASERBAT'],
|
||||
'album_artists': ['LASERBAT'],
|
||||
'album': 'Hari Nezumi [EP]',
|
||||
'track': 'Forward to the End',
|
||||
'title': 'LASERBAT - Forward to the End',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a2507705510_5.jpg',
|
||||
'duration': 228.571,
|
||||
'track_id': '222792849',
|
||||
'release_date': '20230423',
|
||||
'upload_date': '20230423',
|
||||
'timestamp': 1682276040.0,
|
||||
'release_timestamp': 1682276040.0,
|
||||
'track_number': 1,
|
||||
},
|
||||
'add_ie': ['Bandcamp'],
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/dannybhoix.bsky.social/post/3l6oe5mtr2c2j',
|
||||
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||
'info_dict': {
|
||||
'id': '3l3vgf77uco2g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Bluesky',
|
||||
'uploader_id': 'bsky.app',
|
||||
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726074716,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
|
||||
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
|
||||
'info_dict': {
|
||||
'id': '3l7rdfxhyds2f',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'cinnamon',
|
||||
'uploader_id': 'alt.bun.how',
|
||||
'uploader_url': 'https://bsky.app/profile/alt.bun.how',
|
||||
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'crazy that i look like this tbh',
|
||||
'description': 'crazy that i look like this tbh',
|
||||
'upload_date': '20241030',
|
||||
'timestamp': 1730332128,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['sexual'],
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
'url': 'at://did:plc:ia76kvnndjutgedggx2ibrem/app.bsky.feed.post/3l6zrz6zyl2dr',
|
||||
'md5': '71b0eb6d85d03145e6af6642c7fc6d78',
|
||||
'info_dict': {
|
||||
'id': '3l6zrz6zyl2dr',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'mary🐇',
|
||||
'uploader_id': 'mary.my.id',
|
||||
'uploader_url': 'https://bsky.app/profile/mary.my.id',
|
||||
'channel_id': 'did:plc:ia76kvnndjutgedggx2ibrem',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:ia76kvnndjutgedggx2ibrem',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l6zrz6zyl2dr',
|
||||
'upload_date': '20241021',
|
||||
'timestamp': 1729523172,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/purpleicetea.bsky.social/post/3l7gv55dc2o2w',
|
||||
'info_dict': {
|
||||
'id': '3l7gv55dc2o2w',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '3l7gv55dc2o2w',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20241026',
|
||||
'description': 'One of my favorite videos',
|
||||
'comment_count': int,
|
||||
'uploader_url': 'https://bsky.app/profile/purpleicetea.bsky.social',
|
||||
'uploader': 'Purple.Ice.Tea',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||
'like_count': int,
|
||||
'channel_id': 'did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||
'repost_count': int,
|
||||
'timestamp': 1729973202,
|
||||
'tags': [],
|
||||
'uploader_id': 'purpleicetea.bsky.social',
|
||||
'title': 'One of my favorite videos',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '3l77u64l7le2e',
|
||||
'ext': 'mp4',
|
||||
'title': 'hearing people on twitter say that bluesky isn\'...',
|
||||
'like_count': int,
|
||||
'uploader_id': 'thafnine.net',
|
||||
'uploader_url': 'https://bsky.app/profile/thafnine.net',
|
||||
'upload_date': '20241024',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||
'description': r're:(?s)hearing people on twitter say that bluesky .{93}',
|
||||
'tags': [],
|
||||
'alt_title': 'md5:9b1ee1937fb3d1a81e932f9ec14d560e',
|
||||
'uploader': 'T9',
|
||||
'channel_id': 'did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'timestamp': 1729731642,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}],
|
||||
}]
|
||||
_BLOB_URL_TMPL = '{}/xrpc/com.atproto.sync.getBlob'
|
||||
|
||||
def _get_service_endpoint(self, did, video_id):
    """Return the personal data server endpoint advertised for `did`.

    The DID document is fetched from the host named in a `did:web:` DID, or
    from plc.directory for any other DID. The download is non-fatal: if it
    fails, or the document lists no valid `AtprotoPersonalDataServer`
    service URL, `https://bsky.social` is returned.
    """
    web_prefix = 'did:web:'
    if did.startswith(web_prefix):
        document_url = f'https://{did[len(web_prefix):]}/.well-known/did.json'
    else:
        document_url = f'https://plc.directory/{did}'

    did_document = self._download_json(
        document_url, video_id, 'Fetching service endpoint', 'Falling back to bsky.social', fatal=False)

    pds_endpoint = traverse_obj(did_document, (
        'service', lambda _, service: service['type'] == 'AtprotoPersonalDataServer',
        'serviceEndpoint', {url_or_none}, any))
    return pds_endpoint or 'https://bsky.social'
|
||||
|
||||
def _real_extract(self, url):
    """Extract every video reachable from a single Bluesky post.

    The post is looked up through the public `getPostThread` endpoint and
    searched in three places: its own embed, the media part of a
    record-with-media embed, and a quoted (embedded) post. A single hit is
    returned directly; several hits are wrapped in a playlist.

    Raises ExtractorError (expected) when the post contains no video.
    """
    match = self._match_valid_url(url)
    handle, video_id = match.group('handle', 'id')

    thread = self._download_json(
        'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
        video_id, query={
            'uri': f'at://{handle}/app.bsky.feed.post/{video_id}',
            'depth': 0,
            'parentHeight': 0,
        })
    post = thread['thread']['post']

    entries = [
        # app.bsky.embed.video.view/app.bsky.embed.external.view
        *self._extract_videos(post, video_id),
        # app.bsky.embed.recordWithMedia.view
        *self._extract_videos(
            post, video_id, embed_path=('embed', 'media'), record_subpath=('embed', 'media')),
    ]

    # app.bsky.embed.record.view
    nested_post = traverse_obj(post, ('embed', 'record', ('record', None), {dict}, any))
    if nested_post:
        entries += self._extract_videos(
            nested_post, video_id, embed_path=('embeds', 0), record_path='value')

    if not entries:
        raise ExtractorError('No video could be found in this post', expected=True)
    if len(entries) > 1:
        return self.playlist_result(entries, video_id)
    return entries[0]
|
||||
|
||||
@staticmethod
def _build_profile_url(path):
    """Format `path` (a handle or DID) into a bsky.app profile URL.

    `default=None` is passed to `format_field` so that a missing value yields
    None instead of `format_field`'s own default.
    """
    profile_url_template = 'https://bsky.app/profile/%s'
    return format_field(path, None, profile_url_template, default=None)
|
||||
|
||||
def _extract_videos(self, root, video_id, embed_path='embed', record_path='record', record_subpath='embed'):
    """Collect the playable entries attached to one post-like object.

    `embed_path` locates the resolved embed view inside `root`, while
    `record_path` followed by `record_subpath` locates the matching embed in
    the raw record. Each path may be a single key or a sequence of keys.

    Returns a list holding, in this order, a `url_result` for an external
    link (if one is found) and an info dict for a native video (if the embed
    carries a valid `playlist` URL). The list may be empty.
    """
    # Wrap single keys so that every path can be unpacked with `*` below
    single_key_types = (str, bytes, dict, set)
    embed_path, record_path, record_subpath = (
        variadic(path, single_key_types) for path in (embed_path, record_path, record_subpath))
    record_embed_path = (*record_path, *record_subpath)

    entries = []
    external_uri = traverse_obj(root, (
        (record_embed_path, embed_path), 'external', 'uri', {url_or_none}, any))
    if external_uri:
        entries.append(self.url_result(external_uri))

    playlist_url = traverse_obj(root, (*embed_path, 'playlist', {url_or_none}))
    if not playlist_url:
        # No native video in this embed
        return entries

    formats, subtitles = self._extract_m3u8_formats_and_subtitles(
        playlist_url, video_id, 'mp4', m3u8_id='hls', fatal=False)

    video_cid = traverse_obj(
        root, (*embed_path, 'cid', {str}),
        (*record_embed_path, 'video', 'ref', '$link', {str}))
    did = traverse_obj(root, ('author', 'did', {str}))

    if did and video_cid:
        # The uploaded blob and its caption files are served by the author's own data server
        endpoint = self._get_service_endpoint(did, video_id)
        blob_base_url = self._BLOB_URL_TMPL.format(endpoint)

        dimensions = traverse_obj(root, (*embed_path, 'aspectRatio', {
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
        }))
        blob_details = traverse_obj(root, (*record_embed_path, 'video', {
            'filesize': ('size', {int_or_none}),
            'ext': ('mimeType', {mimetype2ext}),
        }))
        formats.append({
            'format_id': 'blob',
            'url': update_url_query(blob_base_url, {'did': did, 'cid': video_cid}),
            **dimensions,
            **blob_details,
        })

        captions = traverse_obj(root, (
            *record_embed_path, 'captions', lambda _, v: v['file']['ref']['$link']))
        for caption in captions:
            caption_cid = caption['file']['ref']['$link']
            language = caption.get('lang') or 'und'
            subtitles.setdefault(language, []).append({
                'url': update_url_query(blob_base_url, {'did': did, 'cid': caption_cid}),
                'ext': traverse_obj(caption, ('file', 'mimeType', {mimetype2ext})),
            })

    metadata = traverse_obj(root, {
        'id': ('uri', {url_basename}),
        'thumbnail': (*embed_path, 'thumbnail', {url_or_none}),
        'alt_title': (*embed_path, 'alt', {str}, filter),
        'uploader': ('author', 'displayName', {str}),
        'uploader_id': ('author', 'handle', {str}),
        'uploader_url': ('author', 'handle', {self._build_profile_url}),
        'channel_id': ('author', 'did', {str}),
        'channel_url': ('author', 'did', {self._build_profile_url}),
        'like_count': ('likeCount', {int_or_none}),
        'repost_count': ('repostCount', {int_or_none}),
        'comment_count': ('replyCount', {int_or_none}),
        'timestamp': ('indexedAt', {parse_iso8601}),
        'tags': ('labels', ..., 'val', {str}, all, {orderedSet}),
        'age_limit': (
            'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
        'description': (*record_path, 'text', {str}, filter),
        'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
    })
    # An `id` derived from the post URI takes precedence over the requested `video_id`
    entries.append({
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        **metadata,
    })
    return entries
|
||||
@@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BoxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<service>app|ent)\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||
@@ -38,10 +38,22 @@ class BoxIE(InfoExtractor):
|
||||
'uploader_id': '239068974',
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}, {
|
||||
'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065',
|
||||
'info_dict': {
|
||||
'id': '1536173056065',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': '18523128264',
|
||||
'uploader': 'Lexi Hennigan',
|
||||
'title': 'iPSC Symposium recording part 1.mp4',
|
||||
'timestamp': 1716228343,
|
||||
'upload_date': '20240520',
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
shared_name, file_id = self._match_valid_url(url).groups()
|
||||
shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service')
|
||||
webpage = self._download_webpage(url, file_id or shared_name)
|
||||
|
||||
if not file_id:
|
||||
@@ -57,14 +69,14 @@ class BoxIE(InfoExtractor):
|
||||
request_token = self._search_json(
|
||||
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
|
||||
access_token = self._download_json(
|
||||
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||
f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||
'Downloading token JSON metadata',
|
||||
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'X-Request-Token': request_token,
|
||||
'X-Box-EndUser-API': 'sharedName=' + shared_name,
|
||||
})[file_id]['read']
|
||||
shared_link = 'https://app.box.com/s/' + shared_name
|
||||
shared_link = f'https://{service}.box.com/s/{shared_name}'
|
||||
f = self._download_json(
|
||||
'https://api.box.com/2.0/files/' + file_id, file_id,
|
||||
'Downloading file JSON metadata', headers={
|
||||
|
||||
@@ -1,35 +1,20 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_text_and_html_by_tag,
|
||||
get_elements_by_class,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def html_get_element(tag=None, cls=None):
    """Create a one-argument HTML lookup function for use in traversal paths.

    When `cls` is given, the lookup is done with `get_elements_by_class`
    (forwarding `tag` as a keyword argument); otherwise it is done with
    `get_element_text_and_html_by_tag`. The returned function passes the
    lookup result through `variadic` and returns its first item.
    """
    assert tag or cls, 'One of tag or class is required'

    func = (
        functools.partial(get_elements_by_class, cls, tag=tag) if cls
        else functools.partial(get_element_text_and_html_by_tag, tag))

    def html_get_element_wrapper(html):
        found = variadic(func(html))
        return found[0]

    return html_get_element_wrapper
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
@@ -41,12 +26,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Kooperative Berlin',
|
||||
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
|
||||
'release_date': '20160115',
|
||||
'creators': ['Kooperative Berlin'],
|
||||
'description': r're:Joachim Gauck, .*\n\nKamera: .*',
|
||||
'release_date': '20150716',
|
||||
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
|
||||
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https?://www\.bpb\.de/cache/images/7/297_teaser_16x9_1240\.jpg.*',
|
||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -55,11 +40,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '522184',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
|
||||
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
|
||||
'release_date': '20230621',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
|
||||
'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/4/522184_teaser_16x9_1240\.png.*',
|
||||
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -68,11 +54,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '518789',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
|
||||
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
|
||||
'release_date': '20230302',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
|
||||
'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/9/518789_teaser_16x9_1240\.jpeg.*',
|
||||
'title': 'md5:3e956f264bb501f6383f10495a401da4',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -84,12 +71,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '315813',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Axel Schröder',
|
||||
'creators': ['Axel Schröder'],
|
||||
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
|
||||
'release_date': '20200921',
|
||||
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
|
||||
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/3/315813_teaser_16x9_1240\.png.*',
|
||||
'title': 'Folge 1: Eine Einführung',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -98,12 +85,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '517806',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Bundeszentrale für politische Bildung',
|
||||
'creators': ['Bundeszentrale für politische Bildung'],
|
||||
'description': 'md5:594689600e919912aade0b2871cc3fed',
|
||||
'release_date': '20230127',
|
||||
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
|
||||
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/6/517806_teaser_16x9_1240\.png.*',
|
||||
'title': 'Die Weltanschauung der "Neuen Rechten"',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -147,7 +134,7 @@ class BpbIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
title_result = traverse_obj(webpage, ({find_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
|
||||
|
||||
return {
|
||||
@@ -156,15 +143,15 @@ class BpbIE(InfoExtractor):
|
||||
# This metadata could be interpreted otherwise, but it fits "series" the most
|
||||
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
|
||||
'description': join_nonempty(*traverse_obj(webpage, [(
|
||||
{html_get_element(cls='opening-intro')},
|
||||
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
|
||||
{find_element(cls='opening-intro')},
|
||||
[{find_element(tag='bpb-accordion-item')}, {find_element(cls='text-content')}],
|
||||
), {clean_html}]), delim='\n\n') or None,
|
||||
'creator': self._html_search_meta('author', webpage),
|
||||
'creators': traverse_obj(self._html_search_meta('author', webpage), all),
|
||||
'uploader': self._html_search_meta('publisher', webpage),
|
||||
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
|
||||
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
|
||||
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
|
||||
'formats': (':sources', ..., {self._process_source}),
|
||||
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
|
||||
'thumbnail': ('poster', {urljoin(url)}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -145,10 +145,9 @@ class BravoTVIE(AdobePassIE):
|
||||
tp_metadata = self._download_json(
|
||||
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
|
||||
|
||||
seconds_or_none = lambda x: float_or_none(x, 1000)
|
||||
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||
'start_time': ('startTime', {seconds_or_none}),
|
||||
'end_time': ('endTime', {seconds_or_none}),
|
||||
'start_time': ('startTime', {float_or_none(scale=1000)}),
|
||||
'end_time': ('endTime', {float_or_none(scale=1000)}),
|
||||
}))
|
||||
# prune pointless single chapters that span the entire duration from short videos
|
||||
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
|
||||
@@ -168,8 +167,8 @@ class BravoTVIE(AdobePassIE):
|
||||
**merge_dicts(traverse_obj(tp_metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {seconds_or_none}),
|
||||
'timestamp': ('pubDate', {seconds_or_none}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'timestamp': ('pubDate', {float_or_none(scale=1000)}),
|
||||
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
|
||||
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
|
||||
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
|
||||
|
||||
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
@@ -386,7 +387,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
@classmethod
def _make_brightcove_url(cls, params):
    """Build the legacy Brightcove ``htmlFederated`` viewer URL.

    ``params`` is handed to ``update_url_query`` as the query to apply to the
    fixed viewer endpoint; the resulting URL is returned.
    """
    # Only the https endpoint is kept; the plain-http literal was a stale duplicate.
    return update_url_query(
        'https://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_url(cls, webpage):
|
||||
@@ -470,7 +471,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
'https://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
if player_page:
|
||||
player_key = self._search_regex(
|
||||
@@ -480,7 +481,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
brightcove_new_url = f'http://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
|
||||
brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
|
||||
if referer:
|
||||
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
@@ -538,12 +539,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
|
||||
})
|
||||
|
||||
def build_format_id(kind):
    """Return a format id of the form ``<kind>[-<tbr>k][-<height>p]``.

    ``tbr`` and ``height`` are read from the enclosing scope; a falsy value
    drops the corresponding component.
    """
    # `x and f'...'` evaluates to the falsy `x` itself when unset, which join_nonempty skips
    return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
|
||||
|
||||
if src or streaming_src:
|
||||
f.update({
|
||||
@@ -801,7 +797,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
# Look for iframe embeds [1]
|
||||
for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||
entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)
|
||||
|
||||
# Look for <video> tags [2] and embed_in_page embeds [3]
|
||||
# [2] looks like:
|
||||
@@ -830,7 +826,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
player_id = player_id or attrs.get('data-player') or 'default'
|
||||
embed = embed or attrs.get('data-embed') or 'default'
|
||||
|
||||
bc_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
|
||||
bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
|
||||
|
||||
# Some brightcove videos may be embedded with video tag only and
|
||||
# without script tag or any mentioning of brightcove at all. Such
|
||||
@@ -867,7 +863,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
|
||||
|
||||
def extract_policy_key():
|
||||
base_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/'
|
||||
base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/'
|
||||
config = self._download_json(
|
||||
base_url + 'config.json', video_id, fatal=False) or {}
|
||||
policy_key = try_get(
|
||||
|
||||
@@ -8,11 +8,13 @@ from ..utils import (
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
format_field,
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BundestagIE(InfoExtractor):
|
||||
@@ -115,9 +117,8 @@ class BundestagIE(InfoExtractor):
|
||||
note='Downloading metadata overlay', fatal=False,
|
||||
), {
|
||||
'title': (
|
||||
{functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0,
|
||||
{functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||
'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
|
||||
{find_element(tag='h3')}, {functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||
'description': ({find_element(tag='p')}, {clean_html}),
|
||||
}))
|
||||
|
||||
return result
|
||||
|
||||
@@ -53,7 +53,7 @@ class CaffeineTVIE(InfoExtractor):
|
||||
'like_count': ('like_count', {int_or_none}),
|
||||
'view_count': ('view_count', {int_or_none}),
|
||||
'comment_count': ('comment_count', {int_or_none}),
|
||||
'tags': ('tags', ..., {str}, {lambda x: x or None}),
|
||||
'tags': ('tags', ..., {str}, filter),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_id': (((None, 'user'), 'username'), {str}, any),
|
||||
'is_live': ('is_live', {bool}),
|
||||
@@ -62,7 +62,7 @@ class CaffeineTVIE(InfoExtractor):
|
||||
'title': ('broadcast_title', {str}),
|
||||
'duration': ('content_duration', {int_or_none}),
|
||||
'timestamp': ('broadcast_start_time', {parse_iso8601}),
|
||||
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
|
||||
'thumbnail': ('preview_image_path', {urljoin(url)}),
|
||||
}),
|
||||
'age_limit': {
|
||||
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
|
||||
|
||||
@@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
|
||||
|
||||
|
||||
class CallinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
|
||||
'info_dict': {
|
||||
|
||||
@@ -1,22 +1,28 @@
|
||||
import base64
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
replace_extension,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
update_url,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -149,6 +155,7 @@ class CBCIE(InfoExtractor):
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:player'
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
||||
@@ -172,21 +179,20 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
|
||||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
||||
'tags': 'count:8',
|
||||
'categories': ['All in a Weekend Montreal'],
|
||||
'tags': 'count:11',
|
||||
'location': 'Quebec',
|
||||
'series': 'All in a Weekend Montreal',
|
||||
'season': 'Season 2015',
|
||||
'season_number': 2015,
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['Other'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
|
||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
||||
'info_dict': {
|
||||
'id': '2164402062',
|
||||
'ext': 'mp4',
|
||||
@@ -194,107 +200,168 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
'series': 'CBC News: Windsor at 6:00',
|
||||
'categories': ['News/Canada/Windsor'],
|
||||
'categories': ['Windsor'],
|
||||
'location': 'Windsor',
|
||||
'tags': ['cancer'],
|
||||
'creators': ['Allison Johnson'],
|
||||
'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['News'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'https://www.cbc.ca/player/play/1.2985700',
|
||||
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
||||
'info_dict': {
|
||||
'id': '2657631896',
|
||||
'id': '1.2985700',
|
||||
'ext': 'mp3',
|
||||
'title': 'CBC Montreal is organizing its first ever community hackathon!',
|
||||
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
|
||||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
||||
'tags': 'count:8',
|
||||
'categories': ['All in a Weekend Montreal'],
|
||||
'tags': 'count:11',
|
||||
'location': 'Quebec',
|
||||
'series': 'All in a Weekend Montreal',
|
||||
'season': 'Season 2015',
|
||||
'season_number': 2015,
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['Other'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/1.1711287',
|
||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
||||
'info_dict': {
|
||||
'id': '2164402062',
|
||||
'id': '1.1711287',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cancer survivor four times over',
|
||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
'series': 'CBC News: Windsor at 6:00',
|
||||
'categories': ['News/Canada/Windsor'],
|
||||
'categories': ['Windsor'],
|
||||
'location': 'Windsor',
|
||||
'tags': ['cancer'],
|
||||
'creators': ['Allison Johnson'],
|
||||
'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['News'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Has subtitles
|
||||
# These broadcasts expire after ~1 month, can find new test URL here:
|
||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||
'url': 'https://www.cbc.ca/player/play/1.7159484',
|
||||
'md5': '6ed6cd0fc2ef568d2297ba68a763d455',
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6424403',
|
||||
'md5': '8025909eaffcf0adf59922904def9a5e',
|
||||
'info_dict': {
|
||||
'id': '2324213316001',
|
||||
'id': '9.6424403',
|
||||
'ext': 'mp4',
|
||||
'title': 'The National | School boards sue social media giants',
|
||||
'description': 'md5:4b4db69322fa32186c3ce426da07402c',
|
||||
'timestamp': 1711681200,
|
||||
'duration': 2743.400,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'title': 'The National | N.W.T. wildfire emergency',
|
||||
'description': 'md5:ada33d36d1df69347ed575905bfd496c',
|
||||
'timestamp': 1718589600,
|
||||
'duration': 2692.833,
|
||||
'subtitles': {
|
||||
'en-US': [{
|
||||
'name': 'English Captions',
|
||||
'url': 'https://cbchls.akamaized.net/delivery/news-shows/2024/06/17/NAT_JUN16-00-55-00/NAT_JUN16_cc.vtt',
|
||||
}],
|
||||
},
|
||||
'thumbnail': 'https://i.cbc.ca/ais/6272b5c6-5e78-4c05-915d-0e36672e33d1,1714756287822/full/max/0/default.jpg',
|
||||
'chapters': 'count:5',
|
||||
'upload_date': '20240329',
|
||||
'categories': 'count:4',
|
||||
'upload_date': '20240617',
|
||||
'categories': ['News', 'The National', 'The National Latest Broadcasts'],
|
||||
'series': 'The National - Full Show',
|
||||
'tags': 'count:1',
|
||||
'creators': ['News'],
|
||||
'tags': ['The National'],
|
||||
'location': 'Canada',
|
||||
'media_type': 'Full Program',
|
||||
'genres': ['News'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
|
||||
'md5': '188b96cf6bdcb2540e178a6caa957128',
|
||||
'info_dict': {
|
||||
'id': '2334524995812',
|
||||
'id': '1.7194274',
|
||||
'ext': 'mp4',
|
||||
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
|
||||
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
|
||||
'timestamp': 1714788791,
|
||||
'duration': 77.678,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'chapters': 'count:0',
|
||||
'upload_date': '20240504',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'categories': 'count:3',
|
||||
'series': 'The National',
|
||||
'tags': 'count:15',
|
||||
'creators': ['encoder'],
|
||||
'tags': 'count:17',
|
||||
'location': 'Canada',
|
||||
'media_type': 'Excerpt',
|
||||
'upload_date': '20240504',
|
||||
'genres': ['News'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6427282',
|
||||
'info_dict': {
|
||||
'id': '9.6427282',
|
||||
'ext': 'mp4',
|
||||
'title': 'Men\'s Soccer - Argentina vs Morocco',
|
||||
'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.',
|
||||
'series': 'CBC Sports',
|
||||
'media_type': 'Event Coverage',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg',
|
||||
'timestamp': 1721825400.0,
|
||||
'upload_date': '20240724',
|
||||
'duration': 10568.0,
|
||||
'chapters': [],
|
||||
'genres': [],
|
||||
'tags': ['2024 Paris Olympic Games'],
|
||||
'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'],
|
||||
'location': 'Canada',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6459530',
|
||||
'md5': '6c1bb76693ab321a2e99c347a1d5ecbc',
|
||||
'info_dict': {
|
||||
'id': '9.6459530',
|
||||
'ext': 'mp4',
|
||||
'title': 'Parts of Jasper incinerated as wildfire rages',
|
||||
'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962',
|
||||
'series': 'The National',
|
||||
'media_type': 'Excerpt',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg',
|
||||
'timestamp': 1721964091.012,
|
||||
'upload_date': '20240726',
|
||||
'duration': 952.285,
|
||||
'chapters': [],
|
||||
'genres': [],
|
||||
'tags': 'count:23',
|
||||
'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6420651',
|
||||
'md5': '71a850c2c6ee5e912de169f5311bb533',
|
||||
'info_dict': {
|
||||
'id': '9.6420651',
|
||||
'ext': 'mp4',
|
||||
'title': 'Is it a breath of fresh air? Measuring air quality in Edmonton',
|
||||
'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3',
|
||||
'series': 'CBC News Edmonton',
|
||||
'media_type': 'Excerpt',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg',
|
||||
'timestamp': 1718220065.768,
|
||||
'upload_date': '20240612',
|
||||
'duration': 286.086,
|
||||
'chapters': [],
|
||||
'genres': ['News'],
|
||||
'categories': ['News', 'Edmonton'],
|
||||
'tags': 'count:7',
|
||||
'location': 'Edmonton',
|
||||
},
|
||||
}, {
|
||||
'url': 'cbcplayer:1.7159484',
|
||||
@@ -307,23 +374,113 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_param(self, asset_data, name):
    """Look up a named entry in ``asset_data['params']`` and return its string ``value``.

    The first string value among the entries whose ``name`` equals ``name``
    is returned; the traversal yields ``None`` when nothing matches.
    """
    def is_requested(_, param):
        return param['name'] == name

    value_path = ('params', is_requested, 'value', {str}, any)
    return traverse_obj(asset_data, value_path)
|
||||
|
||||
def _real_extract(self, url):
    """Extract a CBC player clip from the page's ``__INITIAL_STATE__`` JSON.

    Downloads the player page for the matched ID, reads ``video.currentClip``
    from the embedded initial state and builds formats from its ``medianet``
    assets. When the clip has no usable assets but carries a ``mediaId``,
    extraction is delegated to ThePlatform through a ``url_transparent``
    result instead.

    Returns an info dict with formats, subtitles, chapters and clip metadata.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
    data = self._search_json(
        r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)['video']['currentClip']
    # Keep only assets that have both a URL-like key and a non-empty type
    assets = traverse_obj(
        data, ('media', 'assets', lambda _, v: url_or_none(v['key']) and v['type']))

    if not assets and (media_id := traverse_obj(data, ('mediaId', {str}))):
        # XXX: Deprecated; CBC is migrating off of ThePlatform
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': smuggle_url(
                f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', {
                    'force_smil_url': True,
                }),
            'id': media_id,
            '_format_sort_fields': ('res', 'proto'),  # Prioritize direct http formats over HLS
        }

    is_live = traverse_obj(data, ('media', 'streamType', {str})) == 'Live'
    formats, subtitles = [], {}

    # Direct text tracks listed in the clip metadata, grouped by language ('und' when missing)
    for sub in traverse_obj(data, ('media', 'textTracks', lambda _, v: url_or_none(v['src']))):
        subtitles.setdefault(sub.get('language') or 'und', []).append({
            'url': sub['src'],
            'name': sub.get('label'),
        })

    for asset in assets:
        asset_key = asset['key']
        asset_type = asset['type']
        if asset_type != 'medianet':
            self.report_warning(f'Skipping unsupported asset type "{asset_type}": {asset_key}')
            continue
        asset_data = self._download_json(asset_key, video_id, f'Downloading {asset_type} JSON')
        ext = mimetype2ext(self._parse_param(asset_data, 'contentType'))
        if ext == 'm3u8':
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                asset_data['url'], video_id, 'mp4', m3u8_id='hls', live=is_live)
            formats.extend(fmts)
            # Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available
            if not subtitles:
                self._merge_subtitles(subs, target=subtitles)
            if is_live or not fmts:
                continue
            # Check for direct https mp4 format
            # Pick the highest-tbr format that has video; {} when none qualifies
            best_video_fmt = traverse_obj(fmts, (
                lambda _, v: v.get('vcodec') != 'none' and v['tbr'], all,
                {functools.partial(sorted, key=lambda x: x['tbr'])}, -1, {dict})) or {}
            base_url = self._search_regex(
                r'(https?://[^?#]+?/)hdntl=', best_video_fmt.get('url'), 'base url', default=None)
            if not base_url or '/live/' in base_url:
                continue
            mp4_url = base_url + replace_extension(url_basename(best_video_fmt['url']), 'mp4')
            # Only advertise the guessed mp4 URL if a HEAD request for it succeeds
            if self._request_webpage(
                    HEADRequest(mp4_url), video_id, 'Checking for https format',
                    errnote=False, fatal=False):
                formats.append({
                    **best_video_fmt,
                    'url': mp4_url,
                    'format_id': 'https-mp4',
                    'protocol': 'https',
                    'manifest_url': None,
                    'acodec': None,
                })
        else:
            formats.append({
                'url': asset_data['url'],
                'ext': ext,
                'vcodec': 'none' if self._parse_param(asset_data, 'mediaType') == 'audio' else None,
            })

    # float() never returns None; the filter relies on float() raising for a
    # missing or non-numeric startTime so that such entries are dropped
    chapters = traverse_obj(data, (
        'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
            'start_time': ('startTime', {float_or_none(scale=1000)}),
            'end_time': ('endTime', {float_or_none(scale=1000)}),
            'title': ('name', {str}),
        }))
    # Filter out pointless single chapters with start_time==0 and no end_time
    if len(chapters) == 1 and not (chapters[0].get('start_time') or chapters[0].get('end_time')):
        chapters = []

    return {
        **traverse_obj(data, {
            'title': ('title', {str}),
            'description': ('description', {str.strip}),
            'thumbnail': ('image', 'url', {url_or_none}, {update_url(query=None)}),
            'timestamp': ('publishedAt', {float_or_none(scale=1000)}),
            'media_type': ('media', 'clipType', {str}),
            'series': ('showName', {str}),
            'season_number': ('media', 'season', {int_or_none}),
            # Duration is suppressed for live streams
            'duration': ('media', 'duration', {float_or_none}, {lambda x: None if is_live else x}),
            'location': ('media', 'region', {str}),
            'tags': ('tags', ..., 'name', {str}),
            'genres': ('media', 'genre', all),
            'categories': ('categories', ..., 'name', {str}),
        }),
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        'chapters': chapters,
        'is_live': is_live,
    }
|
||||
|
||||
|
||||
@@ -365,14 +522,13 @@ class CBCGemIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# This is a normal, public, TV show video
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||
'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s06e01',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoke Signals',
|
||||
'description': 'md5:929868d20021c924020641769eb3e7f1',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
||||
'duration': 1314,
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
|
||||
'duration': 1324,
|
||||
'categories': ['comedy'],
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season': 'Season 6',
|
||||
@@ -380,19 +536,21 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode': 'Smoke Signals',
|
||||
'episode_number': 1,
|
||||
'episode_id': 'schitts-creek/s06e01',
|
||||
'upload_date': '20210618',
|
||||
'timestamp': 1623988800,
|
||||
'release_date': '20200107',
|
||||
'release_timestamp': 1578427200,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# This video requires an account in the browser, but works fine in yt-dlp
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
|
||||
'md5': '297a9600f554f2258aed01514226a697',
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s01e01',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Cup Runneth Over',
|
||||
'description': 'md5:9bca14ea49ab808097530eb05a29e797',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg',
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
@@ -401,9 +559,12 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode_id': 'schitts-creek/s01e01',
|
||||
'duration': 1309,
|
||||
'categories': ['comedy'],
|
||||
'upload_date': '20210617',
|
||||
'timestamp': 1623902400,
|
||||
'release_date': '20151124',
|
||||
'release_timestamp': 1448323200,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
|
||||
'only_matching': True,
|
||||
@@ -455,10 +616,8 @@ class CBCGemIE(InfoExtractor):
|
||||
|
||||
def claims_token_expired(self):
    """Return True when the claims token has expired or expires within 10 seconds."""
    exp = self._get_claims_token_expiry()
    # It will expire in less than 10 seconds, or has already expired
    return exp - time.time() < 10
|
||||
|
||||
def claims_token_valid(self):
    """Return True only when a claims token is stored and it has not expired."""
    if self._claims_token is None:
        # No token stored; the expiry check is not consulted
        return False
    return not self.claims_token_expired()
|
||||
@@ -474,38 +633,6 @@ class CBCGemIE(InfoExtractor):
|
||||
return
|
||||
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
||||
|
||||
def _find_secret_formats(self, formats, video_id):
|
||||
""" Find a valid video url and convert it to the secret variant """
|
||||
base_format = next((f for f in formats if f.get('vcodec') != 'none'), None)
|
||||
if not base_format:
|
||||
return
|
||||
|
||||
base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])
|
||||
url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
|
||||
|
||||
secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
|
||||
if not isinstance(secret_xml, xml.etree.ElementTree.Element):
|
||||
return
|
||||
|
||||
for child in secret_xml:
|
||||
if child.attrib.get('Type') != 'video':
|
||||
continue
|
||||
for video_quality in child:
|
||||
bitrate = int_or_none(video_quality.attrib.get('Bitrate'))
|
||||
if not bitrate or 'Index' not in video_quality.attrib:
|
||||
continue
|
||||
height = int_or_none(video_quality.attrib.get('MaxHeight'))
|
||||
|
||||
yield {
|
||||
**base_format,
|
||||
'format_id': join_nonempty('sec', height),
|
||||
# Note: \g<1> is necessary instead of \1 since bitrate is a number
|
||||
'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
|
||||
'width': int_or_none(video_quality.attrib.get('MaxWidth')),
|
||||
'tbr': bitrate / 1000.0,
|
||||
'height': height,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = self._download_json(
|
||||
@@ -519,7 +646,6 @@ class CBCGemIE(InfoExtractor):
|
||||
else:
|
||||
headers = {}
|
||||
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
|
||||
m3u8_url = m3u8_info.get('url')
|
||||
|
||||
if m3u8_info.get('errorCode') == 1:
|
||||
self.raise_geo_restricted(countries=['CA'])
|
||||
@@ -528,9 +654,9 @@ class CBCGemIE(InfoExtractor):
|
||||
elif m3u8_info.get('errorCode') != 0:
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
|
||||
self._remove_duplicate_formats(formats)
|
||||
formats.extend(self._find_secret_formats(formats, video_id))
|
||||
|
||||
for fmt in formats:
|
||||
if fmt.get('vcodec') == 'none':
|
||||
@@ -546,20 +672,21 @@ class CBCGemIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('description'),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'series': video_info.get('series'),
|
||||
'season_number': video_info.get('season'),
|
||||
'season': f'Season {video_info.get("season")}',
|
||||
'episode_number': video_info.get('episode'),
|
||||
'episode': video_info.get('title'),
|
||||
'episode_id': video_id,
|
||||
'duration': video_info.get('duration'),
|
||||
'categories': [video_info.get('category')],
|
||||
'formats': formats,
|
||||
'release_timestamp': video_info.get('airDate'),
|
||||
'timestamp': video_info.get('availableDate'),
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'series': ('series', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'categories': ('category', {str}, all),
|
||||
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
@@ -649,11 +776,11 @@ class CBCGemLiveIE(InfoExtractor):
|
||||
'title': 'Ottawa',
|
||||
'description': 'The live TV channel and local programming from Ottawa',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
'id': 'AyqZwxRqh8EH',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1492106160,
|
||||
'upload_date': '20170413',
|
||||
'release_timestamp': 1492106160,
|
||||
'release_date': '20170413',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Live might have ended',
|
||||
@@ -682,49 +809,84 @@ class CBCGemLiveIE(InfoExtractor):
|
||||
'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
|
||||
'timestamp': 1679706000,
|
||||
'upload_date': '20230325',
|
||||
'release_timestamp': 1679706000,
|
||||
'release_date': '20230325',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Live might have ended',
|
||||
},
|
||||
{ # event replay (medianetlive)
|
||||
'url': 'https://gem.cbc.ca/live-event/42314',
|
||||
'md5': '297a9600f554f2258aed01514226a697',
|
||||
'info_dict': {
|
||||
'id': '42314',
|
||||
'ext': 'mp4',
|
||||
'live_status': 'was_live',
|
||||
'title': 'Women\'s Soccer - Canada vs New Zealand',
|
||||
'description': 'md5:36200e5f1a70982277b5a6ecea86155d',
|
||||
'thumbnail': r're:https://.+default\.jpg',
|
||||
'release_timestamp': 1721917200,
|
||||
'release_date': '20240725',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Replay might no longer be available',
|
||||
},
|
||||
{ # event replay (medianetlive)
|
||||
'url': 'https://gem.cbc.ca/live-event/43273',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']
|
||||
|
||||
# Two types of metadata JSON
|
||||
# Three types of video_info JSON: info in root, freeTv stream/item, event replay
|
||||
if not video_info.get('formattedIdMedia'):
|
||||
video_info = traverse_obj(
|
||||
video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}),
|
||||
get_all=False, default={})
|
||||
if traverse_obj(video_info, ('event', 'key')) == video_id:
|
||||
video_info = video_info['event']
|
||||
else:
|
||||
video_info = traverse_obj(video_info, (
|
||||
('freeTv', ('streams', ...)), 'items',
|
||||
lambda _, v: v['key'].partition('-')[0] == video_id, any)) or {}
|
||||
|
||||
video_stream_id = video_info.get('formattedIdMedia')
|
||||
if not video_stream_id:
|
||||
raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
|
||||
raise ExtractorError(
|
||||
'Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
|
||||
|
||||
stream_data = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
||||
'appCode': 'mpx',
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'idMedia': video_stream_id,
|
||||
'multibitrate': 'true',
|
||||
'output': 'json',
|
||||
'tech': 'hls',
|
||||
'manifestType': 'desktop',
|
||||
})
|
||||
live_status = 'was_live' if video_info.get('isVodEnabled') else 'is_live'
|
||||
release_timestamp = traverse_obj(video_info, ('airDate', {parse_iso8601}))
|
||||
|
||||
if live_status == 'is_live' and release_timestamp and release_timestamp > time.time():
|
||||
formats = []
|
||||
live_status = 'is_upcoming'
|
||||
self.raise_no_formats('This livestream has not yet started', expected=True)
|
||||
else:
|
||||
stream_data = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
||||
'appCode': 'medianetlive',
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'idMedia': video_stream_id,
|
||||
'multibitrate': 'true',
|
||||
'output': 'json',
|
||||
'tech': 'hls',
|
||||
'manifestType': 'desktop',
|
||||
})
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream_data['url'], video_id, 'mp4', live=live_status == 'is_live')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'live_status': live_status,
|
||||
'release_timestamp': release_timestamp,
|
||||
**traverse_obj(video_info, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('images', 'card', 'url'),
|
||||
'timestamp': ('airDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import base64
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import zlib
|
||||
|
||||
@@ -97,7 +96,7 @@ class CBSNewsBaseIE(InfoExtractor):
|
||||
**traverse_obj(item, {
|
||||
'title': (None, ('fulltitle', 'title')),
|
||||
'description': 'dek',
|
||||
'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}),
|
||||
'timestamp': ('timestamp', {float_or_none(scale=1000)}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'subtitles': ('captions', {get_subtitles}),
|
||||
'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),
|
||||
|
||||
@@ -12,53 +12,86 @@ from ..utils import (
|
||||
|
||||
|
||||
class CCMAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
|
||||
IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
|
||||
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
'md5': '7296ca43977c8ea4469e719c609b0871',
|
||||
'info_dict': {
|
||||
'id': '5630208',
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'espot de La Marató de TV3',
|
||||
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||
'timestamp': 1478608140,
|
||||
'upload_date': '20161108',
|
||||
'age_limit': 0,
|
||||
'alt_title': 'EsportMarató2016WEB_PerPublicar',
|
||||
'duration': 79,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
|
||||
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'categories': ['Divulgació'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
'md5': 'fa3e38f269329a278271276330261425',
|
||||
'info_dict': {
|
||||
'id': '943685',
|
||||
'ext': 'mp3',
|
||||
'title': 'El Consell de Savis analitza el derbi',
|
||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||
'upload_date': '20170512',
|
||||
'timestamp': 1494622500,
|
||||
'upload_date': '20161217',
|
||||
'timestamp': 1482011700,
|
||||
'vcodec': 'none',
|
||||
'categories': ['Esports'],
|
||||
'series': 'Tot gira',
|
||||
'duration': 821,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
|
||||
'md5': '27493513d08a3e5605814aee9bb778d2',
|
||||
'info_dict': {
|
||||
'id': '6031387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||
'timestamp': 1582577700,
|
||||
'timestamp': 1582577919,
|
||||
'upload_date': '20200224',
|
||||
'subtitles': 'mincount:4',
|
||||
'age_limit': 16,
|
||||
'subtitles': 'mincount:1',
|
||||
'age_limit': 13,
|
||||
'series': 'Crims',
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
|
||||
'duration': 3203,
|
||||
'categories': ['Divulgació'],
|
||||
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'episode_number': 5,
|
||||
'episode': 'Episode 5',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
|
||||
'info_dict': {
|
||||
'id': '5759227',
|
||||
'ext': 'mp4',
|
||||
'title': 'Una mosca volava per la llum',
|
||||
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
|
||||
'description': 'md5:9ab64276944b0825336f4147f13f7854',
|
||||
'series': 'Mic',
|
||||
'upload_date': '20180411',
|
||||
'timestamp': 1523440105,
|
||||
'duration': 160,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
|
||||
'categories': ['Música'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_type, media_id = self._match_valid_url(url).groups()
|
||||
media_type, media_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
media = self._download_json(
|
||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'format': 'dm',
|
||||
|
||||
@@ -12,6 +12,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_ord
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
@@ -351,3 +352,50 @@ class CDAIE(InfoExtractor):
|
||||
extract_format(webpage, resolution)
|
||||
|
||||
return merge_dicts(info_dict, info)
|
||||
|
||||
|
||||
class CDAFolderIE(InfoExtractor):
    # Playlist extractor for cda.pl folder URLs (/<channel>/folder/<id>).
    # Every video link found on a folder listing page is delegated to CDAIE.
    _MAX_PAGE_SIZE = 36  # page size handed to OnDemandPagedList
    _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.cda.pl/domino264/folder/31188385',
        'info_dict': {
            'id': '31188385',
            'title': 'SERIA DRUGA',
        },
        'playlist_mincount': 13,
    }, {
        'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
        'info_dict': {
            'id': '2664592',
            'title': 'VideoDowcipy - wszystkie odcinki',
        },
        'playlist_mincount': 71,
    }, {
        'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
        'info_dict': {
            'id': '19129979',
            'title': 'TESTY KOSMETYKÓW',
        },
        'playlist_mincount': 139,
    }]

    def _real_extract(self, url):
        """Build a lazily paged playlist from a folder URL.

        The folder page itself is downloaded once for its og:title; the
        listing pages under ``/vfilm/<n>`` are fetched on demand.
        """
        matched = self._match_valid_url(url)
        channel = matched.group('channel')
        folder_id = matched.group('id')

        landing_page = self._download_webpage(url, folder_id)

        def fetch_entries(page):
            # The listing URL and progress note use page + 1
            # (presumably OnDemandPagedList counts pages from zero).
            page_number = page + 1
            listing = self._download_webpage(
                f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page_number}', folder_id,
                f'Downloading page {page_number}', expected_status=404)
            for link in re.finditer(r'<a[^>]+href="/video/([0-9a-z]+)"', listing):
                video_id = link.group(1)
                yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id)

        entries = OnDemandPagedList(fetch_entries, self._MAX_PAGE_SIZE)
        return self.playlist_result(entries, folder_id, self._og_search_title(landing_page))
|
||||
|
||||
@@ -1,63 +1,50 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj
|
||||
from .vidyard import VidyardBaseIE, VidyardIE
|
||||
from ..utils import ExtractorError, make_archive_id, url_basename
|
||||
|
||||
|
||||
class CellebriteIE(InfoExtractor):
|
||||
class CellebriteIE(VidyardBaseIE):
|
||||
_VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
|
||||
'info_dict': {
|
||||
'id': '16025876',
|
||||
'id': 'ZqmUss3dQfEMGpauambPuH',
|
||||
'display_id': '16025876',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:174571cb97083fd1d457d75c684f4e2b',
|
||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
|
||||
'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
|
||||
'duration': 455,
|
||||
'tags': [],
|
||||
'description': 'md5:dee48fe12bbae5c01fe6a053f7676da4',
|
||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
|
||||
'duration': 455.979,
|
||||
'_old_archive_ids': ['cellebrite 16025876'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
|
||||
'info_dict': {
|
||||
'id': '29018255',
|
||||
'id': 'QV1U8a2yzcxigw7VFnqKyg',
|
||||
'display_id': '29018255',
|
||||
'ext': 'mp4',
|
||||
'duration': 134,
|
||||
'tags': [],
|
||||
'description': 'md5:e9a3d124c7287b0b07bad2547061cacf',
|
||||
'title': 'How to Lawfully Collect the Maximum Amount of Data From Android Devices',
|
||||
'description': 'md5:0e943a9ac14c374d5d74faed634d773c',
|
||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
|
||||
'title': 'Android Extractions Explained',
|
||||
'duration': 134.315,
|
||||
'_old_archive_ids': ['cellebrite 29018255'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_formats_and_subtitles(self, json_data, display_id):
    """Collect formats and subtitles from a sources mapping.

    Every URL found under ``mp4`` becomes a format with only a ``url`` key.
    Every URL found under ``hls`` is expanded as an m3u8 manifest (requested
    with a play.vidyard.com Referer); its formats are appended and its
    subtitles merged into the result.

    Returns a ``(formats, subtitles)`` tuple.
    """
    formats = []
    for mp4_url in traverse_obj(json_data, ('mp4', ..., 'url')) or []:
        formats.append({'url': mp4_url})

    subtitles = {}
    for hls_url in traverse_obj(json_data, ('hls', ..., 'url')) or []:
        hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
            hls_url, display_id, ext='mp4', headers={'Referer': 'https://play.vidyard.com/'})
        formats.extend(hls_formats)
        self._merge_subtitles(hls_subtitles, target=subtitles)

    return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
slug = self._match_id(url)
|
||||
webpage = self._download_webpage(url, slug)
|
||||
vidyard_url = next(VidyardIE._extract_embed_urls(url, webpage), None)
|
||||
if not vidyard_url:
|
||||
raise ExtractorError('No Vidyard video embeds found on page')
|
||||
|
||||
player_uuid = self._search_regex(
|
||||
r'<img\s[^>]*\bdata-uuid\s*=\s*"([^"\?]+)', webpage, 'player UUID')
|
||||
json_data = self._download_json(
|
||||
f'https://play.vidyard.com/player/{player_uuid}.json', display_id)['payload']['chapters'][0]
|
||||
video_id = url_basename(vidyard_url)
|
||||
info = self._process_video_json(self._fetch_video_json(video_id)['chapters'][0], video_id)
|
||||
if info.get('display_id'):
|
||||
info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])]
|
||||
if thumbnail := self._og_search_thumbnail(webpage, default=None):
|
||||
info.setdefault('thumbnails', []).append({'url': thumbnail})
|
||||
|
||||
formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], display_id)
|
||||
return {
|
||||
'id': str(json_data['videoId']),
|
||||
'title': json_data.get('name') or self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'description': json_data.get('description') or self._og_search_description(webpage),
|
||||
'duration': json_data.get('seconds'),
|
||||
'tags': json_data.get('tags'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'http_headers': {'Referer': 'https://play.vidyard.com/'},
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
**info,
|
||||
}
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
UserNotLive,
|
||||
@@ -36,7 +34,7 @@ class CHZZKLiveIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
live_detail = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
|
||||
f'https://api.chzzk.naver.com/service/v3/channels/{channel_id}/live-detail', channel_id,
|
||||
note='Downloading channel info', errnote='Unable to download channel info')['content']
|
||||
|
||||
if live_detail.get('status') == 'CLOSE':
|
||||
@@ -77,7 +75,7 @@ class CHZZKLiveIE(InfoExtractor):
|
||||
'thumbnails': thumbnails,
|
||||
**traverse_obj(live_detail, {
|
||||
'title': ('liveTitle', {str}),
|
||||
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
|
||||
'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
|
||||
'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
|
||||
'view_count': ('accumulateCount', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
@@ -106,30 +104,77 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
'upload_date': '20231219',
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Replay video is expired',
|
||||
}, {
|
||||
# Manually uploaded video
|
||||
'url': 'https://chzzk.naver.com/video/1980',
|
||||
'info_dict': {
|
||||
'id': '1980',
|
||||
'ext': 'mp4',
|
||||
'title': '※시청주의※한번보면 잊기 힘든 영상',
|
||||
'channel': '라디유radiyu',
|
||||
'channel_id': '68f895c59a1043bc5019b5e08c83a5c5',
|
||||
'channel_is_verified': False,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 95,
|
||||
'timestamp': 1703102631.722,
|
||||
'upload_date': '20231220',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# Partner channel replay video
|
||||
'url': 'https://chzzk.naver.com/video/2458',
|
||||
'info_dict': {
|
||||
'id': '2458',
|
||||
'ext': 'mp4',
|
||||
'title': '첫 방송',
|
||||
'channel': '강지',
|
||||
'channel_id': 'b5ed5db484d04faf4d150aedd362f34b',
|
||||
'channel_is_verified': True,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 4433,
|
||||
'timestamp': 1703307460.214,
|
||||
'upload_date': '20231223',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_meta = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
|
||||
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
|
||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
||||
query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
}, note='Downloading video playback', errnote='Unable to download video playback')
|
||||
|
||||
live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
|
||||
video_status = video_meta.get('vodStatus')
|
||||
if video_status == 'UPLOAD':
|
||||
playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
|
||||
elif video_status == 'ABR_HLS':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
|
||||
video_id, query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
})
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
|
||||
formats, subtitles = [], {}
|
||||
live_status = 'post_live' if live_status == 'was_live' else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'live_status': live_status,
|
||||
**traverse_obj(video_meta, {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
|
||||
'timestamp': ('publishDateAt', {float_or_none(scale=1000)}),
|
||||
'view_count': ('readCount', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
|
||||
@@ -3,6 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
@@ -85,7 +86,7 @@ class CineverseIE(CineverseBaseIE):
|
||||
'title': 'title',
|
||||
'id': ('details', 'item_id'),
|
||||
'description': ('details', 'description'),
|
||||
'duration': ('duration', {lambda x: x / 1000}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'cast': ('details', 'cast', {lambda x: x.split(', ')}),
|
||||
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
|
||||
'season_number': ('details', 'season', {int_or_none}),
|
||||
|
||||
@@ -6,11 +6,11 @@ from .common import InfoExtractor
|
||||
class CloudflareStreamIE(InfoExtractor):
|
||||
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
|
||||
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
|
||||
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_EMBED_REGEX = [
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
|
||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||
]
|
||||
_TESTS = [{
|
||||
@@ -24,6 +24,14 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e',
|
||||
'info_dict': {
|
||||
'id': '0e8e040aec776862e1d632a699edf59e',
|
||||
'ext': 'mp4',
|
||||
'title': '0e8e040aec776862e1d632a699edf59e',
|
||||
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||
'only_matching': True,
|
||||
@@ -36,6 +44,9 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJraWQiOiJmYTA0YjViMzQ2NDkwYTM5NWJiNzQ1NWFhZTA2YzYwZSIsInN1YiI6Ijg4ZDQxMDhhMzY0MjA3M2VhYmFhZjg3ZGExODJkMjYzIiwiZXhwIjoxNjAwNjA5MzE5fQ.xkRJwLGkt0nZ%5F0BlPiwU7iW4pqb4lKkznbKfAhGg0tGcxSS6ZBA3lcTUwu7W%2DyCFbnAl%2Dhqk3Fn%5FqeQS%5FQydP27qTHpB9iIFFsMtk1tqzGZV5v4yrYDnwLSKzEKvVd6QwJnfABtxH2JdpSNuWlMUiVXFxGWgjOw6QeTNDDklTQYXV%5FNLV7sErSn5CeOPeRRkdXb%2D8ip%5FVOcfk1nDsFoOo4fctFtGP0wYMyY5ae8nhhatydHwevuvJCcEvEfh%2D4qjq9mCZOodevmtSQ4YWmggf4BxtWnDWYrGW8Otp6oqezrR8oY4%2DbKdV6PaqBj49aJdcls6xK7PmM8%5Fvjy3xfm0Mg',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -35,6 +37,20 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'duration': 1205,
|
||||
'upload_date': '20221130',
|
||||
},
|
||||
}, {
|
||||
# Video-only m3u8 formats need manual fixup
|
||||
'url': 'https://embed.cloudycdn.services/ltv/media/08j_d24-6000-074',
|
||||
'md5': 'fc472e40f6e6238446509be411c920e2',
|
||||
'info_dict': {
|
||||
'id': '08j_d24-6000-074',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240620',
|
||||
'duration': 1673,
|
||||
'title': 'D24-6000-074-cetstud',
|
||||
'timestamp': 1718902233,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
|
||||
@@ -63,6 +79,9 @@ class CloudyCDNIE(InfoExtractor):
|
||||
formats, subtitles = [], {}
|
||||
for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
|
||||
for fmt in fmts:
|
||||
if re.search(r'chunklist_b\d+_vo_', fmt['url']):
|
||||
fmt['acodec'] = 'none'
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
|
||||
@@ -1,146 +1,225 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import merge_dicts, try_call, url_basename
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
try_call,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_elements, traverse_obj
|
||||
|
||||
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'md5': '3e6121ea48df7e2259fe73a0628605c4',
|
||||
'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'info_dict': {
|
||||
'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
|
||||
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nadal wins 8th French Open title',
|
||||
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
'upload_date': '20240531',
|
||||
'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
|
||||
'duration': 373.0,
|
||||
'timestamp': 1717148586,
|
||||
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
|
||||
'modified_date': '20240531',
|
||||
'modified_timestamp': 1717150140,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'info_dict': {
|
||||
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
|
||||
'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
|
||||
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'ext': 'mp4',
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."',
|
||||
'upload_date': '20130821',
|
||||
'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
|
||||
'title': 'Here’s how some inmates in closely divided state are now able to vote from jail',
|
||||
'timestamp': 1718158269,
|
||||
'upload_date': '20240612',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
|
||||
'duration': 202.0,
|
||||
'modified_date': '20240612',
|
||||
'modified_timestamp': 1718158509,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
|
||||
'info_dict': {
|
||||
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
|
||||
'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
|
||||
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
|
||||
'description': 'md5:19f78338ccec533db0fa8a4511012dae',
|
||||
'title': 'Video shows King Charles\' portrait being vandalized by activists',
|
||||
'timestamp': 1718113852,
|
||||
'upload_date': '20240611',
|
||||
'duration': 51.0,
|
||||
'modified_timestamp': 1718116193,
|
||||
'modified_date': '20240611',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'info_dict': {
|
||||
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
|
||||
'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
|
||||
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'ext': 'mp4',
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
|
||||
'duration': 158.0,
|
||||
'title': 'Robin Meade signs off after HLN\'s last broadcast',
|
||||
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
|
||||
'upload_date': '20221205',
|
||||
'timestamp': 1670284296,
|
||||
'modified_timestamp': 1670332404,
|
||||
'modified_date': '20221206',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'info_dict': {
|
||||
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'timestamp': 1729501452,
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
|
||||
'description': 'md5:256ee7137d161f776cda429654135e52',
|
||||
'upload_date': '20241021',
|
||||
'duration': 31.0,
|
||||
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
|
||||
'modified_date': '20241021',
|
||||
'modified_timestamp': 1729501530,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
|
||||
'only_matching': True,
|
||||
'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
|
||||
'info_dict': {
|
||||
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
|
||||
'info_dict': {
|
||||
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
|
||||
'ext': 'mp4',
|
||||
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
|
||||
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
|
||||
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
|
||||
'duration': 173.0,
|
||||
'timestamp': 1729122182,
|
||||
'upload_date': '20241016',
|
||||
'modified_timestamp': 1729194706,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'md5': '11604ab4af83b650826753f1ccb8ecff',
|
||||
'info_dict': {
|
||||
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
|
||||
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
|
||||
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
|
||||
'duration': 145.0,
|
||||
'timestamp': 1729137765,
|
||||
'upload_date': '20241017',
|
||||
'modified_timestamp': 1729138184,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}],
|
||||
}]
|
||||
|
||||
_CONFIG = {
|
||||
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
|
||||
'edition': {
|
||||
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
|
||||
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
|
||||
},
|
||||
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
|
||||
'money': {
|
||||
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
|
||||
'media_src': 'http://ht3.cdn.turner.com/money/big',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
|
||||
# TODO: fix timestamp extraction
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, path, page_title = self._match_valid_url(url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
display_id = self._match_valid_url(url).group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
app_id = traverse_obj(
|
||||
self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
|
||||
('TOP_AUTH_SERVICE_APP_ID', {str}))
|
||||
|
||||
entries = []
|
||||
for player_data in traverse_obj(webpage, (
|
||||
{find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
|
||||
..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
|
||||
media_id = player_data['data-media-id']
|
||||
parent_uri = player_data.get('data-video-resource-parent-uri')
|
||||
formats, subtitles = [], {}
|
||||
|
||||
video_data = {}
|
||||
if parent_uri:
|
||||
video_data = self._download_json(
|
||||
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
|
||||
query={
|
||||
'id': media_id,
|
||||
'stellarUri': parent_uri,
|
||||
})
|
||||
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
|
||||
resolution, bitrate = None, None
|
||||
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
|
||||
resolution, bitrate = mobj.group('res', 'tbr')
|
||||
formats.append({
|
||||
'url': direct_url,
|
||||
'format_id': 'direct',
|
||||
'quality': 1,
|
||||
'tbr': int_or_none(bitrate),
|
||||
**parse_resolution(resolution),
|
||||
})
|
||||
for sub_data in traverse_obj(video_data, (
|
||||
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
|
||||
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
|
||||
'url': sub_data['url'],
|
||||
'name': sub_data.get('label'),
|
||||
})
|
||||
|
||||
if app_id:
|
||||
media_data = self._download_json(
|
||||
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
|
||||
query={'appId': app_id})
|
||||
m3u8_url = traverse_obj(media_data, (
|
||||
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
|
||||
if m3u8_url:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
entries.append({
|
||||
**traverse_obj(player_data, {
|
||||
'title': ('data-headline', {clean_html}),
|
||||
'description': ('data-description', {clean_html}),
|
||||
'duration': ('data-duration', {parse_duration}),
|
||||
'timestamp': ('data-publish-date', {parse_iso8601}),
|
||||
'thumbnail': (
|
||||
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
|
||||
{update_url(query='c=original')}),
|
||||
'display_id': 'data-video-slug',
|
||||
}),
|
||||
**traverse_obj(video_data, {
|
||||
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
|
||||
'description': ('description', {clean_html}),
|
||||
'title': ('headline', {str}),
|
||||
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
|
||||
'duration': ('trt', {int_or_none}),
|
||||
}),
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**entries[0],
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
|
||||
_TEST = {
|
||||
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
|
||||
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Criminalizing journalism?',
|
||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||
'upload_date': '20140209',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
||||
return self.url_result(cnn_url, CNNIE.ie_key())
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: Cyberattack not an act of war',
|
||||
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
|
||||
class CNNIndonesiaIE(InfoExtractor):
|
||||
|
||||
@@ -35,6 +35,7 @@ from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
TransportError,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
@@ -46,6 +47,7 @@ from ..utils import (
|
||||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
ISO639Utils,
|
||||
LenientJSONDecoder,
|
||||
Popen,
|
||||
RegexNotFoundError,
|
||||
@@ -234,7 +236,14 @@ class InfoExtractor:
|
||||
'maybe' if the format may have DRM and has to be tested before download.
|
||||
* extra_param_to_segment_url A query string to append to each
|
||||
fragment's URL, or to update each existing query string
|
||||
with. Only applied by the native HLS/DASH downloaders.
|
||||
with. If it is an HLS stream with an AES-128 decryption key,
|
||||
the query parameters will be passed to the key URI as well,
|
||||
unless there is an `extra_param_to_key_url` given,
|
||||
or unless an external key URI is provided via `hls_aes`.
|
||||
Only applied by the native HLS/DASH downloaders.
|
||||
* extra_param_to_key_url A query string to append to the URL
|
||||
of the format's HLS AES-128 decryption key.
|
||||
Only applied by the native HLS downloader.
|
||||
* hls_aes A dictionary of HLS AES-128 decryption information
|
||||
used by the native HLS downloader to override the
|
||||
values in the media playlist when an '#EXT-X-KEY' tag
|
||||
@@ -325,7 +334,7 @@ class InfoExtractor:
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
repost_count: Number of reposts of the video
|
||||
average_rating: Average rating give by users, the scale used depends on the webpage
|
||||
average_rating: Average rating given by users, the scale used depends on the webpage
|
||||
comment_count: Number of comments on the video
|
||||
comments: A list of comments, each with one or more of the following
|
||||
properties (all but one of text or html optional):
|
||||
@@ -512,7 +521,7 @@ class InfoExtractor:
|
||||
or _extract_from_webpage as necessary. While these are normally classmethods,
|
||||
_extract_from_webpage is allowed to be an instance method.
|
||||
|
||||
_extract_from_webpage may raise self.StopExtraction() to stop further
|
||||
_extract_from_webpage may raise self.StopExtraction to stop further
|
||||
processing of the webpage and obtain exclusive rights to it. This is useful
|
||||
when the extractor cannot reliably be matched using just the URL,
|
||||
e.g. invidious/peertube instances
|
||||
@@ -565,13 +574,13 @@ class InfoExtractor:
|
||||
|
||||
def _login_hint(self, method=NO_DEFAULT, netrc=None):
|
||||
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
|
||||
cookies_hint = 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'
|
||||
return {
|
||||
None: '',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}. {cookies_hint}',
|
||||
'password': f'Use {password_hint}',
|
||||
'cookies': (
|
||||
'Use --cookies-from-browser or --cookies for the authentication. '
|
||||
'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
|
||||
'cookies': f'Use --cookies-from-browser or --cookies for the authentication. {cookies_hint}',
|
||||
'session_cookies': f'Use --cookies for the authentication (--cookies-from-browser might not work). {cookies_hint}',
|
||||
}[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
@@ -958,6 +967,9 @@ class InfoExtractor:
|
||||
return False
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data)
|
||||
if content is False:
|
||||
assert not fatal
|
||||
return False
|
||||
return (content, urlh)
|
||||
|
||||
@staticmethod
|
||||
@@ -1032,7 +1044,15 @@ class InfoExtractor:
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||
prefix=None, encoding=None, data=None):
|
||||
webpage_bytes = urlh.read()
|
||||
try:
|
||||
webpage_bytes = urlh.read()
|
||||
except TransportError as err:
|
||||
errmsg = f'{video_id}: Error reading response: {err.msg}'
|
||||
if fatal:
|
||||
raise ExtractorError(errmsg, cause=err)
|
||||
self.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
@@ -1389,6 +1409,13 @@ class InfoExtractor:
|
||||
return None, None
|
||||
|
||||
self.write_debug(f'Using netrc for {netrc_machine} authentication')
|
||||
|
||||
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
|
||||
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
|
||||
if sys.version_info < (3, 11):
|
||||
return tuple(x if x != '""' else '' for x in info[::2])
|
||||
|
||||
return info[0], info[2]
|
||||
|
||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||
@@ -1551,7 +1578,9 @@ class InfoExtractor:
|
||||
if default is not NO_DEFAULT:
|
||||
fatal = False
|
||||
for mobj in re.finditer(JSON_LD_RE, html):
|
||||
json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal)
|
||||
json_ld_item = self._parse_json(
|
||||
mobj.group('json_ld'), video_id, fatal=fatal,
|
||||
errnote=False if default is not NO_DEFAULT else None)
|
||||
for json_ld in variadic(json_ld_item):
|
||||
if isinstance(json_ld, dict):
|
||||
yield json_ld
|
||||
@@ -1691,7 +1720,7 @@ class InfoExtractor:
|
||||
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
|
||||
if rating is not None:
|
||||
info['average_rating'] = rating
|
||||
if is_type(e, 'TVEpisode', 'Episode'):
|
||||
if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
|
||||
episode_name = unescapeHTML(e.get('name'))
|
||||
info.update({
|
||||
'episode': episode_name,
|
||||
@@ -2058,7 +2087,7 @@ class InfoExtractor:
|
||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
|
||||
if self.get_param('hls_split_discontinuity', False):
|
||||
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||
@@ -2215,6 +2244,11 @@ class InfoExtractor:
|
||||
'quality': quality,
|
||||
'has_drm': has_drm,
|
||||
}
|
||||
|
||||
# YouTube-specific
|
||||
if yt_audio_content_id := last_stream_inf.get('YT-EXT-AUDIO-CONTENT-ID'):
|
||||
f['language'] = yt_audio_content_id.split('.')[0]
|
||||
|
||||
resolution = last_stream_inf.get('RESOLUTION')
|
||||
if resolution:
|
||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||
@@ -2788,11 +2822,11 @@ class InfoExtractor:
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if try_call(lambda: base_url_e.text) is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
if re.match(r'https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and base_url.startswith('/'):
|
||||
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
|
||||
elif mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
elif mpd_base_url and not re.match(r'https?://', base_url):
|
||||
if not mpd_base_url.endswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
@@ -2882,7 +2916,7 @@ class InfoExtractor:
|
||||
}
|
||||
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
return 'url' if re.match(r'https?://', location) else 'path'
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
@@ -3047,7 +3081,11 @@ class InfoExtractor:
|
||||
url_pattern = stream.attrib['Url']
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
stream_language = stream.get('Language', 'und')
|
||||
# IsmFD expects ISO 639 Set 2 language codes (3-character length)
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
|
||||
stream_language = stream.get('Language') or 'und'
|
||||
if len(stream_language) != 3:
|
||||
stream_language = ISO639Utils.short2long(stream_language) or 'und'
|
||||
for track in stream.findall('QualityLevel'):
|
||||
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
|
||||
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
|
||||
@@ -3138,7 +3176,7 @@ class InfoExtractor:
|
||||
})
|
||||
return formats, subtitles
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None):
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None, _headers=None):
|
||||
def absolute_url(item_url):
|
||||
return urljoin(base_url, item_url)
|
||||
|
||||
@@ -3162,11 +3200,11 @@ class InfoExtractor:
|
||||
formats = self._extract_m3u8_formats(
|
||||
full_url, video_id, ext='mp4',
|
||||
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
|
||||
preference=preference, quality=quality, fatal=False)
|
||||
preference=preference, quality=quality, fatal=False, headers=_headers)
|
||||
elif ext == 'mpd':
|
||||
is_plain_url = False
|
||||
formats = self._extract_mpd_formats(
|
||||
full_url, video_id, mpd_id=mpd_id, fatal=False)
|
||||
full_url, video_id, mpd_id=mpd_id, fatal=False, headers=_headers)
|
||||
else:
|
||||
is_plain_url = True
|
||||
formats = [{
|
||||
@@ -3260,6 +3298,8 @@ class InfoExtractor:
|
||||
})
|
||||
for f in media_info['formats']:
|
||||
f.setdefault('http_headers', {})['Referer'] = base_url
|
||||
if _headers:
|
||||
f['http_headers'].update(_headers)
|
||||
if media_info['formats'] or media_info['subtitles']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
@@ -3475,7 +3515,7 @@ class InfoExtractor:
|
||||
continue
|
||||
urls.add(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
ext = determine_ext(source_url, default_ext=mimetype2ext(source_type))
|
||||
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -112,8 +113,7 @@ class CondeNastIE(InfoExtractor):
|
||||
m_paths = re.finditer(
|
||||
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
|
||||
paths = orderedSet(m.group(1) for m in m_paths)
|
||||
build_url = lambda path: urllib.parse.urljoin(base_url, path)
|
||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||
entries = [self.url_result(urljoin(base_url, path), 'CondeNast') for path in paths]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
def _extract_video_params(self, webpage, display_id):
|
||||
|
||||
@@ -456,7 +456,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
|
||||
'duration': ('duration_ms', {float_or_none(scale=1000)}),
|
||||
'timestamp': ('upload_date', {parse_iso8601}),
|
||||
'series': ('series_title', {str}),
|
||||
'series_id': ('series_id', {str}),
|
||||
@@ -484,7 +484,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
|
||||
'duration': ('duration_ms', {float_or_none(scale=1000)}),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -6,12 +6,37 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cwtv.com/shows/all-american-homecoming/ready-or-not/?play=d848488f-f62a-40fd-af1f-6440b1821aab',
|
||||
'info_dict': {
|
||||
'id': 'd848488f-f62a-40fd-af1f-6440b1821aab',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ready Or Not',
|
||||
'description': 'Simone is concerned about changes taking place at Bringston; JR makes a decision about his future.',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 2547,
|
||||
'timestamp': 1720519200,
|
||||
'uploader': 'CWTV',
|
||||
'chapters': 'count:6',
|
||||
'series': 'All American: Homecoming',
|
||||
'season_number': 3,
|
||||
'episode_number': 1,
|
||||
'age_limit': 0,
|
||||
'upload_date': '20240709',
|
||||
'season': 'Season 3',
|
||||
'episode': 'Episode 1',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'info_dict': {
|
||||
'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
@@ -69,13 +94,14 @@ class CWTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)
|
||||
f'https://images.cwtv.com/feed/mobileapp/video-meta/apiversion_12/guid_{video_id}', video_id)
|
||||
if data.get('result') != 'ok':
|
||||
raise ExtractorError(data['msg'], expected=True)
|
||||
video_data = data['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or f'http://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}?formats=M3U'
|
||||
mpx_url = update_url_query(
|
||||
video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}',
|
||||
{'formats': 'M3U+none'})
|
||||
|
||||
season = str_or_none(video_data.get('season'))
|
||||
episode = str_or_none(video_data.get('episode'))
|
||||
|
||||
@@ -2,6 +2,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
@@ -52,7 +53,7 @@ class DailyMailIE(InfoExtractor):
|
||||
is_hls = container == 'M2TS'
|
||||
protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
|
||||
formats.append({
|
||||
'format_id': ('hls' if is_hls else protocol) + (f'-{tbr}' if tbr else ''),
|
||||
'format_id': join_nonempty('hls' if is_hls else protocol, tbr),
|
||||
'url': rendition_url,
|
||||
'width': int_or_none(rendition.get('frameWidth')),
|
||||
'height': int_or_none(rendition.get('frameHeight')),
|
||||
|
||||
@@ -10,11 +10,14 @@ from ..utils import (
|
||||
OnDemandPagedList,
|
||||
age_restricted,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -98,12 +101,20 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'''(?ix)
|
||||
https?://
|
||||
(?:
|
||||
dai\.ly/|
|
||||
(?:
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
|
||||
(?:www\.)?lequipe\.fr/video
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
|
||||
(?:www\.)?lequipe\.fr
|
||||
)/
|
||||
(?:
|
||||
swf/(?!video)|
|
||||
(?:(?:crawler|embed|swf)/)?video/|
|
||||
player(?:/[\da-z]+)?\.html\?(?:video|(?P<is_playlist>playlist))=
|
||||
)
|
||||
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
)
|
||||
(?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
IE_NAME = 'dailymotion'
|
||||
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
||||
_TESTS = [{
|
||||
@@ -123,7 +134,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
|
||||
@@ -142,7 +153,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['en_quete_d_esprit'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
@@ -217,6 +228,66 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
||||
'only_matching': True,
|
||||
}, { # playlist-only
|
||||
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/crawler/video/x8u4owg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/embed/video/x8u4owg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://dai.ly/x94cnnk',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
|
||||
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
|
||||
'info_dict': {
|
||||
'id': 'x93blhi',
|
||||
'ext': 'mp4',
|
||||
'title': 'OnAir - 01/08/24',
|
||||
'description': '',
|
||||
'duration': 217,
|
||||
'timestamp': 1722505658,
|
||||
'upload_date': '20240801',
|
||||
'uploader': 'Financialounge',
|
||||
'uploader_id': 'x2vtgmm',
|
||||
'age_limit': 0,
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
|
||||
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
|
||||
'info_dict': {
|
||||
'id': 'x7wdsj',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
# https://www.dailymotion.com/crawler/video/x8u4owg
|
||||
'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php',
|
||||
'info_dict': {
|
||||
'id': 'x8u4owg',
|
||||
'ext': 'mp4',
|
||||
'like_count': int,
|
||||
'uploader': 'Le Parisien',
|
||||
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
|
||||
'upload_date': '20240309',
|
||||
'view_count': int,
|
||||
'timestamp': 1709997866,
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'x32f7b',
|
||||
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
|
||||
'duration': 428.0,
|
||||
'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne',
|
||||
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
|
||||
},
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_COMMON_MEDIA_FIELDS = '''description
|
||||
@@ -232,16 +303,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
for mobj in re.finditer(
|
||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
player_url = url_or_none(attrs.get('src'))
|
||||
if not player_url:
|
||||
continue
|
||||
player_url = player_url.replace('.js', '.html')
|
||||
if player_url.startswith('//'):
|
||||
player_url = f'https:{player_url}'
|
||||
if video_id := attrs.get('data-video'):
|
||||
query_string = f'video={video_id}'
|
||||
elif playlist_id := attrs.get('data-playlist'):
|
||||
query_string = f'playlist={playlist_id}'
|
||||
else:
|
||||
continue
|
||||
yield update_url(player_url, query=query_string)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
video_id, playlist_id = self._match_valid_url(url).groups()
|
||||
video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
|
||||
|
||||
if playlist_id:
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self.url_result(
|
||||
'http://www.dailymotion.com/playlist/' + playlist_id,
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
if is_playlist: # We matched the playlist query param as video_id
|
||||
playlist_id = video_id
|
||||
video_id = None
|
||||
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self.url_result(
|
||||
f'http://www.dailymotion.com/playlist/{playlist_id}',
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
media = self._call_api(
|
||||
@@ -282,6 +372,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
title = metadata['title']
|
||||
is_live = media.get('isOnAir')
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
for quality, media_list in metadata['qualities'].items():
|
||||
for m in media_list:
|
||||
media_url = m.get('url')
|
||||
@@ -289,8 +381,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
if media_type == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
|
||||
fmt, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmt)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
@@ -310,20 +404,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if not f.get('fps') and f['format_id'].endswith('@60'):
|
||||
f['fps'] = 60
|
||||
|
||||
subtitles = {}
|
||||
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
|
||||
for subtitle_lang, subtitle in subtitles_data.items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'url': subtitle_url,
|
||||
} for subtitle_url in subtitle.get('urls', [])]
|
||||
|
||||
thumbnails = []
|
||||
for height, poster_url in metadata.get('posters', {}).items():
|
||||
thumbnails.append({
|
||||
'height': int_or_none(height),
|
||||
'id': height,
|
||||
'url': poster_url,
|
||||
})
|
||||
thumbnails = traverse_obj(metadata, (
|
||||
('posters', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]), {
|
||||
'height': (0, {int_or_none}),
|
||||
'id': (0, {str}),
|
||||
'url': 1,
|
||||
}))
|
||||
|
||||
owner = metadata.get('owner') or {}
|
||||
stats = media.get('stats') or {}
|
||||
@@ -447,7 +539,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search|crawler)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
|
||||
@@ -40,7 +40,7 @@ class DangalPlayBaseIE(InfoExtractor):
|
||||
'id': ('content_id', {str}),
|
||||
'title': ('display_title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'series': ('show_name', {str}, {lambda x: x or None}),
|
||||
'series': ('show_name', {str}, filter),
|
||||
'series_id': ('catalog_id', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'release_timestamp': ('release_date_uts', {int_or_none}),
|
||||
|
||||
@@ -1,17 +1,20 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_resolution,
|
||||
traverse_obj,
|
||||
parse_codecs,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DigitalConcertHallIE(InfoExtractor):
|
||||
IE_DESC = 'DigitalConcertHall extractor'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
|
||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
|
||||
_ACCESS_TOKEN = None
|
||||
_NETRC_MACHINE = 'digitalconcerthall'
|
||||
_TESTS = [{
|
||||
@@ -26,7 +29,8 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'upload_date': '20210624',
|
||||
'timestamp': 1624548600,
|
||||
'duration': 2798,
|
||||
'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler',
|
||||
'album_artists': ['Members of the Berliner Philharmoniker', 'Simon Rössler'],
|
||||
'composers': ['Kurt Weill'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
@@ -34,8 +38,9 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'url': 'https://www.digitalconcerthall.com/en/concert/53785',
|
||||
'info_dict': {
|
||||
'id': '53785',
|
||||
'album_artist': 'Berliner Philharmoniker / Kirill Petrenko',
|
||||
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
|
||||
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
|
||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'playlist_count': 3,
|
||||
@@ -49,39 +54,59 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||
'upload_date': '20220714',
|
||||
'timestamp': 1657785600,
|
||||
'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff',
|
||||
'album_artists': ['Frank Peter Zimmermann', 'Benedikt von Bernstorff', 'Jakob von Bernstorff'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'Concert with several works and an interview',
|
||||
'url': 'https://www.digitalconcerthall.com/en/work/53785-1',
|
||||
'info_dict': {
|
||||
'id': '53785',
|
||||
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
|
||||
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
|
||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
token_response = self._download_json(
|
||||
login_token = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
|
||||
'affiliate': 'none',
|
||||
'grant_type': 'device',
|
||||
'device_vendor': 'unknown',
|
||||
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio
|
||||
'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari',
|
||||
'app_id': 'dch.webapp',
|
||||
'app_version': '1.0.0',
|
||||
'app_distributor': 'berlinphil',
|
||||
'app_version': '1.84.0',
|
||||
'client_secret': '2ySLN+2Fwb',
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
self._ACCESS_TOKEN = token_response['access_token']
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})['access_token']
|
||||
try:
|
||||
self._download_json(
|
||||
login_response = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
|
||||
'grant_type': 'password',
|
||||
'username': username,
|
||||
'password': password,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'Referer': 'https://www.digitalconcerthall.com',
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
'Authorization': f'Bearer {login_token}',
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})
|
||||
except ExtractorError:
|
||||
self.raise_login_required(msg='Login info incorrect')
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
raise
|
||||
self._ACCESS_TOKEN = login_response['access_token']
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._ACCESS_TOKEN:
|
||||
@@ -95,17 +120,20 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})
|
||||
|
||||
m3u8_url = traverse_obj(
|
||||
stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
|
||||
formats = []
|
||||
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
for fmt in formats:
|
||||
if fmt.get('format_note') and fmt.get('vcodec') == 'none':
|
||||
fmt.update(parse_codecs(fmt['format_note']))
|
||||
|
||||
yield {
|
||||
'id': video_id,
|
||||
'title': item.get('title'),
|
||||
'composer': item.get('name_composer'),
|
||||
'url': m3u8_url,
|
||||
'formats': formats,
|
||||
'duration': item.get('duration_total'),
|
||||
'timestamp': traverse_obj(item, ('date', 'published')),
|
||||
@@ -119,31 +147,34 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id')
|
||||
language, type_, video_id, part = self._match_valid_url(url).group('language', 'type', 'id', 'part')
|
||||
if not language:
|
||||
language = 'en'
|
||||
|
||||
thumbnail_url = self._html_search_regex(
|
||||
r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)',
|
||||
self._download_webpage(url, video_id), 'thumbnail')
|
||||
thumbnails = [{
|
||||
'url': thumbnail_url,
|
||||
**parse_resolution(thumbnail_url),
|
||||
}]
|
||||
|
||||
api_type = 'concert' if type_ == 'work' else type_
|
||||
vid_info = self._download_json(
|
||||
f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={
|
||||
f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
})
|
||||
album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')
|
||||
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
|
||||
|
||||
if type_ == 'work':
|
||||
videos = [videos[int(part) - 1]]
|
||||
|
||||
album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name', {str}))
|
||||
thumbnail = traverse_obj(vid_info, (
|
||||
'image', ..., {self._proto_relative_url}, {url_or_none},
|
||||
{lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': video_id,
|
||||
'title': vid_info.get('title'),
|
||||
'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_),
|
||||
'thumbnails': thumbnails,
|
||||
'album_artist': album_artist,
|
||||
'entries': self._entries(
|
||||
videos, language, type_, thumbnail=thumbnail, album_artists=album_artists),
|
||||
'thumbnail': thumbnail,
|
||||
'album_artists': album_artists,
|
||||
}
|
||||
|
||||
@@ -1,115 +0,0 @@
|
||||
import random
|
||||
import string
|
||||
import urllib.parse
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
    # Matches /tv-shows/<show>/videos/<slug> and /tv-shows/<show>/full-episodes/<slug>
    # on go.discovery.com and on the www.* / watch.* hosts enumerated below.
    _VALID_URL = r'''(?x)https?://
        (?P<site>
            go\.discovery|
            www\.
                (?:
                    investigationdiscovery|
                    discoverylife|
                    animalplanet|
                    ahctv|
                    destinationamerica|
                    sciencechannel|
                    tlc
                )|
            watch\.
                (?:
                    hgtv|
                    foodnetwork|
                    travelchannel|
                    diynetwork|
                    cookingchanneltv|
                    motortrend
                )
        )\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
    _TESTS = [{
        'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
        'info_dict': {
            'id': '5a2f35ce6b66d17a5026e29e',
            'ext': 'mp4',
            'title': 'Riding with Matthew Perry',
            'description': 'md5:a34333153e79bc4526019a5129e7f878',
            'duration': 84,
        },
        'params': {
            'skip_download': True,  # requires ffmpeg
        },
    }, {
        'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
        'only_matching': True,
    }, {
        'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
        'only_matching': True,
    }, {
        # using `show_slug` is important to get the correct video data
        'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
        'only_matching': True,
    }]
    _GEO_COUNTRIES = ['US']
    _GEO_BYPASS = False
    _API_BASE_URL = 'https://api.discovery.com/v1/'

    def _cookie_access_token(self, url, display_id):
        """Return the access token found in the site's auth cookie, or None."""
        jar = self._get_cookies(url)
        # prefer Affiliate Auth Token over Anonymous Auth Token
        auth_cookie = jar.get('eosAf') or jar.get('eosAn')
        if not (auth_cookie and auth_cookie.value):
            return None
        # The cookie value is unquoted twice before being parsed as JSON
        raw_json = urllib.parse.unquote(urllib.parse.unquote(auth_cookie.value))
        auth_storage = self._parse_json(raw_json, display_id, fatal=False) or {}
        return auth_storage.get('a') or auth_storage.get('access_token')

    def _real_extract(self, url):
        """Resolve the slug to a video, fetch its stream data and build the info dict."""
        site, show_slug, display_id = self._match_valid_url(url).groups()

        token = self._cookie_access_token(url, display_id)
        if not token:
            # No usable cookie token: request an anonymous one from the site
            anonymous_auth = self._download_json(
                f'https://{site}.com/anonymous', display_id,
                'Downloading token JSON metadata', query={
                    'authRel': 'authorization',
                    'client_id': '3020a40c2356a645b4b4',
                    'nonce': ''.join(random.choices(string.ascii_letters, k=32)),
                    'redirectUri': 'https://www.discovery.com/',
                })
            token = anonymous_auth['access_token']

        api_headers = self.geo_verification_headers()
        api_headers['Authorization'] = 'Bearer ' + token

        try:
            matches = self._download_json(
                self._API_BASE_URL + 'content/videos',
                display_id, 'Downloading content JSON metadata',
                headers=api_headers, query={
                    'embed': 'show.name',
                    'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
                    'slug': display_id,
                    'show_slug': show_slug,
                })
            video = matches[0]
            video_id = video['id']
            stream = self._download_json(
                self._API_BASE_URL + 'streaming/video/' + video_id,
                display_id, 'Downloading streaming JSON metadata', headers=api_headers)
        except ExtractorError as err:
            http_error = err.cause
            # Only 401/403 responses carry a description worth translating
            if not isinstance(http_error, HTTPError) or http_error.status not in (401, 403):
                raise
            reason = self._parse_json(
                http_error.response.read().decode(), display_id)['description']
            if 'resource not available for country' in reason:
                self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
            if 'Authorized Networks' in reason:
                raise ExtractorError(
                    'This video is only available via cable service provider subscription that'
                    ' is not currently supported. You may want to use --cookies.', expected=True)
            raise ExtractorError(reason)

        return self._extract_video_info(video, stream, display_id)
|
||||
@@ -1,171 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DiscoveryGoBaseIE(InfoExtractor):
    # Verbose-mode URL template for the "<network>go.com" sites. Subclasses fill
    # the `%s` slot with the path pattern that precedes the `id` group.
    _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
            discovery|
            investigationdiscovery|
            discoverylife|
            animalplanet|
            ahctv|
            destinationamerica|
            sciencechannel|
            tlc|
            velocitychannel
        )go\.com/%s(?P<id>[^/?#&]+)'''

    def _extract_video_info(self, video, stream, display_id):
        """Build the info dict for one video.

        @param video       decoded video metadata; must contain 'name'
        @param stream      decoded stream data (manifest URLs, captions); may be falsy
        @param display_id  slug used for progress messages and as fallback id
        @returns           info dict with formats, subtitles and episode metadata
        @raises ExtractorError  if `stream` is missing/empty
        """
        title = video['name']

        if not stream:
            if video.get('authenticated') is True:
                raise ExtractorError(
                    'This video is only available via cable service provider subscription that'
                    ' is not currently supported. You may want to use --cookies.', expected=True)
            raise ExtractorError('Unable to find stream')

        formats = []

        hls_url = stream.get('streamUrl')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, display_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        # The HDS key is spelled out explicitly: it used to be derived with
        # 'streamUrl'.capitalize(), which lowercases the tail ('Streamurl'), so
        # the lookup was for 'hdsStreamurl' instead of 'hdsStreamUrl'.
        hds_url = stream.get('hdsStreamUrl')
        if hds_url:
            formats.extend(self._extract_f4m_formats(
                hds_url, display_id, f4m_id='hds', fatal=False))

        # `x.get(key) or {}` rather than `x.get(key, {})`: the default of
        # dict.get() is only used for a missing key, not for an explicit null.
        description = (video.get('description') or {}).get('detailed')
        series = (video.get('show') or {}).get('name')
        season_number = int_or_none((video.get('season') or {}).get('number'))
        age_limit = parse_age_limit((video.get('parental') or {}).get('rating'))

        subtitles = {}
        captions = stream.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                subtitle_url = url_or_none(caption.get('fileUrl'))
                if not subtitle_url or not subtitle_url.startswith('http'):
                    continue
                # Fall back to 'en' for a missing as well as a null/empty language
                lang = caption.get('fileLang') or 'en'
                ext = determine_ext(subtitle_url)
                subtitles.setdefault(lang, []).append({
                    'url': subtitle_url,
                    'ext': 'ttml' if ext == 'xml' else ext,
                })

        return {
            'id': video.get('id') or display_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'duration': int_or_none(video.get('duration')),
            'series': series,
            'season_number': season_number,
            'episode_number': int_or_none(video.get('episodeNumber')),
            'tags': video.get('tags'),
            'age_limit': age_limit,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class DiscoveryGoIE(DiscoveryGoBaseIE):
    # Episode pages: one or more path segments precede the id segment
    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
    _GEO_COUNTRIES = ['US']
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
        'info_dict': {
            'id': '58c167d86b66d12f2addeb01',
            'ext': 'mp4',
            'title': 'Reaper Madness',
            'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
            'duration': 2519,
            'series': 'Bering Sea Gold',
            'season_number': 8,
            'episode_number': 6,
            'age_limit': 14,
        },
    }

    def _real_extract(self, url):
        """Read the video JSON embedded in the player container of the page."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        container_tag = self._search_regex(
            r'(<div[^>]+class=["\']video-player-container[^>]+>)',
            webpage, 'video container')
        attrs = extract_attributes(container_tag)

        # The metadata is carried by either of two attributes; data-video wins
        embedded_json = attrs.get('data-video') or attrs.get('data-json')
        video = self._parse_json(embedded_json, display_id)

        return self._extract_video_info(video, video.get('stream'), display_id)
|
||||
|
||||
|
||||
class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
    # Show pages: the id segment directly follows the host
    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/',
        'info_dict': {
            'id': 'bering-sea-gold',
            'title': 'Bering Sea Gold',
            'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
        },
        'playlist_mincount': 6,
    }

    @classmethod
    def suitable(cls, url):
        """Decline any URL that DiscoveryGoIE accepts; otherwise defer to the base check."""
        if DiscoveryGoIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Collect every `data-json` blob of type 'episode' on the page as a playlist entry."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        entries = []
        for match in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
            item = self._parse_json(
                match.group('json'), display_id,
                transform_source=unescapeHTML, fatal=False)
            is_episode = isinstance(item, dict) and item.get('type') == 'episode'
            if not is_episode:
                continue
            episode_url = item.get('socialUrl')
            if episode_url:
                entries.append(self.url_result(
                    episode_url, ie=DiscoveryGoIE.ie_key(),
                    video_id=item.get('id')))

        playlist_title = remove_end(
            self._og_search_title(webpage, fatal=False), ' | Discovery GO')
        playlist_description = self._og_search_description(webpage)
        return self.playlist_result(
            entries, display_id, playlist_title, playlist_description)
|
||||
@@ -24,8 +24,9 @@ from ..utils import (
|
||||
class DouyuBaseIE(InfoExtractor):
|
||||
def _download_cryptojs_md5(self, video_id):
|
||||
for url in [
|
||||
# XXX: Do NOT use cdn.bootcdn.net; ref: https://sansec.io/research/polyfill-supply-chain-attack
|
||||
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
'https://unpkg.com/cryptojslib@3.1.2/rollups/md5.js',
|
||||
]:
|
||||
js_code = self._download_webpage(
|
||||
url, video_id, note='Downloading signing dependency', fatal=False)
|
||||
@@ -35,7 +36,8 @@ class DouyuBaseIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
|
||||
|
||||
def _get_cryptojs_md5(self, video_id):
|
||||
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
|
||||
return self.cache.load(
|
||||
'douyu', 'crypto-js-md5', min_ver='2024.07.04') or self._download_cryptojs_md5(video_id)
|
||||
|
||||
def _calc_sign(self, sign_func, video_id, a):
|
||||
b = uuid.uuid4().hex
|
||||
|
||||
@@ -319,35 +319,17 @@ class DPlayIE(DPlayBaseIE):
|
||||
url, display_id, host, 'dplay' + country, country, domain)
|
||||
|
||||
|
||||
class HGTVDeIE(DPlayBaseIE):
    _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
        'info_dict': {
            'id': '151205',
            'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
            'ext': 'mp4',
            'title': 'Wer braucht schon eine Toilette',
            'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
            'duration': 1177.024,
            'timestamp': 1595705400,
            'upload_date': '20200725',
            'creator': 'HGTV',
            'series': 'Tiny House - klein, aber oho',
            'season_number': 3,
            'episode_number': 3,
        },
    }]

    def _real_extract(self, url):
        """Look the episode up through the disco API with fixed host, realm and country."""
        disco_host, realm, country = 'eu1-prod.disco-api.com', 'hgtv', 'de'
        return self._get_disco_api_info(
            url, self._match_id(url), disco_host, realm, country)
|
||||
|
||||
|
||||
class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
|
||||
|
||||
_DISCO_CLIENT_VER = '27.43.0'
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6'
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:{self._DISCO_CLIENT_VER}',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
return self._download_json(
|
||||
@@ -365,9 +347,68 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
|
||||
|
||||
|
||||
class HGTVDeIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://de.hgtv.com/sendungen/mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
        'info_dict': {
            'id': '7332936',
            'ext': 'mp4',
            'display_id': 'mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
            'title': 'Vom Landleben ins Loft',
            'description': 'md5:e5f72c02c853970796dd3818f2e25745',
            'episode': 'Episode 7',
            'episode_number': 7,
            'season': 'Season 7',
            'season_number': 7,
            'series': 'Mein Kleinstadt-Traumhaus',
            'duration': 2645.0,
            'timestamp': 1725998100,
            'upload_date': '20240910',
            'creators': ['HGTV'],
            'tags': [],
            'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/08/09/82a386b9-c688-32c7-b9ff-0b13865f0bae.jpeg',
        },
    }]

    _PRODUCT = 'hgtv'
    _DISCO_API_PARAMS = {
        'disco_host': 'eu1-prod.disco-api.com',
        'realm': 'hgtv',
        'country': 'de',
    }

    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        """Add this site's disco params, client id and auth header to `headers` in place."""
        disco_params = f'realm={realm}'
        authorization = self._get_auth(disco_base, display_id, realm)
        # Applied in one step, after the auth value has been obtained
        headers.update({
            'x-disco-params': disco_params,
            'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
            'Authorization': authorization,
        })
|
||||
|
||||
|
||||
class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://go.discovery.com/video/in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister',
|
||||
'info_dict': {
|
||||
'id': '5352642',
|
||||
'display_id': 'in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trapped in a Twister',
|
||||
'description': 'Twisters destroy Midwest towns, trapping spotters in the eye of the storm.',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'series': 'In The Eye Of The Storm',
|
||||
'duration': 2490.237,
|
||||
'upload_date': '20240715',
|
||||
'timestamp': 1721008800,
|
||||
'tags': [],
|
||||
'creators': ['Discovery'],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/10/5e39637d-cabf-3ab3-8e9a-f4e9d37bc036.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
|
||||
'info_dict': {
|
||||
'id': '4164906',
|
||||
@@ -395,6 +436,26 @@ class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
class TravelChannelIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.travelchannel.com/video/the-dead-files-travel-channel/protect-the-children',
|
||||
'info_dict': {
|
||||
'id': '4710177',
|
||||
'display_id': 'the-dead-files-travel-channel/protect-the-children',
|
||||
'ext': 'mp4',
|
||||
'title': 'Protect the Children',
|
||||
'description': 'An evil presence threatens an Ohio woman\'s children and marriage.',
|
||||
'season_number': 14,
|
||||
'season': 'Season 14',
|
||||
'episode_number': 10,
|
||||
'episode': 'Episode 10',
|
||||
'series': 'The Dead Files',
|
||||
'duration': 2550.481,
|
||||
'timestamp': 1664510400,
|
||||
'upload_date': '20220930',
|
||||
'tags': [],
|
||||
'creators': ['Travel Channel'],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/17/5e45eace-de5d-343a-9293-f400a2aa77d5.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
|
||||
'info_dict': {
|
||||
'id': '2220256',
|
||||
@@ -422,6 +483,26 @@ class TravelChannelIE(DiscoveryPlusBaseIE):
|
||||
class CookingChannelIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.cookingchanneltv.com/video/bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson',
|
||||
'info_dict': {
|
||||
'id': '5350005',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson',
|
||||
'title': 'Titans vs Marcus Samuelsson',
|
||||
'description': 'Marcus Samuelsson throws his legendary global tricks at the Titans.',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'season_number': 3,
|
||||
'season': 'Season 3',
|
||||
'series': 'Bobby\'s Triple Threat',
|
||||
'duration': 2520.851,
|
||||
'upload_date': '20240710',
|
||||
'timestamp': 1720573200,
|
||||
'tags': [],
|
||||
'creators': ['Food Network'],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/04/529cd095-27ec-35c5-84e9-90ebd3e5d2da.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
|
||||
'info_dict': {
|
||||
'id': '2348634',
|
||||
@@ -449,6 +530,22 @@ class CookingChannelIE(DiscoveryPlusBaseIE):
|
||||
class HGTVUsaIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.hgtv.com/video/flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip',
|
||||
'info_dict': {
|
||||
'id': '5025585',
|
||||
'display_id': 'flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flip or Flop: The Final Flip',
|
||||
'description': 'Tarek and Christina are going their separate ways after one last flip!',
|
||||
'series': 'Flip or Flop: The Final Flip',
|
||||
'duration': 2580.644,
|
||||
'upload_date': '20231101',
|
||||
'timestamp': 1698811200,
|
||||
'tags': [],
|
||||
'creators': ['HGTV'],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/11/27/455caa6c-1462-3f14-b63d-a026d7a5e6d3.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
|
||||
'info_dict': {
|
||||
'id': '4289736',
|
||||
@@ -476,6 +573,26 @@ class HGTVUsaIE(DiscoveryPlusBaseIE):
|
||||
class FoodNetworkIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.foodnetwork.com/video/guys-grocery-games-food-network/wild-in-the-aisles',
|
||||
'info_dict': {
|
||||
'id': '2152549',
|
||||
'display_id': 'guys-grocery-games-food-network/wild-in-the-aisles',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wild in the Aisles',
|
||||
'description': 'The chefs make spaghetti and meatballs with "Out of Stock" ingredients.',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': 'Guy\'s Grocery Games',
|
||||
'tags': [],
|
||||
'creators': ['Food Network'],
|
||||
'duration': 2520.651,
|
||||
'upload_date': '20230623',
|
||||
'timestamp': 1687492800,
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/06/15/37fb5333-cad2-3dbb-af7c-c20ec77c89c6.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
|
||||
'info_dict': {
|
||||
'id': '4116449',
|
||||
@@ -503,6 +620,26 @@ class FoodNetworkIE(DiscoveryPlusBaseIE):
|
||||
class DestinationAmericaIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.destinationamerica.com/video/bbq-pit-wars-destination-america/smoke-on-the-water',
|
||||
'info_dict': {
|
||||
'id': '2218409',
|
||||
'display_id': 'bbq-pit-wars-destination-america/smoke-on-the-water',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoke on the Water',
|
||||
'description': 'The pitmasters head to Georgia for the Smoke on the Water BBQ Festival.',
|
||||
'season_number': 2,
|
||||
'season': 'Season 2',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': 'BBQ Pit Wars',
|
||||
'tags': [],
|
||||
'creators': ['Destination America'],
|
||||
'duration': 2614.878,
|
||||
'upload_date': '20230623',
|
||||
'timestamp': 1687492800,
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/c0f8e85d-9a10-3e6f-8e43-f6faafa81ba2.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
|
||||
'info_dict': {
|
||||
'id': '4210904',
|
||||
@@ -530,6 +667,26 @@ class DestinationAmericaIE(DiscoveryPlusBaseIE):
|
||||
class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.investigationdiscovery.com/video/deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca',
|
||||
'info_dict': {
|
||||
'id': '5341132',
|
||||
'display_id': 'deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca',
|
||||
'ext': 'mp4',
|
||||
'title': 'RIP Bianca',
|
||||
'description': 'A teenage influencer discovers an online world of threat, harm and danger.',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 3,
|
||||
'episode': 'Episode 3',
|
||||
'series': 'Deadly Influence: The Social Media Murders',
|
||||
'creators': ['Investigation Discovery'],
|
||||
'tags': [],
|
||||
'duration': 2490.888,
|
||||
'upload_date': '20240618',
|
||||
'timestamp': 1718672400,
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2024/06/15/b567c774-9e44-3c6c-b0ba-db860a73e812.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
|
||||
'info_dict': {
|
||||
'id': '2139409',
|
||||
@@ -557,6 +714,26 @@ class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
class AmHistoryChannelIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ahctv.com/video/blood-and-fury-americas-civil-war-ahc/battle-of-bull-run',
|
||||
'info_dict': {
|
||||
'id': '2139199',
|
||||
'display_id': 'blood-and-fury-americas-civil-war-ahc/battle-of-bull-run',
|
||||
'ext': 'mp4',
|
||||
'title': 'Battle of Bull Run',
|
||||
'description': 'Two untested armies clash in the first real battle of the Civil War.',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': 'Blood and Fury: America\'s Civil War',
|
||||
'duration': 2612.509,
|
||||
'upload_date': '20220923',
|
||||
'timestamp': 1663905600,
|
||||
'creators': ['AHC'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/4af61bd7-d705-3108-82c4-1a6e541e20fa.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
|
||||
'info_dict': {
|
||||
'id': '2309730',
|
||||
@@ -584,6 +761,26 @@ class AmHistoryChannelIE(DiscoveryPlusBaseIE):
|
||||
class ScienceChannelIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.sciencechannel.com/video/spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets',
|
||||
'info_dict': {
|
||||
'id': '2347335',
|
||||
'display_id': 'spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mystery of the Dead Planets',
|
||||
'description': 'Astronomers unmask the truly destructive nature of the cosmos.',
|
||||
'season_number': 7,
|
||||
'season': 'Season 7',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': 'Space\'s Deepest Secrets',
|
||||
'duration': 2524.989,
|
||||
'upload_date': '20230128',
|
||||
'timestamp': 1674882000,
|
||||
'creators': ['Science'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/30/3796829d-aead-3f9a-bd8d-e49048b3cdca.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
|
||||
'info_dict': {
|
||||
'id': '2842849',
|
||||
@@ -608,36 +805,29 @@ class ScienceChannelIE(DiscoveryPlusBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class DIYNetworkIE(DiscoveryPlusBaseIE):
    # Handles diynetwork.com/video/... URLs, with or without the "watch." host prefix.
    _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX
    # Both entries use the same URL: the first is a full metadata test that is
    # skipped (premium-only content), the second only checks URL matching.
    _TESTS = [{
        'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
        'info_dict': {
            # NOTE(review): the same id is used by the ahctv.com
            # "modern-sniper-ahc/army" test in this file; possibly copy-pasted.
            # The test is skipped, so confirm before relying on it.
            'id': '2309730',
            'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas',
            'ext': 'mp4',
            'title': 'Bringing Beach Life to Texas',
            'description': 'The Pool Kings give a family a day at the beach in their own backyard.',
            'season_number': 10,
            'episode_number': 2,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
        'only_matching': True,
    }]

    # Site-specific values; _DISCO_API_PARAMS is the keyword-argument set this
    # extractor family passes to _get_disco_api_info().
    _PRODUCT = 'diy'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.diynetwork.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class DiscoveryLifeIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoverylife.com/video/er-files-discovery-life-atve-us/sweet-charity',
|
||||
'info_dict': {
|
||||
'id': '2347614',
|
||||
'display_id': 'er-files-discovery-life-atve-us/sweet-charity',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sweet Charity',
|
||||
'description': 'The staff at Charity Hospital treat a serious foot infection.',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': 'ER Files',
|
||||
'duration': 2364.261,
|
||||
'upload_date': '20230721',
|
||||
'timestamp': 1689912000,
|
||||
'creators': ['Discovery Life'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/16/4b6f0124-360b-3546-b6a4-5552db886b86.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
|
||||
'info_dict': {
|
||||
'id': '2218238',
|
||||
@@ -665,6 +855,26 @@ class DiscoveryLifeIE(DiscoveryPlusBaseIE):
|
||||
class AnimalPlanetIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.animalplanet.com/video/mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru',
|
||||
'info_dict': {
|
||||
'id': '4650835',
|
||||
'display_id': 'mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Demon of Peru',
|
||||
'description': 'In Peru, a farming village is being terrorized by a “man-like beast.”',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 4,
|
||||
'episode': 'Episode 4',
|
||||
'series': 'Mysterious Creatures with Forrest Galante',
|
||||
'duration': 2490.488,
|
||||
'upload_date': '20230111',
|
||||
'timestamp': 1673413200,
|
||||
'creators': ['Animal Planet'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/01/6dbaa833-9a2e-3fee-9381-c19eddf67c0c.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
|
||||
'info_dict': {
|
||||
'id': '3338923',
|
||||
@@ -692,6 +902,26 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE):
|
||||
class TLCIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://go.tlc.com/video/90-day-the-last-resort-tlc-atve-us/the-last-chance',
|
||||
'info_dict': {
|
||||
'id': '5186422',
|
||||
'display_id': '90-day-the-last-resort-tlc-atve-us/the-last-chance',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Last Chance',
|
||||
'description': 'Infidelity shakes Kalani and Asuelu\'s world, and Angela threatens divorce.',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': '90 Day: The Last Resort',
|
||||
'duration': 5123.91,
|
||||
'upload_date': '20230815',
|
||||
'timestamp': 1692061200,
|
||||
'creators': ['TLC'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2023/08/08/0ee367e2-ac76-334d-bf23-dbf796696a24.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
|
||||
'info_dict': {
|
||||
'id': '2206540',
|
||||
@@ -716,93 +946,8 @@ class TLCIE(DiscoveryPlusBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class MotorTrendIE(DiscoveryPlusBaseIE):
    # Handles motortrend.com/video/... URLs, with or without the "watch." host prefix.
    _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX
    # Both entries use the same URL: the first is a full metadata test that is
    # skipped (premium-only content), the second only checks URL matching.
    _TESTS = [{
        'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
        'info_dict': {
            # NOTE(review): the expected id contains literal double quotes,
            # which looks unintentional. The test is skipped; confirm before
            # re-enabling it.
            'id': '"4859182"',
            'display_id': 'double-dakotas',
            'ext': 'mp4',
            'title': 'Double Dakotas',
            'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.',
            'season_number': 2,
            'episode_number': 3,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
        'only_matching': True,
    }]

    # Site-specific values; _DISCO_API_PARAMS is the keyword-argument set this
    # extractor family passes to _get_disco_api_info().
    _PRODUCT = 'vel'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.motortrend.com',
        'realm': 'go',
        'country': 'us',
    }
|
||||
|
||||
|
||||
class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
    # Handles both motortrendondemand.com/detail/... and
    # motortrend.com/plus/detail/... URLs.
    _VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
        'info_dict': {
            'id': '37699',
            'display_id': 'wheelstanding-dump-truck-stubby-bobs-comeback/37699',
            'ext': 'mp4',
            'title': 'Wheelstanding Dump Truck! Stubby Bob’s Comeback',
            'description': 'md5:996915abe52a1c3dfc83aecea3cce8e7',
            'season_number': 5,
            'episode_number': 52,
            'episode': 'Episode 52',
            'season': 'Season 5',
            'thumbnail': r're:^https?://.+\.jpe?g$',
            'timestamp': 1388534401,
            'duration': 1887.345,
            'creator': 'Originals',
            'series': 'Roadkill',
            'upload_date': '20140101',
            'tags': [],
        },
    }, {
        'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/',
        'info_dict': {
            'id': '4922860',
            'ext': 'mp4',
            'title': 'Roadworthy Rescues | Teaser Trailer',
            'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.',
            'display_id': 'roadworthy-rescues-teaser-trailer/4922860',
            'creator': 'Originals',
            'series': 'Roadworthy Rescues',
            'thumbnail': r're:^https?://.+\.jpe?g$',
            'upload_date': '20220907',
            'timestamp': 1662523200,
            'duration': 1066.356,
            'tags': [],
        },
    }, {
        'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439',
        'only_matching': True,
    }]

    # Site-specific values; _DISCO_API_PARAMS is the keyword-argument set this
    # extractor family passes to _get_disco_api_info().
    _PRODUCT = 'MTOD'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.motortrendondemand.com',
        'realm': 'motortrend',
        'country': 'us',
    }

    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        """Add this site's disco API headers to ``headers`` in place.

        Sets ``x-disco-params`` (realm only), an ``x-disco-client`` string
        built from ``self._PRODUCT`` and a fixed client version, and
        ``Authorization`` with whatever
        ``self._get_auth(disco_base, display_id, realm)`` returns.
        """
        # Build the full set first so that nothing is written to ``headers``
        # if obtaining the authorization value raises.
        site_headers = {
            'x-disco-params': f'realm={realm}',
            'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:4.39.1-gi1',
            'Authorization': self._get_auth(disco_base, display_id, realm),
        }
        headers.update(site_headers)
|
||||
|
||||
|
||||
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||
'info_dict': {
|
||||
@@ -823,14 +968,45 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
||||
}, {
|
||||
'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/gb/video/olympics/dplus-sport-dplus-sport-sport/rugby-sevens-australia-samoa',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'dplus_us'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'us1-prod-direct.discoveryplus.com',
|
||||
'realm': 'go',
|
||||
'country': 'us',
|
||||
}
|
||||
_PRODUCT = None
|
||||
_DISCO_API_PARAMS = None
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, country = self._match_valid_url(url).group('id', 'country')
|
||||
if not country:
|
||||
country = 'us'
|
||||
|
||||
self._PRODUCT = f'dplus_{country}'
|
||||
|
||||
if country in ('br', 'ca', 'us'):
|
||||
self._DISCO_API_PARAMS = {
|
||||
'disco_host': 'us1-prod-direct.discoveryplus.com',
|
||||
'realm': 'go',
|
||||
'country': country,
|
||||
}
|
||||
else:
|
||||
self._DISCO_API_PARAMS = {
|
||||
'disco_host': 'eu1-prod-direct.discoveryplus.com',
|
||||
'realm': 'dplay',
|
||||
'country': country,
|
||||
}
|
||||
|
||||
return self._get_disco_api_info(url, video_id, **self._DISCO_API_PARAMS)
|
||||
|
||||
|
||||
class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
|
||||
@@ -984,16 +1160,22 @@ class DiscoveryPlusShowBaseIE(DPlayBaseIE):
|
||||
|
||||
|
||||
class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video' + DPlayBaseIE._PATH_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/it/video/olympics/dplus-sport-dplus-sport-sport/water-polo-greece-italy',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/it/video/sport/dplus-sport-dplus-sport-sport/lisa-vittozzi-allinferno-e-ritorno',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'dplus_us'
|
||||
_PRODUCT = 'dplus_it'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'eu1-prod-direct.discoveryplus.com',
|
||||
'realm': 'dplay',
|
||||
@@ -1002,8 +1184,8 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6',
|
||||
'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
@@ -1044,39 +1226,3 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
|
||||
_SHOW_STR = 'show'
|
||||
_INDEX = 4
|
||||
_VIDEO_IE = DiscoveryPlusIndiaIE
|
||||
|
||||
|
||||
class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE):
    # Handles https://plus.globalcyclingnetwork.com/watch/<numeric id> URLs.
    _VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://plus.globalcyclingnetwork.com/watch/1397691',
        'info_dict': {
            'id': '1397691',
            'ext': 'mp4',
            'title': 'The Athertons: Mountain Biking\'s Fastest Family',
            'description': 'md5:75a81937fcd8b989eec6083a709cd837',
            'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png',
            'series': 'gcn',
            'creator': 'Gcn',
            'upload_date': '20210309',
            'timestamp': 1615248000,
            'duration': 2531.0,
            'tags': [],
        },
        'skip': 'Subscription required',
        'params': {'skip_download': 'm3u8'},
    }]

    # Site-specific values; _DISCO_API_PARAMS is the keyword-argument set this
    # extractor family passes to _get_disco_api_info().
    _PRODUCT = 'web'
    _DISCO_API_PARAMS = {
        'disco_host': 'disco-api-prod.globalcyclingnetwork.com',
        'realm': 'gcn',
        'country': 'us',
    }

    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        """Add this site's disco API headers to ``headers`` in place.

        Sets ``x-disco-params`` (realm only), an ``x-disco-client`` string
        built from ``self._PRODUCT`` and a fixed client version, and
        ``Authorization`` with whatever
        ``self._get_auth(disco_base, display_id, realm)`` returns.
        """
        # Build the full set first so that nothing is written to ``headers``
        # if obtaining the authorization value raises.
        site_headers = {
            'x-disco-params': f'realm={realm}',
            'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2',
            'Authorization': self._get_auth(disco_base, display_id, realm),
        }
        headers.update(site_headers)
|
||||
|
||||
@@ -6,8 +6,10 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,43 +38,58 @@ class DropboxIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
def _yield_decoded_parts(self, webpage):
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
yield base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
fn = urllib.parse.unquote(url_basename(url))
|
||||
title = os.path.splitext(fn)[0]
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if (self._og_search_title(webpage) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if '/sm/password' in part:
|
||||
webpage = self._download_webpage(
|
||||
update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
|
||||
break
|
||||
|
||||
if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
if password:
|
||||
content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
|
||||
payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(),
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
|
||||
data=urlencode_postdata({
|
||||
'is_xhr': 'true',
|
||||
't': self._get_cookies('https://www.dropbox.com')['t'].value,
|
||||
'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
|
||||
'password': password,
|
||||
'url': url,
|
||||
}))
|
||||
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Authentication failed!', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
elif self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
else:
|
||||
raise ExtractorError('Invalid password', expected=True)
|
||||
elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats, subtitles, has_anonymous_download = [], {}, False
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
formats, subtitles = [], {}
|
||||
has_anonymous_download = False
|
||||
thumbnail = None
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if not has_anonymous_download:
|
||||
has_anonymous_download = self._search_regex(
|
||||
r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
|
||||
r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
|
||||
transcode_url = self._search_regex(
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
|
||||
if not transcode_url:
|
||||
continue
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
|
||||
thumbnail = self._search_regex(
|
||||
r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
|
||||
break
|
||||
|
||||
# downloads enabled we can get the original file
|
||||
@@ -89,4 +106,5 @@ class DropboxIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
@@ -139,12 +139,11 @@ class DRTVIE(InfoExtractor):
|
||||
return
|
||||
|
||||
token_response = self._download_json(
|
||||
'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||
'https://isl.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||
note='Downloading anonymous token', headers={
|
||||
'content-type': 'application/json',
|
||||
}, query={
|
||||
'device': 'web_browser',
|
||||
'ff': 'idp,ldp,rpt',
|
||||
'device': 'phone_android',
|
||||
'lang': 'da',
|
||||
'supportFallbackToken': 'true',
|
||||
}, data=json.dumps({
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..utils import float_or_none, int_or_none, parse_iso8601
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
|
||||
@@ -37,12 +42,9 @@ class EitbIE(InfoExtractor):
|
||||
if not video_url:
|
||||
continue
|
||||
tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
|
||||
format_id = 'http'
|
||||
if tbr:
|
||||
format_id += f'-{int(tbr)}'
|
||||
formats.append({
|
||||
'url': rendition['PMD_URL'],
|
||||
'format_id': format_id,
|
||||
'format_id': join_nonempty('http', int_or_none(tbr)),
|
||||
'width': int_or_none(rendition.get('FRAME_WIDTH')),
|
||||
'height': int_or_none(rendition.get('FRAME_HEIGHT')),
|
||||
'tbr': tbr,
|
||||
|
||||
@@ -2,6 +2,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
@@ -13,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class EpidemicSoundIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/(?:(?P<sfx>sound-effects/tracks)|track)/(?P<id>[0-9a-zA-Z-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
|
||||
'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
|
||||
@@ -47,6 +48,20 @@ class EpidemicSoundIE(InfoExtractor):
|
||||
'release_timestamp': 1700535606,
|
||||
'release_date': '20231121',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.epidemicsound.com/sound-effects/tracks/2f02f54b-9faa-4daf-abac-1cfe9e9cef69/',
|
||||
'md5': '35d7cf05bd8b614a84f0495a05de9388',
|
||||
'info_dict': {
|
||||
'id': '208931',
|
||||
'ext': 'mp3',
|
||||
'upload_date': '20240603',
|
||||
'timestamp': 1717436529,
|
||||
'categories': ['appliance'],
|
||||
'display_id': '6b2NXLURPr',
|
||||
'duration': 1.0,
|
||||
'title': 'Oven, Grill, Door Open 01',
|
||||
'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -77,8 +92,10 @@ class EpidemicSoundIE(InfoExtractor):
|
||||
return f
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json(f'https://www.epidemicsound.com/json/track/{video_id}', video_id)
|
||||
video_id, is_sfx = self._match_valid_url(url).group('id', 'sfx')
|
||||
json_data = self._download_json(join_nonempty(
|
||||
'https://www.epidemicsound.com/json/track',
|
||||
is_sfx and 'kosmos-id', video_id, delim='/'), video_id)
|
||||
|
||||
thumbnails = traverse_obj(json_data, [('imageUrl', 'cover')])
|
||||
thumb_base_url = traverse_obj(json_data, ('coverArt', 'baseUrl', {url_or_none}))
|
||||
|
||||
@@ -29,9 +29,6 @@ class EpornerIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'proxy': '127.0.0.1:8118',
|
||||
},
|
||||
}, {
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
|
||||
@@ -207,7 +207,7 @@ class ERRJupiterIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': ('heading', {str}),
|
||||
'alt_title': ('subHeading', {str}),
|
||||
'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}),
|
||||
'description': (('lead', 'body'), {clean_html}, filter),
|
||||
'timestamp': ('created', {int_or_none}),
|
||||
'modified_timestamp': ('updated', {int_or_none}),
|
||||
'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
|
||||
|
||||
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ERTFlixBaseIE(InfoExtractor):
|
||||
@@ -74,29 +75,28 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
|
||||
|
||||
def _extract_formats_and_subs(self, video_id):
|
||||
media_info = self._call_api(video_id, codename=video_id)
|
||||
formats, subs = [], {}
|
||||
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
|
||||
for media in try_get(media_file, lambda x: x['Formats'], list) or []:
|
||||
fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
|
||||
if not fmt_url:
|
||||
continue
|
||||
ext = determine_ext(fmt_url)
|
||||
if ext == 'm3u8':
|
||||
formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
|
||||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
formats_, subs_ = self._extract_mpd_formats_and_subtitles(
|
||||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': str_or_none(media.get('Id')),
|
||||
})
|
||||
continue
|
||||
formats.extend(formats_)
|
||||
self._merge_subtitles(subs_, target=subs)
|
||||
formats, subtitles = [], {}
|
||||
for media in traverse_obj(media_info, (
|
||||
'MediaFiles', lambda _, v: v['RoleCodename'] == 'main',
|
||||
'Formats', lambda _, v: url_or_none(v['Url']))):
|
||||
fmt_url = media['Url']
|
||||
ext = determine_ext(fmt_url)
|
||||
if ext == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': str_or_none(media.get('Id')),
|
||||
})
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return formats, subs
|
||||
return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -294,37 +294,37 @@ class ESPNCricInfoIE(InfoExtractor):
|
||||
class WatchESPNIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'info_dict': {
|
||||
'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'id': '11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'ext': 'mp4',
|
||||
'title': 'Huddersfield vs. Burnley',
|
||||
'duration': 7500,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
'title': 'Abilene Chrstn vs. Texas Tech',
|
||||
'duration': 14166,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/11ce417a-6ac9-42b6-8a15-46aeb9ad5710/16x9.jpg?timestamp=202407252343&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'info_dict': {
|
||||
'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'id': '90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
|
||||
'duration': 8335,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'UC Davis vs. California',
|
||||
'duration': 9547,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'info_dict': {
|
||||
'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'id': 'c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Wheel - Episode 10',
|
||||
'duration': 3352,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'The College Football Show',
|
||||
'duration': 3639,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/c4313bbe-95b5-4bb8-b251-ac143ea0fc54/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -353,6 +353,13 @@ class WatchESPNIE(AdobePassIE):
|
||||
if not cookie:
|
||||
self.raise_login_required(method='cookies')
|
||||
|
||||
jwt = self._search_regex(r'=([^|]+)\|', cookie.value, 'cookie jwt')
|
||||
id_token = self._download_json(
|
||||
'https://registerdisney.go.com/jgc/v6/client/ESPN-ONESITE.WEB-PROD/guest/refresh-auth',
|
||||
None, 'Refreshing token', headers={'Content-Type': 'application/json'}, data=json.dumps({
|
||||
'refreshToken': json.loads(base64.urlsafe_b64decode(f'{jwt}==='))['refresh_token'],
|
||||
}).encode())['data']['token']['id_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'devices', video_id,
|
||||
headers={'Content-Type': 'application/json; charset=UTF-8'},
|
||||
@@ -371,7 +378,7 @@ class WatchESPNIE(AdobePassIE):
|
||||
})['access_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
|
||||
'accounts/grant', video_id, payload={'id_token': id_token},
|
||||
headers={
|
||||
'Authorization': token,
|
||||
'Content-Type': 'application/json; charset=UTF-8',
|
||||
|
||||
@@ -3,7 +3,12 @@ from ..utils import traverse_obj
|
||||
|
||||
|
||||
class EurosportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:
|
||||
(?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
|
||||
eurosport\.tvn24\.pl
|
||||
)/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
|
||||
'info_dict': {
|
||||
@@ -70,6 +75,42 @@ class EurosportIE(InfoExtractor):
|
||||
'duration': 105.0,
|
||||
'upload_date': '20230518',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TOKEN = None
|
||||
@@ -77,6 +118,7 @@ class EurosportIE(InfoExtractor):
|
||||
# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
|
||||
# but this method require to get sha256 hash
|
||||
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_initialize(self):
|
||||
if EurosportIE._TOKEN is None:
|
||||
@@ -98,13 +140,13 @@ class EurosportIE(InfoExtractor):
|
||||
for stream_type in json_data['attributes']['streaming']:
|
||||
if stream_type == 'hls':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
|
||||
elif stream_type == 'dash':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
|
||||
elif stream_type == 'mss':
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
@@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1692346159,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': '100063551323670',
|
||||
'duration': 3132.184,
|
||||
'duration': 3133.583,
|
||||
'view_count': int,
|
||||
'concurrent_view_count': 0,
|
||||
},
|
||||
@@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
|
||||
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
|
||||
'duration': 131.03,
|
||||
'concurrent_view_count': int,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
@@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': 'ca63897a90c9452efee5f8c40d080e25',
|
||||
'md5': '1659aa21fb3dd1585874f668e81a72c8',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'uploader_id': '100059479812265',
|
||||
'concurrent_view_count': int,
|
||||
'duration': 44.478,
|
||||
'duration': 44.181,
|
||||
},
|
||||
}, {
|
||||
# FIXME: unable to extract uploader, no formats found
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||
@@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1511548260,
|
||||
'upload_date': '20171124',
|
||||
'uploader': 'Vickie Gentry',
|
||||
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
|
||||
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 148.435,
|
||||
'duration': 148.224,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
@@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
|
||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Lela Evans',
|
||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
||||
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
|
||||
'upload_date': '20231228',
|
||||
'timestamp': 1703804085,
|
||||
'duration': 394.347,
|
||||
@@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20180523',
|
||||
'uploader': 'ESL One Dota 2',
|
||||
'uploader_id': '100066514874195',
|
||||
'duration': 4524.212,
|
||||
'duration': 4524.001,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
@@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
|
||||
'title': 'Josef',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
|
||||
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
|
||||
'timestamp': 1549275572,
|
||||
'duration': 3.413,
|
||||
'duration': 3.283,
|
||||
'uploader': 'Josef Novak',
|
||||
'description': '',
|
||||
'upload_date': '20190204',
|
||||
@@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# FIXME: Cannot parse data error
|
||||
# data.event.cover_media_renderer.cover_video
|
||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||
'info_dict': {
|
||||
@@ -498,7 +501,8 @@ class FacebookIE(InfoExtractor):
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||
or get_first(post, ('event', 'event_creator', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
|
||||
uploader = uploader_data.get('name') or (
|
||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
or self._search_regex(
|
||||
@@ -524,6 +528,11 @@ class FacebookIE(InfoExtractor):
|
||||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
|
||||
'like_count': ('likers', 'count', {int}),
|
||||
'comment_count': ('total_comment_count', {int}),
|
||||
'repost_count': ('share_count_reduced', {parse_count}),
|
||||
}), get_all=False),
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
@@ -555,11 +564,12 @@ class FacebookIE(InfoExtractor):
|
||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
|
||||
dash_manifest = traverse_obj(
|
||||
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str)
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
mpd_url=video.get('dash_manifest_url')))
|
||||
mpd_url=url_or_none(video.get('dash_manifest_url'))))
|
||||
|
||||
def process_formats(info):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
@@ -571,16 +581,21 @@ class FacebookIE(InfoExtractor):
|
||||
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
||||
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
rf'data-sjs>({{.*?{_filter}.*?}})</script>',
|
||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||
def yield_all_relay_data(_filter):
|
||||
for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})</script>', webpage):
|
||||
yield self._parse_json(relay_data, video_id, fatal=False) or {}
|
||||
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
return traverse_obj(extract_relay_data(_filter), (
|
||||
'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||
def extract_relay_data(_filter):
|
||||
return next(filter(None, yield_all_relay_data(_filter)), {})
|
||||
|
||||
def extract_relay_prefetched_data(_filter, target_keys=None):
|
||||
path = 'data'
|
||||
if target_keys is not None:
|
||||
path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys))
|
||||
return traverse_obj(yield_all_relay_data(_filter), (
|
||||
..., 'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
|
||||
..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||
..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
@@ -591,7 +606,8 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
if not video_data:
|
||||
data = extract_relay_prefetched_data(
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)',
|
||||
target_keys=('video', 'event', 'nodes', 'node', 'mediaset'))
|
||||
if data:
|
||||
entries = []
|
||||
|
||||
@@ -603,12 +619,13 @@ class FacebookIE(InfoExtractor):
|
||||
video = video['creation_story']
|
||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||
video.update(reel_info)
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||
('browser_native_sd_url', 'sd')):
|
||||
playable_url = video.get(key)
|
||||
playable_url = fmt_data.get(key)
|
||||
if not playable_url:
|
||||
continue
|
||||
if determine_ext(playable_url) == 'mpd':
|
||||
@@ -620,7 +637,10 @@ class FacebookIE(InfoExtractor):
|
||||
'quality': q(format_id) - 3,
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
extract_dash_manifest(fmt_data, formats)
|
||||
if not formats:
|
||||
# Do not append false positive entry w/o any formats
|
||||
return
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
|
||||
@@ -923,18 +943,21 @@ class FacebookReelIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||
'md5': 'f13dd37f2633595982db5ed8765474d3',
|
||||
'md5': 'a53256d10fc2105441fe0c4212ed8cea',
|
||||
'info_dict': {
|
||||
'id': '1195289147628387',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
|
||||
'description': 'md5:22f03309b216ac84720183961441d8db',
|
||||
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
|
||||
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'uploader': 'Beast Camp Training',
|
||||
'uploader_id': '100040874179269',
|
||||
'duration': 9.579,
|
||||
'timestamp': 1637502609,
|
||||
'upload_date': '20211121',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -954,6 +977,7 @@ class FacebookAdsIE(InfoExtractor):
|
||||
'id': '899206155126718',
|
||||
'ext': 'mp4',
|
||||
'title': 'video by Kandao',
|
||||
'description': 'md5:0822724069e3aca97cbed5dabbab282e',
|
||||
'uploader': 'Kandao',
|
||||
'uploader_id': '774114102743284',
|
||||
'uploader_url': r're:^https?://.*',
|
||||
@@ -962,6 +986,22 @@ class FacebookAdsIE(InfoExtractor):
|
||||
'upload_date': '20231214',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# key 'watermarked_video_sd_url' missing
|
||||
'url': 'https://www.facebook.com/ads/library/?id=501152689226254',
|
||||
'info_dict': {
|
||||
'id': '501152689226254',
|
||||
'ext': 'mp4',
|
||||
'title': 'video by mat.nawrocki',
|
||||
'description': 'md5:02a446ace7ff8c3c37a2892922492490',
|
||||
'uploader': 'mat.nawrocki',
|
||||
'uploader_id': '148586968341456',
|
||||
'uploader_url': r're:^https?://.*',
|
||||
'timestamp': 1723452305,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'upload_date': '20240812',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
|
||||
'info_dict': {
|
||||
@@ -1008,34 +1048,42 @@ class FacebookAdsIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
post_data = [self._parse_json(j, video_id, fatal=False)
|
||||
for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
|
||||
data = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
|
||||
post_data = traverse_obj(
|
||||
re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads}))
|
||||
data = get_first(post_data, (
|
||||
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ...,
|
||||
'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict}))
|
||||
if not data:
|
||||
raise ExtractorError('Unable to extract ad data')
|
||||
|
||||
title = data.get('title')
|
||||
if not title or title == '{{product.name}}':
|
||||
title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)
|
||||
markup_id = traverse_obj(data, ('body', '__m', {str}))
|
||||
markup = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'markup', lambda _, v: v[0].startswith(markup_id),
|
||||
..., '__html', {clean_html}, {lambda x: not x.startswith('{{product.') and x}, any))
|
||||
|
||||
info_dict = traverse_obj(data, {
|
||||
'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
|
||||
info_dict = merge_dicts({
|
||||
'title': title,
|
||||
'description': markup or None,
|
||||
}, traverse_obj(data, {
|
||||
'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}),
|
||||
'uploader': ('page_name', {str}),
|
||||
'uploader_id': ('page_id', {str_or_none}),
|
||||
'uploader_url': ('page_profile_uri', {url_or_none}),
|
||||
'timestamp': ('creation_time', {int_or_none}),
|
||||
'like_count': ('page_like_count', {int_or_none}),
|
||||
})
|
||||
}))
|
||||
|
||||
entries = []
|
||||
for idx, entry in enumerate(traverse_obj(
|
||||
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v[f]) for f in self._FORMATS_MAP))), 1,
|
||||
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1,
|
||||
):
|
||||
entries.append({
|
||||
'id': f'{video_id}_{idx}',
|
||||
'title': entry.get('title') or title,
|
||||
'description': entry.get('link_description') or info_dict.get('description'),
|
||||
'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'),
|
||||
'thumbnail': url_or_none(entry.get('video_preview_image_url')),
|
||||
'formats': self._extract_formats(entry),
|
||||
})
|
||||
|
||||
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
format_field,
|
||||
@@ -33,6 +34,7 @@ class FranceTVIE(InfoExtractor):
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
# tokenized url is in dinfo['video']['token']
|
||||
'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'info_dict': {
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
@@ -44,6 +46,19 @@ class FranceTVIE(InfoExtractor):
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# tokenized url is in dinfo['video']['token']['akamai']
|
||||
'url': 'francetv:c5bda21d-2c6f-4470-8849-3d8327adb2ba',
|
||||
'info_dict': {
|
||||
'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'timestamp': 1514118300,
|
||||
'duration': 2880,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20171224',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'francetv:162311093',
|
||||
'only_matching': True,
|
||||
@@ -68,6 +83,7 @@ class FranceTVIE(InfoExtractor):
|
||||
def _extract_video(self, video_id, hostname=None):
|
||||
is_live = None
|
||||
videos = []
|
||||
drm_formats = False
|
||||
title = None
|
||||
subtitle = None
|
||||
episode_number = None
|
||||
@@ -85,13 +101,12 @@ class FranceTVIE(InfoExtractor):
|
||||
'device_type': device_type,
|
||||
'browser': browser,
|
||||
'domain': hostname,
|
||||
}), fatal=False)
|
||||
}), fatal=False, expected_status=422) # 422 json gives detailed error code/message
|
||||
|
||||
if not dinfo:
|
||||
continue
|
||||
|
||||
video = traverse_obj(dinfo, ('video', {dict}))
|
||||
if video:
|
||||
if video := traverse_obj(dinfo, ('video', {dict})):
|
||||
videos.append(video)
|
||||
if duration is None:
|
||||
duration = video.get('duration')
|
||||
@@ -99,9 +114,19 @@ class FranceTVIE(InfoExtractor):
|
||||
is_live = video.get('is_live')
|
||||
if spritesheets is None:
|
||||
spritesheets = video.get('spritesheets')
|
||||
elif code := traverse_obj(dinfo, ('code', {int})):
|
||||
if code == 2009:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
elif code in (2015, 2017):
|
||||
# 2015: L'accès à cette vidéo est impossible. (DRM-only)
|
||||
# 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM)
|
||||
drm_formats = True
|
||||
continue
|
||||
self.report_warning(
|
||||
f'{self.IE_NAME} said: {code} "{clean_html(dinfo.get("message"))}"')
|
||||
continue
|
||||
|
||||
meta = traverse_obj(dinfo, ('meta', {dict}))
|
||||
if meta:
|
||||
if meta := traverse_obj(dinfo, ('meta', {dict})):
|
||||
if title is None:
|
||||
title = meta.get('title')
|
||||
# meta['pre_title'] contains season and episode number for series in format "S<ID> E<ID>"
|
||||
@@ -114,12 +139,15 @@ class FranceTVIE(InfoExtractor):
|
||||
if timestamp is None:
|
||||
timestamp = parse_iso8601(meta.get('broadcasted_at'))
|
||||
|
||||
if not videos and drm_formats:
|
||||
self.report_drm(video_id)
|
||||
|
||||
formats, subtitles, video_url = [], {}, None
|
||||
for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])):
|
||||
video_url = video['url']
|
||||
format_id = video.get('format')
|
||||
|
||||
if token_url := url_or_none(video.get('token')):
|
||||
if token_url := traverse_obj(video, ('token', (None, 'akamai'), {url_or_none}, any)):
|
||||
tokenized_url = traverse_obj(self._download_json(
|
||||
token_url, video_id, f'Downloading signed {format_id} manifest URL',
|
||||
fatal=False, query={
|
||||
@@ -225,13 +253,13 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||
'info_dict': {
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'timestamp': 1502623500,
|
||||
'duration': 2580,
|
||||
'timestamp': 1514118300,
|
||||
'duration': 2880,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170813',
|
||||
'upload_date': '20171224',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
||||
@@ -3,7 +3,7 @@ from .nexx import NexxIE
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
|
||||
'md5': '8610449476156f338761a75391b0017d',
|
||||
@@ -27,6 +27,9 @@ class FunkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -8,6 +8,9 @@ from .common import InfoExtractor
|
||||
from .commonprotocols import RtmpIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
MEDIA_EXTENSIONS,
|
||||
@@ -43,6 +46,7 @@ from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
from ..utils._utils import _UnsafeExtensionError
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -2167,7 +2171,15 @@ class GenericIE(InfoExtractor):
|
||||
urllib.parse.urlparse(fragment_query).query or fragment_query
|
||||
or urllib.parse.urlparse(manifest_url).query or None)
|
||||
|
||||
hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
|
||||
key_query = self._configuration_arg('key_query', [None], casesense=True)[0]
|
||||
if key_query is not None:
|
||||
info['extra_param_to_key_url'] = (
|
||||
urllib.parse.urlparse(key_query).query or key_query
|
||||
or urllib.parse.urlparse(manifest_url).query or None)
|
||||
|
||||
def hex_or_none(value):
|
||||
return value if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE) else None
|
||||
|
||||
info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
|
||||
'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
|
||||
}) or None
|
||||
@@ -2331,7 +2343,7 @@ class GenericIE(InfoExtractor):
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
if re.match(r'[^\s/]+\.[^\s/]+/', url):
|
||||
self.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
@@ -2364,6 +2376,11 @@ class GenericIE(InfoExtractor):
|
||||
else:
|
||||
video_id = self._generic_id(url)
|
||||
|
||||
# Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
|
||||
impersonate = self._configuration_arg('impersonate', ['false'])
|
||||
if 'false' in impersonate:
|
||||
impersonate = None
|
||||
|
||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
||||
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
|
||||
@@ -2372,10 +2389,29 @@ class GenericIE(InfoExtractor):
|
||||
# to accept raw bytes and being able to download only a chunk.
|
||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||
# after a HEAD request, but not sure if we can rely on this.
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}))
|
||||
try:
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}), impersonate=impersonate)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
|
||||
and e.cause.response.get_header('cf-mitigated') == 'challenge'
|
||||
and e.cause.response.extensions.get('impersonate') is None):
|
||||
raise
|
||||
cf_cookie_domain = traverse_obj(
|
||||
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
|
||||
('__cf_bm', 'domain'))
|
||||
if cf_cookie_domain:
|
||||
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
|
||||
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
|
||||
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
|
||||
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
|
||||
'how to install the required impersonation dependency, and ')
|
||||
raise ExtractorError(
|
||||
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
|
||||
|
||||
new_url = full_response.url
|
||||
if new_url != extract_basic_auth(url)[0]:
|
||||
self.report_following_redirect(new_url)
|
||||
@@ -2391,7 +2427,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = full_response.headers.get('Content-Type', '').lower()
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
self.report_detected('direct video link')
|
||||
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
||||
@@ -2438,9 +2474,13 @@ class GenericIE(InfoExtractor):
|
||||
if not is_html(first_bytes):
|
||||
self.report_warning(
|
||||
'URL could be a direct video link, returning it as such.')
|
||||
ext = determine_ext(url)
|
||||
if ext not in _UnsafeExtensionError.ALLOWED_EXTENSIONS:
|
||||
ext = 'unknown_video'
|
||||
info_dict.update({
|
||||
'direct': True,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
})
|
||||
return info_dict
|
||||
|
||||
|
||||
91
yt_dlp/extractor/germanupa.py
Normal file
91
yt_dlp/extractor/germanupa.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GermanupaIE(InfoExtractor):
    # Extractor for germanupa.de "mediathek" pages; it only resolves the page
    # to a Vimeo player URL or hands the page over to the generic extractor.
    IE_DESC = 'germanupa.de'
    _VALID_URL = r'https?://germanupa\.de/mediathek/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://germanupa.de/mediathek/4-figma-beratung-deine-sprechstunde-fuer-figma-fragen',
        'info_dict': {
            'id': '909179246',
            'title': 'Tutorial: #4 Figma Beratung - Deine Sprechstunde für Figma-Fragen',
            'ext': 'mp4',
            'uploader': 'German UPA',
            'uploader_id': 'germanupa',
            'thumbnail': 'https://i.vimeocdn.com/video/1792564420-7415283ccef8bf8702dab8c6b7515555ceeb7a1c11371ffcc133b8e887dbf70e-d_1280',
            'uploader_url': 'https://vimeo.com/germanupa',
            'duration': 3987,
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'audio, uses GenericIE',
        'url': 'https://germanupa.de/mediathek/live-vom-ux-festival-neuigkeiten-von-figma-jobmarkt-agenturszene-interview-zu-sustainable',
        'info_dict': {
            'id': '1867346676',
            'title': 'Live vom UX Festival: Neuigkeiten von Figma, Jobmarkt, Agenturszene & Interview zu Sustainable UX',
            'ext': 'opus',
            'timestamp': 1720545088,
            'upload_date': '20240709',
            'duration': 3910.557,
            'like_count': int,
            'description': 'md5:db2aed5ff131e177a7b33901e9a8db05',
            'uploader': 'German UPA',
            'repost_count': int,
            'genres': ['Science'],
            'license': 'all-rights-reserved',
            'uploader_url': 'https://soundcloud.com/user-80097677',
            'uploader_id': '471579486',
            'view_count': int,
            'comment_count': int,
            'thumbnail': 'https://i1.sndcdn.com/artworks-oCti2e9GhaZFWBqY-48ybGw-original.jpg',
        },
    }, {
        'note': 'Nur für Mitglieder/Just for members',
        'url': 'https://germanupa.de/mediathek/ux-festival-2024-usability-tests-und-ai',
        'info_dict': {
            'id': '986994430',
            'title': 'UX Festival 2024 "Usability Tests und AI" von Lennart Weber',
            'ext': 'mp4',
            'release_date': '20240719',
            'uploader_url': 'https://vimeo.com/germanupa',
            'timestamp': 1721373980,
            'license': 'by-sa',
            'like_count': int,
            'thumbnail': 'https://i.vimeocdn.com/video/1904187064-2a672630c30f9ad787bd390bff3f51d7506a3e8416763ba6dbf465732b165c5c-d_1280',
            'duration': 2146,
            'release_timestamp': 1721373980,
            'uploader': 'German UPA',
            'uploader_id': 'germanupa',
            'upload_date': '20240719',
            'comment_count': int,
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Resolve a mediathek page to its embedded Vimeo player or to the generic extractor."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page embeds its video through an oEmbed proxy iframe; the real
        # media URL is carried in that iframe URL's "url" query parameter.
        oembed_url = self._search_regex(
            r'<iframe[^>]+data-src\s*?=\s*?([\'"])(?P<url>https://germanupa\.de/media/oembed\?url=(?:(?!\1).)+)\1',
            webpage, 'embedded video', default=None, group='url')
        embedded_url = traverse_obj(oembed_url, ({parse_qs}, 'url', 0, {url_or_none}))

        if embedded_url:
            # Rewrite the public Vimeo page URL to its player form and pass
            # this page along as the referrer.
            player_url = embedded_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/')
            return self.url_result(VimeoIE._smuggle_referrer(player_url, url), VimeoIE, video_id)

        # No oEmbed iframe found: a login wrapper in the markup is treated as
        # a members-only page.
        login_wrapper = self._search_regex(
            r'<div[^>]+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1',
            webpage, 'login wrapper', default=None)
        if login_wrapper:
            self.raise_login_required('This video is only available for members')

        # Fall back to generic to extract audio
        return self.url_result(url, 'Generic')
|
||||
@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
|
||||
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
|
||||
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://academymel.online/3video_1',
|
||||
|
||||
@@ -5,6 +5,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
remove_start,
|
||||
@@ -287,7 +288,7 @@ class GoIE(AdobePassIE):
|
||||
if mobj:
|
||||
height = int(mobj.group(2))
|
||||
f.update({
|
||||
'format_id': (f'{format_id}-' if format_id else '') + f'{height}P',
|
||||
'format_id': join_nonempty(format_id, f'{height}P'),
|
||||
'width': int(mobj.group(1)),
|
||||
'height': height,
|
||||
})
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GolemIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_TEST = {
|
||||
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||
|
||||
32
yt_dlp/extractor/graspop.py
Normal file
32
yt_dlp/extractor/graspop.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import update_url, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GraspopIE(InfoExtractor):
    # Extractor for vod.graspop.be pages; the numeric id in the URL is used to
    # query the Proximus "festivals" stream endpoint.
    _VALID_URL = r'https?://vod\.graspop\.be/[a-z]{2}/(?P<id>\d+)/'
    _TESTS = [{
        'url': 'https://vod.graspop.be/fr/101556/thy-art-is-murder-concert/',
        'info_dict': {
            'id': '101556',
            'ext': 'mp4',
            'title': 'Thy Art Is Murder',
            'thumbnail': r're:https://cdn-mds\.pickx\.be/festivals/v3/global/original/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Download the stream metadata JSON and build the info dict from it."""
        video_id = self._match_id(url)
        stream_info = self._download_json(
            f'https://tv.proximus.be/MWC/videocenter/festivals/{video_id}/stream', video_id)

        # Downgrade manifest request to avoid incomplete certificate chain error
        manifest_url = update_url(stream_info['source']['assetUri'], scheme='http')
        formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4')

        optional_fields = traverse_obj(stream_info, {
            'title': ('name', {str}),
            'thumbnail': ('source', 'poster', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **optional_fields,
        }
|
||||
@@ -3,6 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
urljoin,
|
||||
xpath_element,
|
||||
@@ -69,7 +70,7 @@ class HBOBaseIE(InfoExtractor):
|
||||
height = format_info.get('height')
|
||||
fmt = {
|
||||
'url': path,
|
||||
'format_id': 'http{}'.format(f'-{height}p' if height else ''),
|
||||
'format_id': join_nonempty('http'. height and f'{height}p'),
|
||||
'width': format_info.get('width'),
|
||||
'height': height,
|
||||
}
|
||||
|
||||
@@ -44,9 +44,6 @@ class HKETVIE(InfoExtractor):
|
||||
'duration': 907,
|
||||
'subtitles': {},
|
||||
},
|
||||
'params': {
|
||||
'geo_verification_proxy': '<HK proxy here>',
|
||||
},
|
||||
'skip': 'Geo restricted to HK',
|
||||
}]
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class HRFernsehenIE(InfoExtractor):
|
||||
IE_NAME = 'hrfernsehen'
|
||||
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
|
||||
'md5': '5c4e0ba94677c516a2f65a84110fc536',
|
||||
|
||||
@@ -8,15 +8,19 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class HuyaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
IE_NAME = 'huya:live'
|
||||
IE_DESC = 'huya.com'
|
||||
TESTS = [{
|
||||
@@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '572329',
|
||||
'title': str,
|
||||
'ext': 'flv',
|
||||
'description': str,
|
||||
'is_live': True,
|
||||
'view_count': int,
|
||||
@@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor):
|
||||
fm = base64.b64decode(params['fm']).decode().split('_', 1)[0]
|
||||
ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
|
||||
return fm, ss
|
||||
|
||||
|
||||
class HuyaVideoIE(InfoExtractor):
    # Extractor for huya.com /video/play/<id>.html pages, backed by the
    # liveapi "getMomentContent" JSON endpoint.
    _VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P<id>\d+)\.html'
    IE_NAME = 'huya:video'
    IE_DESC = '虎牙视频'

    _TESTS = [{
        'url': 'https://www.huya.com/video/play/1002412640.html',
        'info_dict': {
            'id': '1002412640',
            'ext': 'mp4',
            'title': '8月3日',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 14,
            'uploader': '虎牙-ATS欧卡车队青木',
            'uploader_id': '1564376151',
            'upload_date': '20240803',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }, {
        'url': 'https://www.huya.com/video/play/556054543.html',
        'info_dict': {
            'id': '556054543',
            'ext': 'mp4',
            'title': '我不挑事 也不怕事',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 1864,
            'uploader': '卡尔',
            'uploader_id': '367138632',
            'upload_date': '20210811',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }]

    def _real_extract(self, url: str):
        """Fetch the moment's videoInfo JSON and map it onto an info dict."""
        video_id = self._match_id(url)
        api_response = self._download_json(
            'https://liveapi.huya.com/moment/getMomentContent', video_id,
            query={'videoId': video_id})
        video_info = api_response['data']['moment']['videoInfo']

        # Only definitions that carry a valid URL become formats.
        usable_definitions = traverse_obj(
            video_info, ('definitions', lambda _, v: url_or_none(v['url'])))
        formats = [{
            'url': entry['url'],
            **traverse_obj(entry, {
                'format_id': ('defName', {str}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': ('size', {int_or_none}),
            }),
        } for entry in usable_definitions]

        metadata = traverse_obj(video_info, {
            'title': ('videoTitle', {str}),
            'thumbnail': ('videoCover', {url_or_none}),
            'duration': ('videoDuration', {parse_duration}),
            'uploader': ('nickName', {str}),
            'uploader_id': ('uid', {str_or_none}),
            'upload_date': ('videoUploadTime', {unified_strdate}),
            'view_count': ('videoPlayNum', {int_or_none}),
            'comment_count': ('videoCommentNum', {int_or_none}),
            'like_count': ('favorCount', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -63,7 +62,7 @@ class IlPostIE(InfoExtractor):
|
||||
'url': ('podcast_raw_url', {url_or_none}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'timestamp': ('timestamp', {int_or_none}),
|
||||
'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('milliseconds', {float_or_none(scale=1000)}),
|
||||
'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ class ImgurBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ImgurIE(ImgurBaseIE):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://imgur.com/A61SaA1',
|
||||
@@ -54,6 +54,22 @@ class ImgurIE(ImgurBaseIE):
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||
},
|
||||
}, {
|
||||
# Test with URL slug
|
||||
'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1',
|
||||
'info_dict': {
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 'MRW gifv is up and running without any bugs',
|
||||
'timestamp': 1416446068,
|
||||
'upload_date': '20141120',
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1416446068,
|
||||
'release_date': '20141120',
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
'only_matching': True,
|
||||
@@ -92,6 +108,7 @@ class ImgurIE(ImgurBaseIE):
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1710491255,
|
||||
'release_date': '20240315',
|
||||
'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -208,7 +225,10 @@ class ImgurIE(ImgurBaseIE):
|
||||
}), get_all=False),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'thumbnail': url_or_none(search('thumbnailUrl')),
|
||||
'thumbnails': [{
|
||||
'url': thumbnail_url,
|
||||
'http_headers': {'Accept': '*/*'},
|
||||
}] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None,
|
||||
'http_headers': {'Accept': '*/*'},
|
||||
}
|
||||
|
||||
@@ -252,17 +272,9 @@ class ImgurGalleryBaseIE(ImgurBaseIE):
|
||||
|
||||
class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
IE_NAME = 'imgur:gallery'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://imgur.com/gallery/Q95ko',
|
||||
'info_dict': {
|
||||
'id': 'Q95ko',
|
||||
'title': 'Adding faces make every GIF better',
|
||||
},
|
||||
'playlist_count': 25,
|
||||
'skip': 'Zoinks! You\'ve taken a wrong turn.',
|
||||
}, {
|
||||
# TODO: static images - replace with animated/video gallery
|
||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||
'only_matching': True,
|
||||
@@ -280,7 +292,27 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
'release_timestamp': 1358554297,
|
||||
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||
'release_date': '20130119',
|
||||
'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
|
||||
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# Test with slug
|
||||
'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx',
|
||||
'add_ies': ['Imgur'],
|
||||
'info_dict': {
|
||||
'id': 'YcAQlkx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||
'timestamp': 1358554297,
|
||||
'upload_date': '20130119',
|
||||
'uploader_id': '1648642',
|
||||
'uploader': 'wittyusernamehere',
|
||||
'release_timestamp': 1358554297,
|
||||
'release_date': '20130119',
|
||||
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
@@ -317,6 +349,13 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
'title': 'Penguins !',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ',
|
||||
'info_dict': {
|
||||
'id': '6lAn9VQ',
|
||||
'title': 'Penguins !',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
|
||||
'add_ies': ['Imgur'],
|
||||
@@ -357,7 +396,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
|
||||
class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||
IE_NAME = 'imgur:album'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
_GALLERY = False
|
||||
_TESTS = [{
|
||||
# TODO: only static images - replace with animated/video gallery
|
||||
@@ -372,6 +411,14 @@ class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||
'title': 'enen-no-shouboutai',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# Test with URL slug
|
||||
'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX',
|
||||
'info_dict': {
|
||||
'id': 'iX265HX',
|
||||
'title': 'enen-no-shouboutai',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://imgur.com/a/8pih2Ed',
|
||||
'info_dict': {
|
||||
|
||||
@@ -48,7 +48,6 @@ class InstagramBaseIE(InfoExtractor):
|
||||
'X-IG-WWW-Claim': '0',
|
||||
'Origin': 'https://www.instagram.com',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@@ -435,10 +434,10 @@ class InstagramIE(InstagramBaseIE):
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'query_hash': '9f8827793ef34641b2fb195d4d41151c',
|
||||
'doc_id': '8845758582119845',
|
||||
'variables': json.dumps(variables, separators=(',', ':')),
|
||||
})
|
||||
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
|
||||
media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
|
||||
|
||||
if not general_info:
|
||||
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
||||
@@ -453,7 +452,7 @@ class InstagramIE(InstagramBaseIE):
|
||||
else:
|
||||
self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
|
||||
webpage = self._download_webpage(
|
||||
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False)
|
||||
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False) or ''
|
||||
additional_data = self._search_json(
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
|
||||
if not additional_data and not media:
|
||||
|
||||
@@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor):
|
||||
'id': 'p51388',
|
||||
'ext': 'mp4',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
|
||||
'upload_date': '20201103',
|
||||
'timestamp': 1604437480,
|
||||
'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
|
||||
'episode': 'Partička (92)',
|
||||
'season': 'Partička',
|
||||
'series': 'Prima Partička',
|
||||
'episode_number': 92,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
|
||||
'info_dict': {
|
||||
'id': 'p1412199',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 3,
|
||||
'episode': 'Tenerife: V říši ohně',
|
||||
'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
|
||||
'duration': 3111.0,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
|
||||
'title': 'Tenerife: V říši ohně',
|
||||
'timestamp': 1711825800,
|
||||
'upload_date': '20240330',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
@@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor):
|
||||
video_id = self._search_regex((
|
||||
r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'let\s+videos\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
), webpage, 'real id', group='id', default=None)
|
||||
|
||||
if not video_id:
|
||||
@@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor):
|
||||
final_result = self._search_json_ld(webpage, video_id, default={})
|
||||
final_result.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': final_result.get('title') or title,
|
||||
'thumbnail': self._html_search_meta(
|
||||
['thumbnail', 'og:image', 'twitter:image'],
|
||||
webpage, 'thumbnail', default=None),
|
||||
|
||||
@@ -2,7 +2,6 @@ import functools
|
||||
import hashlib
|
||||
import json
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
@@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
|
||||
|
||||
|
||||
class SangiinInstructionIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
_VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
IE_DESC = False # this shouldn't be listed as a supported site
|
||||
|
||||
def _real_extract(self, url):
|
||||
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
|
||||
raise ExtractorError(
|
||||
'Copy the link from the button below the video description/player '
|
||||
'and use that link to download. If there is no button in the frame, '
|
||||
'get the URL of the frame showing the video.', expected=True)
|
||||
|
||||
|
||||
class SangiinIE(InfoExtractor):
|
||||
|
||||
@@ -326,11 +326,11 @@ class JioCinemaIE(JioCinemaBaseIE):
|
||||
# fallback metadata
|
||||
'title': ('name', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('show', 'name', {str}, {lambda x: x or None}),
|
||||
'series': ('show', 'name', {str}, filter),
|
||||
'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
|
||||
'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}),
|
||||
'season_number': ('episode', 'season', {int_or_none}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}),
|
||||
'episode_number': ('episode', 'episodeNo', {int_or_none}, filter),
|
||||
'age_limit': ('ageNemonic', {parse_age_limit}),
|
||||
'duration': ('totalDuration', {float_or_none}),
|
||||
'thumbnail': ('images', {url_or_none}),
|
||||
@@ -338,10 +338,10 @@ class JioCinemaIE(JioCinemaBaseIE):
|
||||
**traverse_obj(metadata, ('result', 0, {
|
||||
'title': ('fullTitle', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('showName', {str}, {lambda x: x or None}),
|
||||
'season': ('seasonName', {str}, {lambda x: x or None}),
|
||||
'series': ('showName', {str}, filter),
|
||||
'season': ('seasonName', {str}, filter),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'season_id': ('seasonId', {str}, {lambda x: x or None}),
|
||||
'season_id': ('seasonId', {str}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'timestamp': ('uploadTime', {int_or_none}),
|
||||
@@ -364,20 +364,25 @@ class JioCinemaSeriesIE(JioCinemaBaseIE):
|
||||
'title': 'naagin',
|
||||
},
|
||||
'playlist_mincount': 120,
|
||||
}, {
|
||||
'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820',
|
||||
'info_dict': {
|
||||
'id': '3499820',
|
||||
'title': 'mtv-splitsvilla-x5',
|
||||
},
|
||||
'playlist_mincount': 310,
|
||||
}]
|
||||
|
||||
def _entries(self, series_id):
|
||||
seasons = self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id,
|
||||
'Downloading series metadata JSON', query={
|
||||
'sort': 'season:asc',
|
||||
'id': series_id,
|
||||
'responseType': 'common',
|
||||
})
|
||||
seasons = traverse_obj(self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id,
|
||||
'Downloading series metadata JSON', query={'responseType': 'common'}), (
|
||||
'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter',
|
||||
'trayTabs', lambda _, v: v['id']))
|
||||
|
||||
for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1):
|
||||
for season_num, season in enumerate(seasons, start=1):
|
||||
season_id = season['id']
|
||||
label = season.get('season') or season_num
|
||||
label = season.get('label') or season_num
|
||||
for page_num in itertools.count(1):
|
||||
episodes = traverse_obj(self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
|
||||
|
||||
@@ -158,7 +158,7 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
|
||||
|
||||
class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
|
||||
'info_dict': {
|
||||
@@ -173,6 +173,13 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||
'title': 'Mood Hindi',
|
||||
},
|
||||
'playlist_mincount': 801,
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
|
||||
'info_dict': {
|
||||
'id': 'Me5RridRfDk_',
|
||||
'title': 'Taaza Tunes',
|
||||
},
|
||||
'playlist_mincount': 301,
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
|
||||
https?://
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
|
||||
@@ -3,43 +3,52 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class KhanAcademyBaseIE(InfoExtractor):
|
||||
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||
|
||||
_PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4'
|
||||
|
||||
def _parse_video(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video['youtubeId'],
|
||||
'id': video.get('slug'),
|
||||
'title': video.get('title'),
|
||||
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'description': video.get('description'),
|
||||
'id': video['youtubeId'],
|
||||
'ie_key': 'Youtube',
|
||||
**traverse_obj(video, {
|
||||
'display_id': ('id', {str_or_none}),
|
||||
'title': ('translatedTitle', {str}),
|
||||
'thumbnail': ('thumbnailUrls', ..., 'url', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'description': ('description', {str}),
|
||||
}, get_all=False),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
content = self._download_json(
|
||||
'https://www.khanacademy.org/api/internal/graphql/FetchContentData',
|
||||
display_id, query={
|
||||
'https://www.khanacademy.org/api/internal/graphql/ContentForPath', display_id,
|
||||
query={
|
||||
'fastly_cacheable': 'persist_until_publish',
|
||||
'hash': '4134764944',
|
||||
'lang': 'en',
|
||||
'pcv': self._PUBLISHED_CONTENT_VERSION,
|
||||
'hash': '3712657851',
|
||||
'variables': json.dumps({
|
||||
'path': display_id,
|
||||
'queryParams': 'lang=en',
|
||||
'isModal': False,
|
||||
'followRedirects': True,
|
||||
'countryCode': 'US',
|
||||
'kaLocale': 'en',
|
||||
'clientPublishedContentVersion': self._PUBLISHED_CONTENT_VERSION,
|
||||
}),
|
||||
})['data']['contentJson']
|
||||
return self._parse_component_props(self._parse_json(content, display_id)['componentProps'])
|
||||
'lang': 'en',
|
||||
})['data']['contentRoute']['listedPathData']
|
||||
return self._parse_component_props(content, display_id)
|
||||
|
||||
|
||||
class KhanAcademyIE(KhanAcademyBaseIE):
|
||||
@@ -47,64 +56,98 @@ class KhanAcademyIE(KhanAcademyBaseIE):
|
||||
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
|
||||
_TEST = {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
|
||||
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
|
||||
'md5': '1d5c2e70fa6aa29c38eca419f12515ce',
|
||||
'info_dict': {
|
||||
'id': 'FlIG3TvQCBQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'The one-time pad',
|
||||
'description': 'The perfect cipher',
|
||||
'display_id': '716378217',
|
||||
'duration': 176,
|
||||
'uploader': 'Brit Cruise',
|
||||
'uploader_id': 'khanacademy',
|
||||
'uploader': 'Khan Academy',
|
||||
'uploader_id': '@khanacademy',
|
||||
'uploader_url': 'https://www.youtube.com/@khanacademy',
|
||||
'upload_date': '20120411',
|
||||
'timestamp': 1334170113,
|
||||
'license': 'cc-by-nc-sa',
|
||||
'live_status': 'not_live',
|
||||
'channel': 'Khan Academy',
|
||||
'channel_id': 'UC4a-Gbdw7vOaccHmFo40b9g',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC4a-Gbdw7vOaccHmFo40b9g',
|
||||
'channel_is_verified': True,
|
||||
'playable_in_embed': True,
|
||||
'categories': ['Education'],
|
||||
'creators': ['Brit Cruise'],
|
||||
'tags': [],
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int,
|
||||
'thumbnail': str,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'heatmap': list,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}
|
||||
|
||||
def _parse_component_props(self, component_props):
|
||||
video = component_props['tutorialPageData']['contentModel']
|
||||
info = self._parse_video(video)
|
||||
author_names = video.get('authorNames')
|
||||
info.update({
|
||||
'uploader': ', '.join(author_names) if author_names else None,
|
||||
'timestamp': parse_iso8601(video.get('dateAdded')),
|
||||
'license': video.get('kaUserLicense'),
|
||||
})
|
||||
return info
|
||||
def _parse_component_props(self, component_props, display_id):
|
||||
video = component_props['content']
|
||||
return {
|
||||
**self._parse_video(video),
|
||||
**traverse_obj(video, {
|
||||
'creators': ('authorNames', ..., {str}),
|
||||
'timestamp': ('dateAdded', {parse_iso8601}),
|
||||
'license': ('kaUserLicense', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class KhanAcademyUnitIE(KhanAcademyBaseIE):
|
||||
IE_NAME = 'khanacademy:unit'
|
||||
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
|
||||
_TEST = {
|
||||
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('1,2', '')) + '/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
|
||||
'info_dict': {
|
||||
'id': 'cryptography',
|
||||
'id': 'x48c910b6',
|
||||
'title': 'Cryptography',
|
||||
'description': 'How have humans protected their secret messages through history? What has changed today?',
|
||||
'display_id': 'computing/computer-science/cryptography',
|
||||
'_old_archive_ids': ['khanacademyunit cryptography'],
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science',
|
||||
'info_dict': {
|
||||
'id': 'x301707a0',
|
||||
'title': 'Computer science theory',
|
||||
'description': 'md5:4b472a4646e6cf6ec4ccb52c4062f8ba',
|
||||
'display_id': 'computing/computer-science',
|
||||
'_old_archive_ids': ['khanacademyunit computer-science'],
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}]
|
||||
|
||||
def _parse_component_props(self, component_props):
|
||||
curation = component_props['curation']
|
||||
def _parse_component_props(self, component_props, display_id):
|
||||
course = component_props['course']
|
||||
selected_unit = traverse_obj(course, (
|
||||
'unitChildren', lambda _, v: v['relativeUrl'] == f'/{display_id}', any)) or course
|
||||
|
||||
entries = []
|
||||
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
|
||||
for tutorial_number, tutorial in enumerate(tutorials, 1):
|
||||
chapter_info = {
|
||||
'chapter': tutorial.get('title'),
|
||||
'chapter_number': tutorial_number,
|
||||
'chapter_id': tutorial.get('id'),
|
||||
}
|
||||
for content_item in (tutorial.get('contentItems') or []):
|
||||
if content_item.get('kind') == 'Video':
|
||||
info = self._parse_video(content_item)
|
||||
info.update(chapter_info)
|
||||
entries.append(info)
|
||||
def build_entry(entry):
|
||||
return self.url_result(urljoin(
|
||||
'https://www.khanacademy.org', entry['canonicalUrl']),
|
||||
KhanAcademyIE, title=entry.get('translatedTitle'))
|
||||
|
||||
entries = traverse_obj(selected_unit, (
|
||||
(('unitChildren', ...), None), 'allOrderedChildren', ..., 'curatedChildren',
|
||||
lambda _, v: v['contentKind'] == 'Video' and v['canonicalUrl'], {build_entry}))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, curation.get('unit'), curation.get('title'),
|
||||
curation.get('description'))
|
||||
entries,
|
||||
display_id=display_id,
|
||||
**traverse_obj(selected_unit, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('translatedTitle', {str}),
|
||||
'description': ('translatedDescription', {str}),
|
||||
'_old_archive_ids': ('slug', {str}, {lambda x: [make_archive_id(self, x)] if x else None}),
|
||||
}))
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
@@ -25,104 +29,212 @@ class KickBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
|
||||
return self._download_json(
|
||||
f'https://kick.com/api/v1/{path}', display_id, note=note,
|
||||
f'https://kick.com/api/{path}', display_id, note=note,
|
||||
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
|
||||
|
||||
|
||||
class KickIE(KickBaseIE):
|
||||
IE_NAME = 'kick:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/yuppy',
|
||||
'url': 'https://kick.com/buddha',
|
||||
'info_dict': {
|
||||
'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21',
|
||||
'id': '92722911-nopixel-40',
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'description': str,
|
||||
'channel': 'yuppy',
|
||||
'channel_id': '33538',
|
||||
'uploader': 'Yuppy',
|
||||
'uploader_id': '33793',
|
||||
'upload_date': str,
|
||||
'live_status': 'is_live',
|
||||
'timestamp': int,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'categories': list,
|
||||
'upload_date': str,
|
||||
'channel': 'buddha',
|
||||
'channel_id': '32807',
|
||||
'uploader': 'Buddha',
|
||||
'uploader_id': '33057',
|
||||
'live_status': 'is_live',
|
||||
'concurrent_view_count': int,
|
||||
'release_timestamp': int,
|
||||
'age_limit': 18,
|
||||
'release_date': str,
|
||||
},
|
||||
'skip': 'livestream',
|
||||
'params': {'skip_download': 'livestream'},
|
||||
# 'skip': 'livestream',
|
||||
}, {
|
||||
'url': 'https://kick.com/kmack710',
|
||||
'url': 'https://kick.com/xqc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel = self._match_id(url)
|
||||
response = self._call_api(f'channels/{channel}', channel)
|
||||
response = self._call_api(f'v2/channels/{channel}', channel)
|
||||
if not traverse_obj(response, 'livestream', expected_type=dict):
|
||||
raise UserNotLive(video_id=channel)
|
||||
|
||||
return {
|
||||
'id': str(traverse_obj(
|
||||
response, ('livestream', ('slug', 'id')), get_all=False, default=channel)),
|
||||
'formats': self._extract_m3u8_formats(
|
||||
response['playback_url'], channel, 'mp4', live=True),
|
||||
'title': traverse_obj(
|
||||
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
|
||||
'description': traverse_obj(response, ('user', 'bio')),
|
||||
'channel': channel,
|
||||
'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
|
||||
'uploader': traverse_obj(response, 'name', ('user', 'username')),
|
||||
'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
|
||||
'is_live': True,
|
||||
'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))),
|
||||
'thumbnail': traverse_obj(
|
||||
response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none),
|
||||
'categories': traverse_obj(response, ('recent_categories', ..., 'name')),
|
||||
'formats': self._extract_m3u8_formats(response['playback_url'], channel, 'mp4', live=True),
|
||||
**traverse_obj(response, {
|
||||
'id': ('livestream', 'slug', {str}),
|
||||
'title': ('livestream', 'session_title', {str}),
|
||||
'description': ('user', 'bio', {str}),
|
||||
'channel_id': (('id', ('livestream', 'channel_id')), {int}, {str_or_none}, any),
|
||||
'uploader': (('name', ('user', 'username')), {str}, any),
|
||||
'uploader_id': (('user_id', ('user', 'id')), {int}, {str_or_none}, any),
|
||||
'timestamp': ('livestream', 'created_at', {unified_timestamp}),
|
||||
'release_timestamp': ('livestream', 'start_time', {unified_timestamp}),
|
||||
'thumbnail': ('livestream', 'thumbnail', 'url', {url_or_none}),
|
||||
'categories': ('recent_categories', ..., 'name', {str}),
|
||||
'concurrent_view_count': ('livestream', 'viewer_count', {int_or_none}),
|
||||
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class KickVODIE(KickBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
IE_NAME = 'kick:vod'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3',
|
||||
'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'md5': '3870f94153e40e7121a6e46c068b70cb',
|
||||
'info_dict': {
|
||||
'id': '58bac65b-e641-4476-a7ba-3707a35e60e3',
|
||||
'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'ext': 'mp4',
|
||||
'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠',
|
||||
'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d',
|
||||
'channel': 'jaredfps',
|
||||
'channel_id': '26608',
|
||||
'uploader': 'JaredFPS',
|
||||
'uploader_id': '26799',
|
||||
'upload_date': '20240402',
|
||||
'timestamp': 1712097108,
|
||||
'duration': 33859.0,
|
||||
'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
|
||||
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
|
||||
'channel': 'xqc',
|
||||
'channel_id': '668',
|
||||
'uploader': 'xQc',
|
||||
'uploader_id': '676',
|
||||
'upload_date': '20240909',
|
||||
'timestamp': 1725919141,
|
||||
'duration': 10155.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'categories': ['Call of Duty: Warzone'],
|
||||
'view_count': int,
|
||||
'categories': ['Just Chatting'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'expected_warnings': [r'impersonation'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
response = self._call_api(f'video/{video_id}', video_id)
|
||||
response = self._call_api(f'v1/video/{video_id}', video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'),
|
||||
'title': traverse_obj(
|
||||
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
|
||||
'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')),
|
||||
'channel': traverse_obj(response, ('livestream', 'channel', 'slug')),
|
||||
'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))),
|
||||
'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')),
|
||||
'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))),
|
||||
'timestamp': unified_timestamp(response.get('created_at')),
|
||||
'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000),
|
||||
'thumbnail': traverse_obj(
|
||||
response, ('livestream', 'thumbnail'), expected_type=url_or_none),
|
||||
'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')),
|
||||
**traverse_obj(response, {
|
||||
'title': ('livestream', ('session_title', 'slug'), {str}, any),
|
||||
'description': ('livestream', 'channel', 'user', 'bio', {str}),
|
||||
'channel': ('livestream', 'channel', 'slug', {str}),
|
||||
'channel_id': ('livestream', 'channel', 'id', {int}, {str_or_none}),
|
||||
'uploader': ('livestream', 'channel', 'user', 'username', {str}),
|
||||
'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'duration': ('livestream', 'duration', {float_or_none(scale=1000)}),
|
||||
'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
|
||||
'categories': ('livestream', 'categories', ..., 'name', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class KickClipIE(KickBaseIE):
    # Handles Kick clip URLs in both the `/clips/<clip id>` path form
    # and the `?clip=<clip id>` query-string form
    IE_NAME = 'kick:clips'
    _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?P<id>clip_[\w-]+)'
    _TESTS = [{
        'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
        'info_dict': {
            'id': 'clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
            'ext': 'mp4',
            'title': 'Maddy detains Abd D:',
            'channel': 'mxddy',
            'channel_id': '133789',
            'uploader': 'AbdCreates',
            'uploader_id': '3309077',
            'thumbnail': r're:^https?://.*\.jpeg',
            'duration': 35,
            'timestamp': 1682481453,
            'upload_date': '20230426',
            'view_count': int,
            'like_count': int,
            'categories': ['VALORANT'],
            'age_limit': 18,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://kick.com/destiny?clip=clip_01H9SKET879NE7N9RJRRDS98J3',
        'info_dict': {
            'id': 'clip_01H9SKET879NE7N9RJRRDS98J3',
            'title': 'W jews',
            'ext': 'mp4',
            'channel': 'destiny',
            'channel_id': '1772249',
            'uploader': 'punished_furry',
            'uploader_id': '2027722',
            'duration': 49.0,
            'upload_date': '20230908',
            'timestamp': 1694150180,
            'thumbnail': 'https://clips.kick.com/clips/j3/clip_01H9SKET879NE7N9RJRRDS98J3/thumbnail.png',
            'view_count': int,
            'like_count': int,
            'categories': ['Just Chatting'],
            'age_limit': 0,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5',
        'info_dict': {
            'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5',
            'ext': 'mp4',
            'title': 'KLJASLDJKLJKASDLJKDAS',
            'channel': 'spreen',
            'channel_id': '5312671',
            'uploader': 'AnormalBarraBaja',
            'uploader_id': '26518262',
            'duration': 43.0,
            'upload_date': '20240927',
            'timestamp': 1727399987,
            'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp',
            'view_count': int,
            'like_count': int,
            'categories': ['Minecraft'],
            'age_limit': 0,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        clip_id = self._match_id(url)
        clip_data = self._call_api(f'v2/clips/{clip_id}/play', clip_id)['clip']
        media_url = clip_data['clip_url']

        # An m3u8 URL is expanded into its HLS formats; any other URL is
        # offered as a single direct format
        is_hls = determine_ext(media_url) == 'm3u8'
        formats = self._extract_m3u8_formats(media_url, clip_id, 'mp4') if is_hls else [{'url': media_url}]

        metadata = traverse_obj(clip_data, {
            'title': ('title', {str}),
            'channel': ('channel', 'slug', {str}),
            'channel_id': ('channel', 'id', {int}, {str_or_none}),
            'uploader': ('creator', 'username', {str}),
            'uploader_id': ('creator', 'id', {int}, {str_or_none}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'duration': ('duration', {float_or_none}),
            'categories': ('category', 'name', {str}, all),
            'timestamp': ('created_at', {parse_iso8601}),
            'view_count': ('views', {int_or_none}),
            'like_count': ('likes', {int_or_none}),
            # `is_mature` is mapped to an age limit of 18, anything else to 0
            'age_limit': ('is_mature', {bool}, {lambda x: 18 if x else 0}),
        })

        return {
            'id': clip_id,
            'formats': formats,
            **metadata,
        }
|
||||
|
||||
126
yt_dlp/extractor/kika.py
Normal file
126
yt_dlp/extractor/kika.py
Normal file
@@ -0,0 +1,126 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class KikaIE(InfoExtractor):
    IE_DESC = 'KiKA.de'
    _VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P<id>[a-z-]+\d+)'
    _GEO_COUNTRIES = ['DE']

    _TESTS = [{
        'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
        'md5': 'fbfc8da483719ef06f396e5e5b938c69',
        'info_dict': {
            'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
            'ext': 'mp4',
            'upload_date': '20240831',
            'timestamp': 1725126600,
            'season_number': 2024,
            'modified_date': '20240831',
            'episode': 'Episode 476',
            'episode_number': 476,
            'season': 'Season 2024',
            'duration': 634,
            'title': 'logo! vom Samstag, 31. August 2024',
            'modified_timestamp': 1725129983,
        },
    }, {
        'url': 'https://www.kika.de/kaltstart/videos/video92498',
        'md5': '710ece827e5055094afeb474beacb7aa',
        'info_dict': {
            'id': 'video92498',
            'ext': 'mp4',
            'title': '7. Wo ist Leo?',
            'description': 'md5:fb48396a5b75068bcac1df74f1524920',
            'duration': 436,
            'timestamp': 1702926876,
            'upload_date': '20231218',
            'episode_number': 7,
            'modified_date': '20240319',
            'modified_timestamp': 1710880610,
            'episode': 'Episode 7',
            'season_number': 1,
            'season': 'Season 1',
        },
    }, {
        'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
        'md5': 'ffd1b700d7de0a6616a1d08544c77294',
        'info_dict': {
            'id': 'video90088',
            'ext': 'mp4',
            'upload_date': '20221102',
            'timestamp': 1667390580,
            'duration': 197,
            'modified_timestamp': 1711093771,
            'episode_number': 8,
            'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
            'modified_date': '20240322',
            'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
            'season_number': 1,
            'episode': 'Episode 8',
            'season': 'Season 1',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The video document carries the metadata and points at a second
        # JSON document that lists the media assets and subtitle URLs
        metadata = self._download_json(f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id)
        assets = self._download_json(metadata['assets']['url'], video_id)

        # Both subtitle tracks are filed under 'de': TTML first, then WebVTT
        german_subs = []
        for asset_key, sub_ext in (('videoSubtitle', 'ttml'), ('webvttUrl', 'vtt')):
            sub_url = url_or_none(assets.get(asset_key))
            if sub_url:
                german_subs.append({
                    'url': sub_url,
                    'ext': sub_ext,
                })
        subtitles = {'de': german_subs} if german_subs else {}

        return {
            'id': video_id,
            'formats': list(self._extract_formats(assets, video_id)),
            'subtitles': subtitles,
            **traverse_obj(metadata, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'timestamp': ('date', {parse_iso8601}),
                'modified_timestamp': ('modificationDate', {parse_iso8601}),
                # Prefer the numeric field, fall back to parsing the duration string
                'duration': ((
                    ('durationInSeconds', {int_or_none}),
                    ('duration', {parse_duration})), any),
                'episode_number': ('episodeNumber', {int_or_none}),
                'season_number': ('season', {int_or_none}),
            }),
        }

    def _extract_formats(self, media_info, video_id):
        def drop_unknown_bitrate(bitrate):
            # A bitrate of -1 means "unknown"
            return None if bitrate == -1 else bitrate

        # Only assets that carry a valid URL are considered
        for asset in traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url']))):
            asset_url = asset['url']
            asset_ext = determine_ext(asset_url)
            if asset_ext == 'm3u8':
                yield from self._extract_m3u8_formats(
                    asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                continue

            yield {
                'url': asset_url,
                'format_id': asset_ext,
                **traverse_obj(asset, {
                    'width': ('frameWidth', {int_or_none}),
                    'height': ('frameHeight', {int_or_none}),
                    # NB: filesize is 0 if unknown, bitrate is -1 if unknown
                    'filesize': ('fileSize', {int_or_none}, filter),
                    'abr': ('bitrateAudio', {int_or_none}, {drop_unknown_bitrate}),
                    'vbr': ('bitrateVideo', {int_or_none}, {drop_unknown_bitrate}),
                }),
            }
|
||||
114
yt_dlp/extractor/laracasts.py
Normal file
114
yt_dlp/extractor/laracasts.py
Normal file
@@ -0,0 +1,114 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LaracastsBaseIE(InfoExtractor):
    def _get_prop_data(self, url, display_id):
        # The page state is JSON stored in the `data-page` attribute of the
        # element with id "app"; only its `props` member is returned
        webpage = self._download_webpage(url, display_id)
        app_html = get_element_html_by_id('app', webpage)
        return traverse_obj(app_html, ({extract_attributes}, 'data-page', {json.loads}, 'props'))

    def _parse_episode(self, episode):
        has_vimeo_id = traverse_obj(episode, 'vimeoId')
        if not has_vimeo_id:
            self.raise_login_required('This video is only available for subscribers.')

        # The Vimeo player URL is handed to VimeoIE with laracasts.com smuggled as referrer
        player_url = VimeoIE._smuggle_referrer(
            f'https://player.vimeo.com/video/{episode["vimeoId"]}', 'https://laracasts.com/')
        episode_info = traverse_obj(episode, {
            'id': ('id', {int}, {str_or_none}),
            'webpage_url': ('path', {urljoin('https://laracasts.com')}),
            'title': ('title', {clean_html}),
            'season_number': ('chapter', {int_or_none}),
            'episode_number': ('position', {int_or_none}),
            'description': ('body', {clean_html}),
            'thumbnail': ('largeThumbnail', {url_or_none}),
            'duration': ('length', {int_or_none}),
            'date': ('dateSegments', 'published', {unified_strdate}),
        })

        return self.url_result(player_url, VimeoIE, url_transparent=True, **episode_info)
|
||||
|
||||
|
||||
class LaracastsIE(LaracastsBaseIE):
    # Single episode of a Laracasts series (`/series/<slug>/episodes/<number>`)
    IE_NAME = 'laracasts'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+/episodes/\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1',
        'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc',
        'info_dict': {
            'id': '922040563',
            'title': 'Hello, Laravel',
            'ext': 'mp4',
            'duration': 519,
            'date': '20240312',
            'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png',
            'description': 'md5:ddd658bb241975871d236555657e1dd1',
            'season_number': 1,
            'season': 'Season 1',
            'episode_number': 1,
            'episode': 'Episode 1',
            'uploader': 'Laracasts',
            'uploader_id': 'user20182673',
            'uploader_url': 'https://vimeo.com/user20182673',
        },
        'expected_warnings': ['Failed to parse XML'],  # TODO: Remove when vimeo extractor is fixed
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        # The episode data is the `lesson` member of the page props
        page_props = self._get_prop_data(url, display_id)
        return self._parse_episode(page_props['lesson'])
|
||||
|
||||
|
||||
class LaracastsPlaylistIE(LaracastsBaseIE):
    # Whole Laracasts series (`/series/<slug>`), returned as a playlist of its episodes
    IE_NAME = 'laracasts:series'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11',
        'info_dict': {
            'title': '30 Days to Learn Laravel',
            'id': '210',
            'thumbnail': 'https://laracasts.s3.amazonaws.com/series/thumbnails/social-cards/30-days-to-learn-laravel-11.png?v=2',
            'duration': 30600.0,
            'modified_date': '20240511',
            'description': 'md5:27c260a1668a450984e8f901579912dd',
            'categories': ['Frameworks'],
            'tags': ['Laravel'],
            'display_id': '30-days-to-learn-laravel-11',
        },
        'playlist_count': 30,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        series = self._get_prop_data(url, display_id)['series']

        series_info = traverse_obj(series, {
            'title': ('title', {str}),
            'id': ('id', {int}, {str_or_none}),
            'description': ('body', {clean_html}),
            'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
            'duration': ('runTime', {parse_duration}),
            'categories': ('taxonomy', 'name', {str}, all, filter),
            'tags': ('topics', ..., 'name', {str}),
            'modified_date': ('lastUpdated', {unified_strdate}),
        })
        playlist_info = {'display_id': display_id, **series_info}

        # Only episodes with a truthy `vimeoId` are turned into entries
        entries = traverse_obj(
            series, ('chapters', ..., 'episodes', lambda _, v: v['vimeoId'], {self._parse_episode}))

        return self.playlist_result(entries, **playlist_info)
|
||||
@@ -66,7 +66,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
'license': ('value', 'license', {str}),
|
||||
'timestamp': ('timestamp', {int_or_none}),
|
||||
'release_timestamp': ('value', 'release_time', {int_or_none}),
|
||||
'tags': ('value', 'tags', ..., {lambda x: x or None}),
|
||||
'tags': ('value', 'tags', ..., filter),
|
||||
'duration': ('value', stream_type, 'duration', {int_or_none}),
|
||||
'channel': ('signing_channel', 'value', 'title', {str}),
|
||||
'channel_id': ('signing_channel', 'claim_id', {str}),
|
||||
@@ -136,6 +136,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
|
||||
class LBRYIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry'
|
||||
IE_DESC = 'odysee.com'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
|
||||
(?:\$/(?:download|embed)/)?
|
||||
(?P<id>
|
||||
@@ -364,6 +365,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
|
||||
class LBRYChannelIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:channel'
|
||||
IE_DESC = 'odysee.com channels'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||
@@ -391,6 +393,7 @@ class LBRYChannelIE(LBRYBaseIE):
|
||||
|
||||
class LBRYPlaylistIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:playlist'
|
||||
IE_DESC = 'odysee.com playlists'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
|
||||
|
||||
72
yt_dlp/extractor/learningonscreen.py
Normal file
72
yt_dlp/extractor/learningonscreen.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class LearningOnScreenIE(InfoExtractor):
    # Matches programme pages under /ondemand/index.php/prog/; the run of word
    # characters following /prog/ is captured as the video id.
    _VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013',
        'info_dict': {
            'id': '005D81B2',
            'ext': 'mp4',
            'title': 'Planet Earth',
            'duration': 3600.0,
            'timestamp': 1164567600.0,
            'upload_date': '20061126',
            'thumbnail': 'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg',
        },
    }]

    def _real_initialize(self):
        # Request a login via session cookies unless the PHPSESSID-BOB-LIVE
        # cookie is already set for the site.
        site_cookies = self._get_cookies('https://learningonscreen.ac.uk/')
        if site_cookies.get('PHPSESSID-BOB-LIVE'):
            return
        self.raise_login_required(method='session_cookies')

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Fields read from inside the element with id="programme-details"
        detail_fields = {
            'title': ({find_element(tag='h2')}, {clean_html}),
            # Only the text preceding the first '<' of the broadcast-date
            # element is handed to unified_timestamp
            'timestamp': (
                {find_element(cls='broadcast-date')},
                {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
            'duration': (
                {find_element(cls='prog-running-time')}, {clean_html}, {parse_duration}),
        }
        details = traverse_obj(webpage, (
            {find_element(id='programme-details', html=True)}, detail_fields))

        # The title is taken out of `details` so that it is never merged into
        # the entries below; fall back to the data-record-title attribute of
        # the element with id="add-to-existing-playlist"
        title = details.pop('title', None)
        if not title:
            title = traverse_obj(webpage, (
                {find_element(id='add-to-existing-playlist', html=True)},
                {extract_attributes}, 'data-record-title', {clean_html}))

        entries = self._parse_html5_media_entries(
            'https://stream.learningonscreen.ac.uk', webpage, video_id, m3u8_id='hls', mpd_id='dash',
            _headers={'Origin': 'https://learningonscreen.ac.uk', 'Referer': 'https://learningonscreen.ac.uk/'})
        if not entries:
            raise ExtractorError('No video found')

        if len(entries) == 1:
            # Single media element: merge its data with the page details
            return {
                **entries[0],
                **details,
                'id': video_id,
                'title': title,
            }

        # Several media elements: return a playlist. The duration is removed
        # from the shared details and passed to the playlist result instead,
        # while each entry gets a numbered id and title.
        duration = details.pop('duration', None)
        for number, entry in enumerate(entries, start=1):
            entry.update(details)
            entry['id'] = join_nonempty(video_id, number)
            entry['title'] = join_nonempty(title, number)
        return self.playlist_result(entries, video_id, title, duration=duration)
|
||||
@@ -6,12 +6,10 @@ from ..utils import (
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
get_element_text_and_html_by_tag,
|
||||
parse_duration,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class ListenNotesIE(InfoExtractor):
|
||||
@@ -22,14 +20,14 @@ class ListenNotesIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'KrDgvNb_u1n',
|
||||
'ext': 'mp3',
|
||||
'title': 'md5:32236591a921adf17bbdbf0441b6c0e9',
|
||||
'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd',
|
||||
'duration': 2148.0,
|
||||
'channel': 'Thriving on Overload',
|
||||
'title': r're:Tim O’Reilly on noticing things other people .{113}',
|
||||
'description': r're:(?s)‘’We shape reality by what we notice and .{27459}',
|
||||
'duration': 2215.0,
|
||||
'channel': 'Amplifying Cognition',
|
||||
'channel_id': 'ed84wITivxF',
|
||||
'episode_id': 'e1312583fa7b4e24acfbb5131050be00',
|
||||
'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/',
|
||||
'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/amplifying-cognition-ross-dawson-Iemft4Gdr0k-ed84wITivxF.300x300.jpg',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/amplifying-cognition-ross-dawson-ed84wITivxF/',
|
||||
'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'],
|
||||
},
|
||||
}, {
|
||||
@@ -39,13 +37,13 @@ class ListenNotesIE(InfoExtractor):
|
||||
'id': 'lwEA3154JzG',
|
||||
'ext': 'mp3',
|
||||
'title': 'Episode 177: WireGuard with Jason Donenfeld',
|
||||
'description': 'md5:24744f36456a3e95f83c1193a3458594',
|
||||
'description': r're:(?s)Jason Donenfeld lead developer joins us this hour to discuss WireGuard, .{3169}',
|
||||
'duration': 3861.0,
|
||||
'channel': 'Ask Noah Show',
|
||||
'channel_id': '4DQTzdS5-j7',
|
||||
'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/',
|
||||
'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg',
|
||||
'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-gD7vG150cxf-4DQTzdS5-j7.300x300.jpg',
|
||||
'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'],
|
||||
},
|
||||
}]
|
||||
@@ -70,7 +68,7 @@ class ListenNotesIE(InfoExtractor):
|
||||
'id': audio_id,
|
||||
'url': data['audio'],
|
||||
'title': (data.get('data-title')
|
||||
or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
|
||||
or traverse_obj(webpage, ({find_element(tag='h1')}, {clean_html}))
|
||||
or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')),
|
||||
'description': (self._clean_description(get_element_by_class('ln-text-p', webpage))
|
||||
or strip_or_none(description)),
|
||||
|
||||
@@ -1,86 +1,11 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
    _TESTS = [{
        'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
        'info_dict': {
            'id': '10809',
            'ext': 'mp4',
            'title': "Put'ka: Trys Klausimai",
            'upload_date': '20161216',
            'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
            'age_limit': 18,
            'duration': 117,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1481904000,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }, {
        'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
        'info_dict': {
            'id': '10467',
            'ext': 'mp4',
            'title': 'Nėrdas: Kompiuterio Valymas',
            'upload_date': '20150113',
            'description': 'md5:7352d113a242a808676ff17e69db6a69',
            'age_limit': 18,
            'duration': 346,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1421164800,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }, {
        'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
        'only_matching': True,
    }]
    # Maps the API's pgRating values to age limits; any other rating yields 0
    _AGE_LIMITS = {
        'N-7': 7,
        'N-14': 14,
        'S': 18,
    }
    # Filled with (prefix, videoUrl, secureTokenParams)
    _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'

    def _real_extract(self, url):
        # The first URL group serves as display id; the optional second group
        # is sent to the API, with '0' substituted when it is missing
        display_id, episode_id = self._match_valid_url(url).groups()

        api_url = 'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, episode_id or '0')
        video_info = self._download_json(api_url, display_id)['videoConfig']['videoInfo']

        # The id reported by the API becomes the final video id
        video_id = str(video_info['id'])
        title = video_info['title']

        # The playlist prefix depends on the isQualityChangeAvailable flag
        if video_info.get('isQualityChangeAvailable'):
            prefix = 'smil'
        else:
            prefix = 'mp4'
        m3u8_url = self._M3U8_TEMPL % (
            prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or '')
        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
            'duration': int_or_none(video_info.get('duration')),
            'description': clean_html(video_info.get('htmlDescription')),
            'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
            'timestamp': parse_iso8601(video_info.get('airDate')),
            'view_count': int_or_none(video_info.get('viewsCount')),
        }
|
||||
|
||||
|
||||
class LnkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'
|
||||
|
||||
@@ -92,9 +92,9 @@ class LoomIE(InfoExtractor):
|
||||
},
|
||||
'params': {'videopassword': 'seniorinfants2'},
|
||||
}, {
|
||||
# embed, transcoded-url endpoint sends empty JSON response
|
||||
# embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
|
||||
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'md5': '8488817242a0db1cb2ad0ea522553cf6',
|
||||
'md5': 'b321d261656848c184a94e3b93eae28d',
|
||||
'info_dict': {
|
||||
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'ext': 'mp4',
|
||||
@@ -104,6 +104,7 @@ class LoomIE(InfoExtractor):
|
||||
'timestamp': 1657216459,
|
||||
'duration': 181,
|
||||
},
|
||||
'params': {'format': 'bestvideo'}, # Test video-only fixup
|
||||
'expected_warnings': ['Failed to parse JSON'],
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -293,7 +294,11 @@ class LoomIE(InfoExtractor):
|
||||
format_url = format_url.replace('-split.m3u8', '.m3u8')
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
|
||||
# Sometimes only split video/audio formats are available, need to fixup video-only formats
|
||||
is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec'))
|
||||
for fmt in m3u8_formats:
|
||||
if is_not_premerged and fmt.get('vcodec') != 'none':
|
||||
fmt['acodec'] = 'none'
|
||||
yield {
|
||||
**fmt,
|
||||
'url': update_url(fmt['url'], query=query),
|
||||
|
||||
@@ -114,7 +114,7 @@ class LSMLREmbedIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
query = parse_qs(url)
|
||||
video_id = traverse_obj(query, (
|
||||
('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)
|
||||
('show', 'id'), 0, {int_or_none}, filter, {str_or_none}), get_all=False)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_data, media_data = self._search_regex(
|
||||
|
||||
@@ -57,6 +57,6 @@ class MagentaMusikIE(InfoExtractor):
|
||||
'duration': ('runtimeInSeconds', {int_or_none}),
|
||||
'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
|
||||
'release_year': ('yearOfProduction', {int_or_none}),
|
||||
'categories': ('mainGenre', {str}, {lambda x: x and [x]}),
|
||||
'categories': ('mainGenre', {str}, all, filter),
|
||||
})),
|
||||
}
|
||||
|
||||
@@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
|
||||
video_data = None
|
||||
|
||||
# fix meta_url if missing the host address
|
||||
if re.match(r'^\/\+\/', meta_url):
|
||||
if re.match(r'\/\+\/', meta_url):
|
||||
meta_url = urljoin('https://my.mail.ru', meta_url)
|
||||
|
||||
if meta_url:
|
||||
|
||||
@@ -1,51 +1,35 @@
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import xpath_text
|
||||
|
||||
|
||||
class MatchTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)'
|
||||
_VALID_URL = [
|
||||
r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
|
||||
r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://matchtv.ru/#live-player',
|
||||
'url': 'http://matchtv.ru/on-air/',
|
||||
'info_dict': {
|
||||
'id': 'matchtv-live',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://matchtv.ru/on-air/',
|
||||
'url': 'https://video.matchtv.ru/iframe/channel/106',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = 'matchtv-live'
|
||||
video_url = self._download_json(
|
||||
'http://player.matchtv.ntvplus.tv/player/smil', video_id,
|
||||
query={
|
||||
'ts': '',
|
||||
'quality': 'SD',
|
||||
'contentId': '561d2c0df7159b37178b4567',
|
||||
'sign': '',
|
||||
'includeHighlights': '0',
|
||||
'userId': '',
|
||||
'sessionId': random.randint(1, 1000000000),
|
||||
'contentType': 'channel',
|
||||
'timeShift': '0',
|
||||
'platform': 'portal',
|
||||
},
|
||||
headers={
|
||||
'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
|
||||
})['data']['videoUrl']
|
||||
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
|
||||
formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8'
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': 'Матч ТВ - Прямой эфир',
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
|
||||
}
|
||||
|
||||
@@ -13,8 +13,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
IE_DESC = 'MDR.DE'
|
||||
_VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
@@ -34,30 +34,6 @@ class MDRIE(InfoExtractor):
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
|
||||
'md5': '4930515e36b06c111213e80d1e4aad0e',
|
||||
'info_dict': {
|
||||
'id': '19636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baumhaus vom 30. Oktober 2015',
|
||||
'duration': 134,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
|
||||
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
|
||||
'info_dict': {
|
||||
'id': '8182',
|
||||
'ext': 'mp4',
|
||||
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
||||
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
||||
'timestamp': 1482541200,
|
||||
'upload_date': '20161224',
|
||||
'duration': 4628,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
}, {
|
||||
# audio with alternative playerURL pattern
|
||||
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
|
||||
@@ -68,28 +44,7 @@ class MDRIE(InfoExtractor):
|
||||
'duration': 3239,
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
}, {
|
||||
# empty bitrateVideo and bitrateAudio
|
||||
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
|
||||
'info_dict': {
|
||||
'id': '128372',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der kleine Wichtel kehrt zurück',
|
||||
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
|
||||
'duration': 4876,
|
||||
'timestamp': 1607823300,
|
||||
'upload_date': '20201213',
|
||||
'uploader': 'ZDF',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
|
||||
'only_matching': True,
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -16,6 +16,15 @@ class MediaKlikkIE(InfoExtractor):
|
||||
(?P<id>[^/#?_]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/',
|
||||
'info_dict': {
|
||||
'id': '668177',
|
||||
'title': 'Az ajtó',
|
||||
'display_id': 'az-ajto',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg',
|
||||
},
|
||||
}, {
|
||||
# (old) mediaklikk. date in html.
|
||||
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
|
||||
'info_dict': {
|
||||
@@ -37,6 +46,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230903',
|
||||
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) m4sport
|
||||
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
|
||||
@@ -59,6 +69,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230908',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# m4sport with *video/ url and no date
|
||||
'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
|
||||
@@ -69,6 +80,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) hirado
|
||||
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
|
||||
@@ -90,6 +102,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230911',
|
||||
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}, {
|
||||
# (old) petofilive
|
||||
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
|
||||
@@ -112,6 +125,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230909',
|
||||
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -133,7 +147,9 @@ class MediaKlikkIE(InfoExtractor):
|
||||
r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
|
||||
|
||||
player_data['video'] = player_data.pop('token')
|
||||
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
|
||||
player_page = self._download_webpage(
|
||||
'https://player.mediaklikk.hu/playernew/player.php', video_id,
|
||||
query=player_data, headers={'Referer': url})
|
||||
player_json = self._search_json(
|
||||
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
|
||||
playlist_url = traverse_obj(
|
||||
@@ -141,14 +157,14 @@ class MediaKlikkIE(InfoExtractor):
|
||||
if not playlist_url:
|
||||
raise ExtractorError('Unable to extract playlist url')
|
||||
|
||||
formats = self._extract_wowza_formats(
|
||||
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user