mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-01-27 09:11:32 +00:00
Merge branch 'yt-dlp:master' into fix-dailywire
This commit is contained in:
@@ -76,6 +76,7 @@ from .aenetworks import (
|
||||
)
|
||||
from .aeonco import AeonCoIE
|
||||
from .afreecatv import (
|
||||
AfreecaTVCatchStoryIE,
|
||||
AfreecaTVIE,
|
||||
AfreecaTVLiveIE,
|
||||
AfreecaTVUserIE,
|
||||
@@ -216,6 +217,7 @@ from .bbc import (
|
||||
BBCCoUkIPlayerGroupIE,
|
||||
BBCCoUkPlaylistIE,
|
||||
)
|
||||
from .beacon import BeaconTvIE
|
||||
from .beatbump import (
|
||||
BeatBumpPlaylistIE,
|
||||
BeatBumpVideoIE,
|
||||
@@ -361,7 +363,10 @@ from .ccc import (
|
||||
)
|
||||
from .ccma import CCMAIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .cda import (
|
||||
CDAIE,
|
||||
CDAFolderIE,
|
||||
)
|
||||
from .cellebrite import CellebriteIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .cgtn import CGTNIE
|
||||
@@ -396,8 +401,6 @@ from .cmt import CMTIE
|
||||
from .cnbc import CNBCVideoIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNArticleIE,
|
||||
CNNBlogsIE,
|
||||
CNNIndonesiaIE,
|
||||
)
|
||||
from .comedycentral import (
|
||||
@@ -504,7 +507,6 @@ from .dhm import DHMIE
|
||||
from .digitalconcerthall import DigitalConcertHallIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .discogs import DiscogsReleasePlaylistIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .disney import DisneyIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
from .dlf import (
|
||||
@@ -532,16 +534,12 @@ from .dplay import (
|
||||
DiscoveryPlusIndiaShowIE,
|
||||
DiscoveryPlusItalyIE,
|
||||
DiscoveryPlusItalyShowIE,
|
||||
DIYNetworkIE,
|
||||
DPlayIE,
|
||||
FoodNetworkIE,
|
||||
GlobalCyclingNetworkPlusIE,
|
||||
GoDiscoveryIE,
|
||||
HGTVDeIE,
|
||||
HGTVUsaIE,
|
||||
InvestigationDiscoveryIE,
|
||||
MotorTrendIE,
|
||||
MotorTrendOnDemandIE,
|
||||
ScienceChannelIE,
|
||||
TravelChannelIE,
|
||||
)
|
||||
@@ -734,6 +732,7 @@ from .genius import (
|
||||
GeniusIE,
|
||||
GeniusLyricsIE,
|
||||
)
|
||||
from .germanupa import GermanupaIE
|
||||
from .getcourseru import (
|
||||
GetCourseRuIE,
|
||||
GetCourseRuPlayerIE,
|
||||
@@ -780,6 +779,7 @@ from .gopro import GoProIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .gotostage import GoToStageIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .graspop import GraspopIE
|
||||
from .gronkh import (
|
||||
GronkhFeedIE,
|
||||
GronkhIE,
|
||||
@@ -826,7 +826,10 @@ from .hungama import (
|
||||
HungamaIE,
|
||||
HungamaSongIE,
|
||||
)
|
||||
from .huya import HuyaLiveIE
|
||||
from .huya import (
|
||||
HuyaLiveIE,
|
||||
HuyaVideoIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .hypergryph import MonsterSirenHypergryphMusicIE
|
||||
from .hytale import HytaleIE
|
||||
@@ -943,11 +946,13 @@ from .khanacademy import (
|
||||
KhanAcademyUnitIE,
|
||||
)
|
||||
from .kick import (
|
||||
KickClipIE,
|
||||
KickIE,
|
||||
KickVODIE,
|
||||
)
|
||||
from .kicker import KickerIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kika import KikaIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .kommunetv import KommunetvIE
|
||||
@@ -970,6 +975,10 @@ from .la7 import (
|
||||
LA7PodcastEpisodeIE,
|
||||
LA7PodcastIE,
|
||||
)
|
||||
from .laracasts import (
|
||||
LaracastsIE,
|
||||
LaracastsPlaylistIE,
|
||||
)
|
||||
from .lastfm import (
|
||||
LastFMIE,
|
||||
LastFMPlaylistIE,
|
||||
@@ -986,6 +995,7 @@ from .lcp import (
|
||||
LcpIE,
|
||||
LcpPlayIE,
|
||||
)
|
||||
from .learningonscreen import LearningOnScreenIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lecturio import (
|
||||
LecturioCourseIE,
|
||||
@@ -1034,10 +1044,7 @@ from .livestream import (
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .livestreamfails import LivestreamfailsIE
|
||||
from .lnkgo import (
|
||||
LnkGoIE,
|
||||
LnkIE,
|
||||
)
|
||||
from .lnk import LnkIE
|
||||
from .loom import (
|
||||
LoomFolderIE,
|
||||
LoomIE,
|
||||
@@ -1114,12 +1121,15 @@ from .meipai import MeipaiIE
|
||||
from .melonvod import MelonVODIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgtv import MGTVIE
|
||||
from .microsoftembed import MicrosoftEmbedIE
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .microsoftvirtualacademy import (
|
||||
MicrosoftVirtualAcademyCourseIE,
|
||||
MicrosoftVirtualAcademyIE,
|
||||
from .microsoftembed import (
|
||||
MicrosoftBuildIE,
|
||||
MicrosoftEmbedIE,
|
||||
MicrosoftLearnEpisodeIE,
|
||||
MicrosoftLearnPlaylistIE,
|
||||
MicrosoftLearnSessionIE,
|
||||
MicrosoftMediusIE,
|
||||
)
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .mildom import (
|
||||
MildomClipIE,
|
||||
MildomIE,
|
||||
@@ -1159,6 +1169,7 @@ from .mlb import (
|
||||
)
|
||||
from .mlssoccer import MLSSoccerIE
|
||||
from .mocha import MochaVideoIE
|
||||
from .mojevideo import MojevideoIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .motherless import (
|
||||
@@ -1604,6 +1615,7 @@ from .qqmusic import (
|
||||
QQMusicPlaylistIE,
|
||||
QQMusicSingerIE,
|
||||
QQMusicToplistIE,
|
||||
QQMusicVideoIE,
|
||||
)
|
||||
from .r7 import (
|
||||
R7IE,
|
||||
@@ -1756,7 +1768,10 @@ from .rtve import (
|
||||
RTVETelevisionIE,
|
||||
)
|
||||
from .rtvs import RTVSIE
|
||||
from .rtvslo import RTVSLOIE
|
||||
from .rtvslo import (
|
||||
RTVSLOIE,
|
||||
RTVSLOShowIE,
|
||||
)
|
||||
from .rudovideo import RudoVideoIE
|
||||
from .rule34video import Rule34VideoIE
|
||||
from .rumble import (
|
||||
@@ -1801,6 +1816,7 @@ from .screen9 import Screen9IE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastify import ScreencastifyIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenrec import ScreenRecIE
|
||||
from .scrippsnetworks import (
|
||||
ScrippsNetworksIE,
|
||||
ScrippsNetworksWatchIE,
|
||||
@@ -1811,6 +1827,7 @@ from .scte import (
|
||||
SCTECourseIE,
|
||||
)
|
||||
from .sejmpl import SejmIE
|
||||
from .sen import SenIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import (
|
||||
SenateGovIE,
|
||||
@@ -1866,6 +1883,7 @@ from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotrim import SmotrimIE
|
||||
from .snapchat import SnapchatSpotlightIE
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import (
|
||||
SohuIE,
|
||||
@@ -1926,6 +1944,10 @@ from .spreaker import (
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .sproutvideo import (
|
||||
SproutVideoIE,
|
||||
VidsIoIE,
|
||||
)
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
SRGSSRPlayIE,
|
||||
@@ -2158,10 +2180,7 @@ from .tv5unis import (
|
||||
TV5UnisVideoIE,
|
||||
)
|
||||
from .tv24ua import TV24UAVideoIE
|
||||
from .tva import (
|
||||
TVAIE,
|
||||
QubIE,
|
||||
)
|
||||
from .tva import TVAIE
|
||||
from .tvanouvelles import (
|
||||
TVANouvellesArticleIE,
|
||||
TVANouvellesIE,
|
||||
@@ -2301,6 +2320,7 @@ from .videomore import (
|
||||
VideomoreVideoIE,
|
||||
)
|
||||
from .videopress import VideoPressIE
|
||||
from .vidflex import VidflexIE
|
||||
from .vidio import (
|
||||
VidioIE,
|
||||
VidioLiveIE,
|
||||
@@ -2308,6 +2328,7 @@ from .vidio import (
|
||||
)
|
||||
from .vidlii import VidLiiIE
|
||||
from .vidly import VidlyIE
|
||||
from .vidyard import VidyardIE
|
||||
from .viewlift import (
|
||||
ViewLiftEmbedIE,
|
||||
ViewLiftIE,
|
||||
@@ -2373,6 +2394,10 @@ from .vrt import (
|
||||
VrtNUIE,
|
||||
)
|
||||
from .vtm import VTMIE
|
||||
from .vtv import (
|
||||
VTVIE,
|
||||
VTVGoIE,
|
||||
)
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import (
|
||||
VVVVIDIE,
|
||||
|
||||
@@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
'skip': 'This program is not currently available in ABC iview',
|
||||
}, {
|
||||
'url': 'https://iview.abc.net.au/show/inbestigators',
|
||||
'info_dict': {
|
||||
'id': '175343-1',
|
||||
'title': 'Series 1',
|
||||
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
|
||||
'series': 'The Inbestigators',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
webpage_data = self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
|
||||
webpage, 'initial state')
|
||||
video_data = self._parse_json(
|
||||
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
video_data = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
|
||||
transform_source=lambda x: x.encode().decode('unicode_escape'),
|
||||
end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
|
||||
|
||||
@@ -9,12 +9,12 @@ import re
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..networking import RequestHandler, Response
|
||||
from ..networking.exceptions import TransportError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@@ -26,37 +26,36 @@ from ..utils import (
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
"""Add a handler for opening URLs, like _download_webpage"""
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
rh = ydl._request_director.handlers['Urllib']
|
||||
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
||||
return
|
||||
headers = ydl.params['http_headers'].copy()
|
||||
proxies = ydl.proxies.copy()
|
||||
clean_proxies(proxies, headers)
|
||||
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
|
||||
assert isinstance(opener, urllib.request.OpenerDirector)
|
||||
opener.add_handler(handler)
|
||||
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
||||
class AbemaLicenseRH(RequestHandler):
|
||||
_SUPPORTED_URL_SCHEMES = ('abematv-license',)
|
||||
_SUPPORTED_PROXY_SCHEMES = None
|
||||
_SUPPORTED_FEATURES = None
|
||||
RH_NAME = 'abematv_license'
|
||||
|
||||
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
handler_order = 499
|
||||
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
def __init__(self, ie: 'AbemaTVIE'):
|
||||
# the protocol that this should really handle is 'abematv-license://'
|
||||
# abematv_license_open is just a placeholder for development purposes
|
||||
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
|
||||
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None))
|
||||
def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.ie = ie
|
||||
|
||||
def _send(self, request):
|
||||
url = request.url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
|
||||
try:
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
except ExtractorError as e:
|
||||
raise TransportError(cause=e.cause) from e
|
||||
except (IndexError, KeyError, TypeError) as e:
|
||||
raise TransportError(cause=repr(e)) from e
|
||||
|
||||
return Response(
|
||||
io.BytesIO(response_data), url,
|
||||
headers={'Content-Length': str(len(response_data))})
|
||||
|
||||
def _get_videokey_from_ticket(self, ticket):
|
||||
to_show = self.ie.get_param('verbose', False)
|
||||
media_token = self.ie._get_media_token(to_show=to_show)
|
||||
@@ -72,25 +71,17 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
res = decode_base_n(license_response['k'], table=self.STRTABLE)
|
||||
res = decode_base_n(license_response['k'], table=self._STRTABLE)
|
||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self.HKEY),
|
||||
binascii.unhexlify(self._HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
def abematv_license_open(self, url):
|
||||
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
|
||||
'Content-Length': str(len(response_data)),
|
||||
}, url=url, code=200)
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
@@ -139,7 +130,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||
@@ -368,6 +359,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
info['episode_number'] = epis if epis < 2000 else None
|
||||
|
||||
is_live, m3u8_url = False, None
|
||||
availability = 'public'
|
||||
if video_type == 'now-on-air':
|
||||
is_live = True
|
||||
channel_url = 'https://api.abema.io/v1/channels'
|
||||
@@ -385,10 +377,10 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
|
||||
note='Checking playability',
|
||||
headers=headers)
|
||||
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
|
||||
if 3 not in ondemand_types:
|
||||
if not traverse_obj(api_response, ('label', 'free', {bool})):
|
||||
# cannot acquire decryption key for these streams
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
info.update(traverse_obj(api_response, {
|
||||
'series': ('series', 'title'),
|
||||
'season': ('season', 'name'),
|
||||
@@ -408,6 +400,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
headers=headers)
|
||||
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
|
||||
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
|
||||
else:
|
||||
@@ -425,6 +418,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
'availability': availability,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
_TEST = {
|
||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||
|
||||
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
@@ -48,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
'info_dict': {
|
||||
'id': '9841',
|
||||
@@ -70,10 +71,7 @@ class ADNIE(ADNBaseIE):
|
||||
},
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
|
||||
'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
@@ -166,7 +164,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
'username': username,
|
||||
})) or {}).get('accessToken')
|
||||
if access_token:
|
||||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
self._HEADERS['Authorization'] = f'Bearer {access_token}'
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
@@ -177,6 +175,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
@@ -217,7 +216,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization,
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
@@ -256,6 +254,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
load_balancer_data = self._download_json(
|
||||
load_balancer_url, video_id,
|
||||
f'Downloading {format_id} {quality} JSON metadata',
|
||||
headers=self._HEADERS,
|
||||
fatal=False) or {}
|
||||
m3u8_url = load_balancer_data.get('location')
|
||||
if not m3u8_url:
|
||||
@@ -276,7 +275,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + f'video/{video_id}', video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
return {
|
||||
@@ -298,9 +297,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
|
||||
class ADNSeasonIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
|
||||
'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': '911',
|
||||
@@ -311,24 +310,22 @@ class ADNSeasonIE(ADNBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
show = self._download_json(
|
||||
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
|
||||
'Downloading show JSON metadata', headers=self._HEADERS)['show']
|
||||
show_id = str(show['id'])
|
||||
episodes = self._download_json(
|
||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||
'Downloading episode list', headers={
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'Downloading episode list', headers=self._HEADERS, query={
|
||||
'order': 'asc',
|
||||
'limit': '-1',
|
||||
})
|
||||
|
||||
def entries():
|
||||
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
|
||||
yield self.url_result(
|
||||
f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
|
||||
ADNIE, episode_id)
|
||||
yield self.url_result(join_nonempty(
|
||||
'https://animationdigitalnetwork.com', lang, 'video',
|
||||
video_show_slug, episode_id, delim='/'), ADNIE, episode_id)
|
||||
|
||||
return self.playlist_result(entries(), show_id, show.get('title'))
|
||||
|
||||
@@ -1355,6 +1355,7 @@ MSO_INFO = {
|
||||
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
@@ -1454,7 +1455,11 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
}, headers={
|
||||
# yt-dlp's default user-agent is usually too old for Comcast_SSO
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
|
||||
'User-Agent': self._MODERN_USER_AGENT,
|
||||
} if mso_id == 'Comcast_SSO' else None)
|
||||
elif not self._cookies_passed:
|
||||
raise_mvpd_required()
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@@ -32,21 +33,21 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
'https://login.afreecatv.com/app/LoginAction.php', None,
|
||||
'https://login.sooplive.co.kr/app/LoginAction.php', None,
|
||||
'Logging in', data=urlencode_postdata(login_form))
|
||||
|
||||
_ERRORS = {
|
||||
-4: 'Your account has been suspended due to a violation of our terms and policies.',
|
||||
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
|
||||
-6: 'https://login.afreecatv.com/membership/changeMember.php',
|
||||
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
|
||||
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
|
||||
-12: 'https://member.afreecatv.com/app/user_security.php',
|
||||
-5: 'https://member.sooplive.co.kr/app/user_delete_progress.php',
|
||||
-6: 'https://login.sooplive.co.kr/membership/changeMember.php',
|
||||
-8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.sooplive.co.kr/app/pop_login_block.php',
|
||||
-11: 'https://login.sooplive.co.kr/afreeca/second_login.php',
|
||||
-12: 'https://member.sooplive.co.kr/app/user_security.php',
|
||||
0: 'The username does not exist or you have entered the wrong password.',
|
||||
-1: 'The username does not exist or you have entered the wrong password.',
|
||||
-3: 'You have entered your username/password incorrectly.',
|
||||
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
|
||||
-7: 'You cannot use your Global Soop account to access Korean Soop.',
|
||||
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
|
||||
-32008: 'You have failed to log in. Please contact our Help Center.',
|
||||
}
|
||||
@@ -58,71 +59,42 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
f'Unable to login: {self.IE_NAME} said: {error}',
|
||||
expected=True)
|
||||
|
||||
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
|
||||
return self._download_json(Request(
|
||||
f'https://api.m.sooplive.co.kr/{endpoint}',
|
||||
data=data, headers=headers, query=query,
|
||||
extensions={'legacy_ssl': True}), display_id,
|
||||
'Downloading API JSON', 'Unable to download API JSON')
|
||||
|
||||
|
||||
class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv'
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
IE_NAME = 'soop'
|
||||
IE_DESC = 'sooplive.co.kr'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
'url': 'https://vod.sooplive.co.kr/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '36164052',
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'title': '데일리 에이프릴 요정들의 시상식!',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160503',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||
'info_dict': {
|
||||
'id': '36153164',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '36153164_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '36153164_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}],
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||
'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
@@ -134,12 +106,12 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/97267690',
|
||||
'url': 'https://vod.sooplive.co.kr/player/97267690',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
@@ -149,44 +121,25 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The VOD does not exist',
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vod.afreecatv.com/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/70395877',
|
||||
'url': 'https://vod.sooplive.co.kr/player/70395877',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# subscribers only
|
||||
'url': 'https://vod.afreecatv.com/player/104647403',
|
||||
'url': 'https://vod.sooplive.co.kr/player/104647403',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private
|
||||
'url': 'https://vod.afreecatv.com/player/81669846',
|
||||
'url': 'https://vod.sooplive.co.kr/player/81669846',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://api.m.afreecatv.com/station/video/a/view', video_id,
|
||||
headers={'Referer': url}, data=urlencode_postdata({
|
||||
data = self._call_api(
|
||||
'station/video/a/view', video_id, headers={'Referer': url},
|
||||
data=urlencode_postdata({
|
||||
'nTitleNo': video_id,
|
||||
'nApiLevel': 10,
|
||||
}))['data']
|
||||
@@ -253,12 +206,49 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:live'
|
||||
IE_DESC = 'afreecatv.com livestreams'
|
||||
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
|
||||
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:catchstory'
|
||||
IE_DESC = 'sooplive.co.kr catch story'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://vod.sooplive.co.kr/player/103247/catchstory',
|
||||
'info_dict': {
|
||||
'id': '103247',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'catchstory/a/view', video_id, headers={'Referer': url},
|
||||
query={'aStoryListIdx': '', 'nStoryIdx': video_id})
|
||||
|
||||
return self.playlist_result(self._entries(data), video_id)
|
||||
|
||||
@staticmethod
|
||||
def _entries(data):
|
||||
# 'files' is always a list with 1 element
|
||||
yield from traverse_obj(data, (
|
||||
'data', lambda _, v: v['story_type'] == 'catch',
|
||||
'catch_list', lambda _, v: v['files'][0]['file'], {
|
||||
'id': ('files', 0, 'file_info_key', {str}),
|
||||
'url': ('files', 0, 'file', {url_or_none}),
|
||||
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('writer_id', {str}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
'timestamp': ('write_timestamp', {int_or_none}),
|
||||
}))
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:live'
|
||||
IE_DESC = 'sooplive.co.kr livestreams'
|
||||
_VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'info_dict': {
|
||||
'id': '237852185',
|
||||
'ext': 'mp4',
|
||||
@@ -270,30 +260,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
},
|
||||
'skip': 'Livestream has ended',
|
||||
}, {
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.afreecatv.com/pyh3646',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
|
||||
_LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php'
|
||||
_WORKING_CDNS = [
|
||||
'gcp_cdn', # live-global-cdn-v02.afreecatv.com
|
||||
'gs_cdn_pc_app', # pc-app.stream.afreecatv.com
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com
|
||||
'gs_cdn_pc_web', # pc-web.stream.afreecatv.com
|
||||
'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr
|
||||
'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr
|
||||
'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr
|
||||
]
|
||||
_BAD_CDNS = [
|
||||
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
|
||||
'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve)
|
||||
'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400)
|
||||
'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve)
|
||||
'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400)
|
||||
]
|
||||
|
||||
def _extract_formats(self, channel_info, broadcast_no, aid):
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr'
|
||||
|
||||
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
|
||||
default_cdn_ids = orderedSet([
|
||||
@@ -313,7 +303,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
try:
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
|
||||
headers={'Referer': 'https://play.afreecatv.com/'})
|
||||
headers={'Referer': 'https://play.sooplive.co.kr/'})
|
||||
except ExtractorError as e:
|
||||
if attempt == len(cdn_ids):
|
||||
raise
|
||||
@@ -329,7 +319,13 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
broadcaster_id = channel_info.get('BJID') or broadcaster_id
|
||||
broadcast_no = channel_info.get('BNO') or broadcast_no
|
||||
if not broadcast_no:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
result = channel_info.get('RESULT')
|
||||
if result == 0:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
elif result == -6:
|
||||
self.raise_login_required(
|
||||
'This channel is streaming for subscribers only', method='password')
|
||||
raise ExtractorError('Unable to extract broadcast number')
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if channel_info.get('BPWD') == 'Y' and password is None:
|
||||
@@ -358,7 +354,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
formats = self._extract_formats(channel_info, broadcast_no, aid)
|
||||
|
||||
station_info = traverse_obj(self._download_json(
|
||||
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
|
||||
'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no,
|
||||
'Downloading channel metadata', 'Unable to download channel metadata',
|
||||
query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
|
||||
|
||||
@@ -374,11 +370,11 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class AfreecaTVUserIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv:user'
|
||||
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
|
||||
class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:user'
|
||||
_VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -386,7 +382,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 218,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
|
||||
'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'parang1995',
|
||||
@@ -394,7 +390,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 997,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -402,7 +398,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 221,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -414,12 +410,12 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
|
||||
def _fetch_page(self, user_id, user_type, page):
|
||||
page += 1
|
||||
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
|
||||
info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id,
|
||||
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
|
||||
note=f'Downloading {user_type} video page {page}')
|
||||
for item in info['data']:
|
||||
yield self.url_result(
|
||||
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
|
||||
|
||||
@@ -1,27 +1,42 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
|
||||
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
|
||||
'info_dict': {
|
||||
'id': '1000665010654',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
|
||||
'upload_date': '20240812',
|
||||
'timestamp': 1723449600,
|
||||
'duration': 3596,
|
||||
'series': 'Ferreck Dawn - To The Break of Dawn',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'episode': '207 - Whitney Webb Returns',
|
||||
'episode_number': 207,
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593932400,
|
||||
'duration': 6454,
|
||||
'duration': 5369,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
@@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
server_data = self._search_json(
|
||||
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
|
||||
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
|
||||
model_data = traverse_obj(server_data, (
|
||||
'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
|
||||
'model', {dict}, any))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': episode.get('name'),
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
'description': description.get('standard') or description.get('short'),
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
**self._json_ld(
|
||||
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
|
||||
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
|
||||
**traverse_obj(model_data, {
|
||||
'title': ('title', {str}),
|
||||
'url': ('streamUrl', {clean_podcast_url}),
|
||||
'timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
@@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
else:
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': f'{stream_type}-{vbr}' if vbr else stream_type,
|
||||
'format_id': join_nonempty(stream_type, vbr),
|
||||
'vbr': vbr,
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
|
||||
@@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARDMediathek'
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
@@ -470,7 +470,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDMediathekCollectionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/?#]+/)?
|
||||
(?P<playlist>sendung|serie|sammlung)/
|
||||
|
||||
@@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, video_id, mpd_id='dash', fatal=False))
|
||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
href, video_id, ism_id='mss', fatal=False))
|
||||
|
||||
@@ -101,9 +101,10 @@ class AsobiStageIE(InfoExtractor):
|
||||
self._HEADERS['Authorization'] = f'Bearer {token}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
|
||||
webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
|
||||
video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
|
||||
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
event_data = traverse_obj(
|
||||
self._search_nextjs_data(webpage, video_id, default={}),
|
||||
('props', 'pageProps', 'eventCMSData', {
|
||||
|
||||
@@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor):
|
||||
]
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == code:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
@@ -55,7 +47,9 @@ class AtresPlayerIE(InfoExtractor):
|
||||
'password': password,
|
||||
}))['targetUrl']
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 400)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
self._request_webpage(target_url, None, 'Following Target URL')
|
||||
|
||||
@@ -66,7 +60,12 @@ class AtresPlayerIE(InfoExtractor):
|
||||
episode = self._download_json(
|
||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 403)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
title = episode['titulo']
|
||||
|
||||
|
||||
@@ -4,9 +4,13 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
determine_ext,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,6 +34,7 @@ class BanByeBaseIE(InfoExtractor):
|
||||
class BanByeIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
# ['src']['mp4']['levels'] direct mp4 urls only
|
||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||
'info_dict': {
|
||||
@@ -58,6 +63,7 @@ class BanByeIE(BanByeBaseIE):
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
# ['src']['mp4']['levels'] direct mp4 urls only
|
||||
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
|
||||
'info_dict': {
|
||||
'id': 'v_kb6_o1Kyq-CD',
|
||||
@@ -77,6 +83,48 @@ class BanByeIE(BanByeBaseIE):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# ['src']['hls']['levels'] variant m3u8 urls only; master m3u8 is 404
|
||||
'url': 'https://banbye.com/watch/v_a_gPFuC9LoW5',
|
||||
'info_dict': {
|
||||
'id': 'v_a_gPFuC9LoW5',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:183524056bebdfa245fd6d214f63c0fe',
|
||||
'description': 'md5:943ac87287ca98d28d8b8797719827c6',
|
||||
'uploader': 'wRealu24',
|
||||
'channel_id': 'ch_wrealu24',
|
||||
'channel_url': 'https://banbye.com/channel/ch_wrealu24',
|
||||
'upload_date': '20231113',
|
||||
'timestamp': 1699874062,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_a_gPFuC9LoW5/96.webp',
|
||||
'tags': ['jaszczur', 'sejm', 'lewica', 'polska', 'ukrainizacja', 'pierwszeposiedzeniesejmu'],
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8'],
|
||||
}, {
|
||||
# ['src']['hls']['masterPlaylist'] m3u8 only
|
||||
'url': 'https://banbye.com/watch/v_B0rsKWsr-aaa',
|
||||
'info_dict': {
|
||||
'id': 'v_B0rsKWsr-aaa',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:00b254164b82101b3f9e5326037447ed',
|
||||
'description': 'md5:3fd8b48aa81954ba024bc60f5de6e167',
|
||||
'uploader': 'PSTV Piotr Szlachtowicz ',
|
||||
'channel_id': 'ch_KV9EVObkB9wB',
|
||||
'channel_url': 'https://banbye.com/channel/ch_KV9EVObkB9wB',
|
||||
'upload_date': '20240629',
|
||||
'timestamp': 1719646816,
|
||||
'duration': 2377,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_B0rsKWsr-aaa/96.webp',
|
||||
'tags': ['Biden', 'Trump', 'Wybory', 'USA'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -91,11 +139,24 @@ class BanByeIE(BanByeBaseIE):
|
||||
'id': f'{quality}p',
|
||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
|
||||
} for quality in [48, 96, 144, 240, 512, 1080]]
|
||||
formats = [{
|
||||
'format_id': f'http-{quality}p',
|
||||
'quality': quality,
|
||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
|
||||
} for quality in data['quality']]
|
||||
|
||||
formats = []
|
||||
url_data = self._download_json(f'{self._API_BASE}/videos/{video_id}/url', video_id, data=b'')
|
||||
if master_url := traverse_obj(url_data, ('src', 'hls', 'masterPlaylist', {url_or_none})):
|
||||
formats = self._extract_m3u8_formats(master_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
|
||||
for format_id, format_url in traverse_obj(url_data, (
|
||||
'src', ('mp4', 'hls'), 'levels', {dict.items}, lambda _, v: url_or_none(v[1]))):
|
||||
ext = determine_ext(format_url)
|
||||
is_hls = ext == 'm3u8'
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'ext': 'mp4' if is_hls else ext,
|
||||
'format_id': join_nonempty(is_hls and 'hls', format_id),
|
||||
'protocol': 'm3u8_native' if is_hls else 'https',
|
||||
'height': int_or_none(format_id),
|
||||
})
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import functools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -6,7 +8,9 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
@@ -17,6 +21,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
@@ -41,7 +46,7 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader_id': 'youtube-dl',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
@@ -459,7 +464,7 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://coldworldofficial.bandcamp.com/music',
|
||||
'playlist_mincount': 10,
|
||||
'playlist_mincount': 7,
|
||||
'info_dict': {
|
||||
'id': 'coldworldofficial',
|
||||
'title': 'Discography of coldworldofficial',
|
||||
@@ -473,12 +478,19 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _yield_items(self, webpage):
|
||||
yield from (
|
||||
re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
yield from traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
|
||||
'data-client-items', {json.loads}, ..., 'page_url', {str}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader = self._match_id(url)
|
||||
webpage = self._download_webpage(url, uploader)
|
||||
|
||||
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
return self.playlist_from_matches(
|
||||
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
|
||||
self._yield_items(webpage), uploader, f'Discography of {uploader}',
|
||||
getter=functools.partial(urljoin, url))
|
||||
|
||||
68
yt_dlp/extractor/beacon.py
Normal file
68
yt_dlp/extractor/beacon.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BeaconTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beacon\.tv/content/(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://beacon.tv/content/welcome-to-beacon',
|
||||
'md5': 'b3f5932d437f288e662f10f3bfc5bd04',
|
||||
'info_dict': {
|
||||
'id': 'welcome-to-beacon',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240509',
|
||||
'description': 'md5:ea2bd32e71acf3f9fca6937412cc3563',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/I4CkkEvN/poster.jpg?width=720',
|
||||
'title': 'Your home for Critical Role!',
|
||||
'timestamp': 1715227200,
|
||||
'duration': 105.494,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beacon.tv/content/re-slayers-take-trailer',
|
||||
'md5': 'd879b091485dbed2245094c8152afd89',
|
||||
'info_dict': {
|
||||
'id': 're-slayers-take-trailer',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Re-Slayer’s Take | Official Trailer',
|
||||
'timestamp': 1715189040,
|
||||
'upload_date': '20240508',
|
||||
'duration': 53.249,
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/PW5ApIw3/poster.jpg?width=720',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
content_data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'pageProps', '__APOLLO_STATE__',
|
||||
lambda k, v: k.startswith('Content:') and v['slug'] == video_id, any))
|
||||
if not content_data:
|
||||
raise ExtractorError('Failed to extract content data')
|
||||
|
||||
jwplayer_data = traverse_obj(content_data, (
|
||||
(('contentVideo', 'video', 'videoData'),
|
||||
('contentPodcast', 'podcast', 'audioData')), {json.loads}, {dict}, any))
|
||||
if not jwplayer_data:
|
||||
if content_data.get('contentType') not in ('videoPodcast', 'video', 'podcast'):
|
||||
raise ExtractorError('Content is not a video/podcast', expected=True)
|
||||
if traverse_obj(content_data, ('contentTier', '__ref')) != 'MemberTier:65b258d178f89be87b4dc0a4':
|
||||
self.raise_login_required('This video/podcast is for members only')
|
||||
raise ExtractorError('Failed to extract content')
|
||||
|
||||
return {
|
||||
**self._parse_jwplayer_data(jwplayer_data, video_id),
|
||||
**traverse_obj(content_data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('publishedAt', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
@@ -31,12 +31,12 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_count,
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
smuggle_url,
|
||||
srt_subtitles_timecode,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
@@ -46,7 +46,25 @@ from ..utils import (
|
||||
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
|
||||
_wbi_key_cache = {}
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
|
||||
|
||||
def _check_missing_formats(self, play_info, formats):
|
||||
parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
|
||||
missing_formats = join_nonempty(*[
|
||||
traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
|
||||
for fmt in traverse_obj(play_info, (
|
||||
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
|
||||
if missing_formats:
|
||||
self.to_screen(
|
||||
f'Format(s) {missing_formats} are missing; you have to login or '
|
||||
f'become a premium member to download them. {self._login_hint()}')
|
||||
|
||||
def extract_formats(self, play_info):
|
||||
format_names = {
|
||||
@@ -86,18 +104,75 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'format': format_names.get(video.get('id')),
|
||||
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
||||
|
||||
missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
|
||||
if missing_formats:
|
||||
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
|
||||
f'you have to login or become premium member to download them. {self._login_hint()}')
|
||||
if formats:
|
||||
self._check_missing_formats(play_info, formats)
|
||||
|
||||
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}))
|
||||
if fragments:
|
||||
formats.append({
|
||||
'url': fragments[0]['url'],
|
||||
'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
|
||||
**({
|
||||
'fragments': fragments,
|
||||
'protocol': 'http_dash_segments',
|
||||
} if len(fragments) > 1 else {}),
|
||||
**traverse_obj(play_info, {
|
||||
'quality': ('quality', {int_or_none}),
|
||||
'format_id': ('quality', {str_or_none}),
|
||||
'format_note': ('quality', {lambda x: format_names.get(x)}),
|
||||
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
|
||||
}),
|
||||
**parse_resolution(format_names.get(play_info.get('quality'))),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _download_playinfo(self, video_id, cid, headers=None):
|
||||
def _get_wbi_key(self, video_id):
|
||||
if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
|
||||
return self._wbi_key_cache['key']
|
||||
|
||||
session_data = self._download_json(
|
||||
'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
|
||||
|
||||
lookup = ''.join(traverse_obj(session_data, (
|
||||
'data', 'wbi_img', ('img_url', 'sub_url'),
|
||||
{lambda x: x.rpartition('/')[2].partition('.')[0]})))
|
||||
|
||||
# from getMixinKey() in the vendor js
|
||||
mixin_key_enc_tab = [
|
||||
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
|
||||
33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
|
||||
61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
|
||||
36, 20, 34, 44, 52,
|
||||
]
|
||||
|
||||
self._wbi_key_cache.update({
|
||||
'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
|
||||
'ts': time.time(),
|
||||
})
|
||||
return self._wbi_key_cache['key']
|
||||
|
||||
def _sign_wbi(self, params, video_id):
|
||||
params['wts'] = round(time.time())
|
||||
params = {
|
||||
k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
|
||||
for k, v in sorted(params.items())
|
||||
}
|
||||
query = urllib.parse.urlencode(params)
|
||||
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
|
||||
return params
|
||||
|
||||
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
|
||||
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
|
||||
if qn:
|
||||
params['qn'] = qn
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
|
||||
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
|
||||
query=self._sign_wbi(params, bvid), headers=headers,
|
||||
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
@@ -115,15 +190,15 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
}],
|
||||
}
|
||||
|
||||
subtitle_info = traverse_obj(self._download_json(
|
||||
video_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
|
||||
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
|
||||
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
|
||||
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
|
||||
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
|
||||
for s in subs_list:
|
||||
note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
|
||||
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||
self.report_warning(
|
||||
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
|
||||
for s in traverse_obj(video_info, (
|
||||
'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
|
||||
subtitles.setdefault(s['lan'], []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
|
||||
@@ -133,7 +208,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
def _get_chapters(self, aid, cid):
|
||||
chapters = aid and cid and self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
note='Extracting chapters', fatal=False)
|
||||
note='Extracting chapters', fatal=False, headers=self._HEADERS)
|
||||
return traverse_obj(chapters, ('data', 'view_points', ..., {
|
||||
'title': 'content',
|
||||
'start_time': 'from',
|
||||
@@ -203,15 +278,15 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
|
||||
return cid_edges
|
||||
|
||||
def _get_interactive_entries(self, video_id, cid, metainfo):
|
||||
def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
|
||||
graph_version = traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
|
||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
|
||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||
for cid, edges in cid_edges.items():
|
||||
play_info = self._download_playinfo(video_id, cid)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
yield {
|
||||
**metainfo,
|
||||
'id': f'{video_id}_{cid}',
|
||||
@@ -224,7 +299,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
@@ -243,17 +318,17 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'timestamp': 1488353834,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'old av URL version',
|
||||
'url': 'http://www.bilibili.com/video/av1074402/',
|
||||
'info_dict': {
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||
'id': 'BV11x411K7CN',
|
||||
'ext': 'mp4',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
'id': 'BV11x411K7CN',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308.36,
|
||||
'upload_date': '20140420',
|
||||
'timestamp': 1397983878,
|
||||
@@ -262,6 +337,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||
'_old_archive_ids': ['bilibili 1074402_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
@@ -288,6 +365,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||
'duration': 90.314,
|
||||
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
@@ -308,28 +386,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||
'duration': 90.314,
|
||||
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'video has subtitles',
|
||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||
'info_dict': {
|
||||
'id': 'BV12N4y1M7rh',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
||||
'tags': list,
|
||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'subtitles': 'count:2',
|
||||
},
|
||||
'params': {'listsubtitles': True},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/av8903802/',
|
||||
'info_dict': {
|
||||
@@ -347,6 +405,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -370,6 +429,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 463665680_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
@@ -388,8 +448,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 893839363_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'newer festival video',
|
||||
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
||||
@@ -406,8 +466,57 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 778246196_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'legacy flv/mp4 video',
|
||||
'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
|
||||
'info_dict': {
|
||||
'id': 'BV1ms411Q7vw_p4',
|
||||
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||
'timestamp': 1458222815,
|
||||
'upload_date': '20160317',
|
||||
'description': '云南方言快乐生产线出品',
|
||||
'duration': float,
|
||||
'uploader': '一笑颠天',
|
||||
'uploader_id': '3916081',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 4120229_part4'],
|
||||
},
|
||||
'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
|
||||
'playlist_count': 19,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'BV1ms411Q7vw_p4_0',
|
||||
'ext': 'flv',
|
||||
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||
'duration': 399.102,
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'note': 'legacy mp4-only video',
|
||||
'url': 'https://www.bilibili.com/video/BV1nx411u79K',
|
||||
'info_dict': {
|
||||
'id': 'BV1nx411u79K',
|
||||
'ext': 'mp4',
|
||||
'title': '【练习室】201603声乐练习《No Air》with VigoVan',
|
||||
'timestamp': 1508893551,
|
||||
'upload_date': '20171025',
|
||||
'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
|
||||
'duration': 80.384,
|
||||
'uploader': '伯远',
|
||||
'uploader_id': '10584494',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 15700301_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'interactive/split-path video',
|
||||
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
|
||||
@@ -425,6 +534,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||
},
|
||||
'playlist_count': 33,
|
||||
'playlist': [{
|
||||
@@ -443,6 +553,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
@@ -465,6 +576,29 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'upload_date': '20191021',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}, {
|
||||
'note': 'video has subtitles, which requires login',
|
||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||
'info_dict': {
|
||||
'id': 'BV12N4y1M7rh',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
||||
'tags': list,
|
||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'subtitles': 'count:2', # login required for CC subtitle
|
||||
'_old_archive_ids': ['bilibili 898179753_part1'],
|
||||
},
|
||||
'params': {'listsubtitles': True},
|
||||
'skip': 'login required for subtitle',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
|
||||
'info_dict': {
|
||||
@@ -489,6 +623,10 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'skip': 'geo-restricted',
|
||||
}, {
|
||||
'note': 'has - in the last path segment of the url',
|
||||
'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -498,8 +636,9 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
if not self._match_valid_url(urlh.url):
|
||||
return self.url_result(urlh.url)
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
headers['Referer'] = url
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
@@ -548,7 +687,6 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
aid = video_data.get('aid')
|
||||
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
|
||||
|
||||
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
||||
|
||||
festival_info = {}
|
||||
@@ -586,18 +724,65 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
|
||||
if is_interactive:
|
||||
return self.playlist_result(
|
||||
self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
|
||||
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
|
||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
__post_extractor=self.extract_comments(aid))
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'formats': self.extract_formats(play_info),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if not traverse_obj(play_info, ('dash')):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
@@ -837,8 +1022,6 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
|
||||
|
||||
class BilibiliCheeseBaseIE(BilibiliBaseIE):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _extract_episode(self, season_info, ep_id):
|
||||
episode_info = traverse_obj(season_info, (
|
||||
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
|
||||
@@ -968,7 +1151,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
|
||||
}))
|
||||
|
||||
|
||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||
class BilibiliSpaceBaseIE(BilibiliBaseIE):
|
||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||
first_page = fetch_page(0)
|
||||
metadata = get_metadata(first_page)
|
||||
@@ -988,73 +1171,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
'id': '3985676',
|
||||
},
|
||||
'playlist_mincount': 178,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/313580179/video',
|
||||
'info_dict': {
|
||||
'id': '313580179',
|
||||
},
|
||||
'playlist_mincount': 92,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _extract_signature(self, playlist_id):
|
||||
session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
|
||||
|
||||
key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
|
||||
img_key = traverse_obj(
|
||||
session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
|
||||
sub_key = traverse_obj(
|
||||
session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
|
||||
|
||||
session_key = img_key + sub_key
|
||||
|
||||
signature_values = []
|
||||
for position in (
|
||||
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
|
||||
12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
|
||||
57, 62, 11, 36, 20, 34, 44, 52,
|
||||
):
|
||||
char_at_position = try_call(lambda: session_key[position])
|
||||
if char_at_position:
|
||||
signature_values.append(char_at_position)
|
||||
|
||||
return ''.join(signature_values)[:32]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
||||
if not is_video_url:
|
||||
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
||||
'To download audios, add a "/audio" to the URL')
|
||||
|
||||
signature = self._extract_signature(playlist_id)
|
||||
|
||||
def fetch_page(page_idx):
|
||||
query = {
|
||||
'keyword': '',
|
||||
'mid': playlist_id,
|
||||
'order': 'pubdate',
|
||||
'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
|
||||
'order_avoided': 'true',
|
||||
'platform': 'web',
|
||||
'pn': page_idx + 1,
|
||||
'ps': 30,
|
||||
'tid': 0,
|
||||
'web_location': 1550101,
|
||||
'wts': int(time.time()),
|
||||
}
|
||||
query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
|
||||
|
||||
try:
|
||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query,
|
||||
headers={'referer': url})
|
||||
response = self._download_json(
|
||||
'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
|
||||
query=self._sign_wbi(query, playlist_id),
|
||||
note=f'Downloading space page {page_idx}', headers={'Referer': url})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
raise
|
||||
if response['code'] in (-352, -401):
|
||||
status_code = response['code']
|
||||
if status_code == -401:
|
||||
raise ExtractorError(
|
||||
f'Request is blocked by server ({-response["code"]}), '
|
||||
'please add cookies, wait and try later.', expected=True)
|
||||
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||
elif status_code == -352 and not self.is_logged_in:
|
||||
self.raise_login_required('Request is rejected, you need to login to access playlist')
|
||||
elif status_code != 0:
|
||||
raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
|
||||
return response['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
@@ -1280,7 +1443,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/watchlater/#/list',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'info_dict': {
|
||||
'id': r're:\d+',
|
||||
'title': '稍后再看',
|
||||
},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
@@ -1356,14 +1522,19 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'skip': 'redirect url',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'info_dict': {
|
||||
'id': r're:2_\d+',
|
||||
'title': '稍后再看',
|
||||
'uploader': str,
|
||||
'uploader_id': str,
|
||||
},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
'skip': 'redirect url & login required',
|
||||
}]
|
||||
|
||||
def _extract_medialist(self, query, list_id):
|
||||
@@ -1414,7 +1585,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
@@ -1680,7 +1851,7 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.tv/'}
|
||||
|
||||
def _call_api(self, endpoint, *args, **kwargs):
|
||||
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
|
||||
@@ -1808,7 +1979,8 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
public_key = Cryptodome.RSA.importKey(key_data['key'])
|
||||
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
|
||||
login_post = self._download_json(
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
|
||||
data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': base64.b64encode(password_hash).decode('ascii'),
|
||||
'keep_me': 'true',
|
||||
@@ -2140,7 +2312,8 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
||||
series_info = self._call_api(
|
||||
f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
||||
return self.playlist_result(
|
||||
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
|
||||
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
|
||||
|
||||
@@ -24,7 +24,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
|
||||
@@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@@ -132,7 +135,7 @@ class BitChuteIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||
|
||||
self._raise_if_restricted(webpage)
|
||||
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
|
||||
@@ -171,13 +174,13 @@ class BitChuteIE(InfoExtractor):
|
||||
|
||||
|
||||
class BitChuteChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
'info_dict': {
|
||||
'id': 'bitchute',
|
||||
'title': 'BitChute',
|
||||
'description': 'md5:5329fb3866125afa9446835594a9b138',
|
||||
'description': 'md5:2134c37d64fc3a4846787c402956adac',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
@@ -210,6 +213,9 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
||||
@@ -230,7 +236,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _make_url(playlist_id, playlist_type):
|
||||
return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'
|
||||
return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/'
|
||||
|
||||
def _fetch_page(self, playlist_id, playlist_type, page_num):
|
||||
playlist_url = self._make_url(playlist_id, playlist_type)
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BoxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<service>app|ent)\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||
@@ -38,10 +38,22 @@ class BoxIE(InfoExtractor):
|
||||
'uploader_id': '239068974',
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}, {
|
||||
'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065',
|
||||
'info_dict': {
|
||||
'id': '1536173056065',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': '18523128264',
|
||||
'uploader': 'Lexi Hennigan',
|
||||
'title': 'iPSC Symposium recording part 1.mp4',
|
||||
'timestamp': 1716228343,
|
||||
'upload_date': '20240520',
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
shared_name, file_id = self._match_valid_url(url).groups()
|
||||
shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service')
|
||||
webpage = self._download_webpage(url, file_id or shared_name)
|
||||
|
||||
if not file_id:
|
||||
@@ -57,14 +69,14 @@ class BoxIE(InfoExtractor):
|
||||
request_token = self._search_json(
|
||||
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
|
||||
access_token = self._download_json(
|
||||
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||
f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||
'Downloading token JSON metadata',
|
||||
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'X-Request-Token': request_token,
|
||||
'X-Box-EndUser-API': 'sharedName=' + shared_name,
|
||||
})[file_id]['read']
|
||||
shared_link = 'https://app.box.com/s/' + shared_name
|
||||
shared_link = f'https://{service}.box.com/s/{shared_name}'
|
||||
f = self._download_json(
|
||||
'https://api.box.com/2.0/files/' + file_id, file_id,
|
||||
'Downloading file JSON metadata', headers={
|
||||
|
||||
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
@@ -386,7 +387,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
@classmethod
|
||||
def _make_brightcove_url(cls, params):
|
||||
return update_url_query(
|
||||
'http://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||
'https://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_url(cls, webpage):
|
||||
@@ -470,7 +471,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
'https://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
if player_page:
|
||||
player_key = self._search_regex(
|
||||
@@ -480,7 +481,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
brightcove_new_url = f'http://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
|
||||
brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
|
||||
if referer:
|
||||
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
@@ -538,12 +539,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
|
||||
})
|
||||
|
||||
def build_format_id(kind):
|
||||
format_id = kind
|
||||
if tbr:
|
||||
format_id += f'-{int(tbr)}k'
|
||||
if height:
|
||||
format_id += f'-{height}p'
|
||||
return format_id
|
||||
return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
|
||||
|
||||
if src or streaming_src:
|
||||
f.update({
|
||||
@@ -801,7 +797,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
# Look for iframe embeds [1]
|
||||
for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||
entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)
|
||||
|
||||
# Look for <video> tags [2] and embed_in_page embeds [3]
|
||||
# [2] looks like:
|
||||
@@ -830,7 +826,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
player_id = player_id or attrs.get('data-player') or 'default'
|
||||
embed = embed or attrs.get('data-embed') or 'default'
|
||||
|
||||
bc_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
|
||||
bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
|
||||
|
||||
# Some brightcove videos may be embedded with video tag only and
|
||||
# without script tag or any mentioning of brightcove at all. Such
|
||||
@@ -867,7 +863,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
|
||||
|
||||
def extract_policy_key():
|
||||
base_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/'
|
||||
base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/'
|
||||
config = self._download_json(
|
||||
base_url + 'config.json', video_id, fatal=False) or {}
|
||||
policy_key = try_get(
|
||||
|
||||
@@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
|
||||
|
||||
|
||||
class CallinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
|
||||
'info_dict': {
|
||||
|
||||
@@ -1,22 +1,28 @@
|
||||
import base64
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
replace_extension,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
update_url,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -149,6 +155,7 @@ class CBCIE(InfoExtractor):
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:player'
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
||||
@@ -172,21 +179,20 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
|
||||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
||||
'tags': 'count:8',
|
||||
'categories': ['All in a Weekend Montreal'],
|
||||
'tags': 'count:11',
|
||||
'location': 'Quebec',
|
||||
'series': 'All in a Weekend Montreal',
|
||||
'season': 'Season 2015',
|
||||
'season_number': 2015,
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['Other'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
|
||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
||||
'info_dict': {
|
||||
'id': '2164402062',
|
||||
'ext': 'mp4',
|
||||
@@ -194,107 +200,168 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
'series': 'CBC News: Windsor at 6:00',
|
||||
'categories': ['News/Canada/Windsor'],
|
||||
'categories': ['Windsor'],
|
||||
'location': 'Windsor',
|
||||
'tags': ['cancer'],
|
||||
'creators': ['Allison Johnson'],
|
||||
'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['News'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'https://www.cbc.ca/player/play/1.2985700',
|
||||
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
||||
'info_dict': {
|
||||
'id': '2657631896',
|
||||
'id': '1.2985700',
|
||||
'ext': 'mp3',
|
||||
'title': 'CBC Montreal is organizing its first ever community hackathon!',
|
||||
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
|
||||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
||||
'tags': 'count:8',
|
||||
'categories': ['All in a Weekend Montreal'],
|
||||
'tags': 'count:11',
|
||||
'location': 'Quebec',
|
||||
'series': 'All in a Weekend Montreal',
|
||||
'season': 'Season 2015',
|
||||
'season_number': 2015,
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['Other'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/1.1711287',
|
||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
||||
'info_dict': {
|
||||
'id': '2164402062',
|
||||
'id': '1.1711287',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cancer survivor four times over',
|
||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
'series': 'CBC News: Windsor at 6:00',
|
||||
'categories': ['News/Canada/Windsor'],
|
||||
'categories': ['Windsor'],
|
||||
'location': 'Windsor',
|
||||
'tags': ['cancer'],
|
||||
'creators': ['Allison Johnson'],
|
||||
'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
|
||||
'media_type': 'Excerpt',
|
||||
'genres': ['News'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Has subtitles
|
||||
# These broadcasts expire after ~1 month, can find new test URL here:
|
||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||
'url': 'https://www.cbc.ca/player/play/1.7159484',
|
||||
'md5': '6ed6cd0fc2ef568d2297ba68a763d455',
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6424403',
|
||||
'md5': '8025909eaffcf0adf59922904def9a5e',
|
||||
'info_dict': {
|
||||
'id': '2324213316001',
|
||||
'id': '9.6424403',
|
||||
'ext': 'mp4',
|
||||
'title': 'The National | School boards sue social media giants',
|
||||
'description': 'md5:4b4db69322fa32186c3ce426da07402c',
|
||||
'timestamp': 1711681200,
|
||||
'duration': 2743.400,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'title': 'The National | N.W.T. wildfire emergency',
|
||||
'description': 'md5:ada33d36d1df69347ed575905bfd496c',
|
||||
'timestamp': 1718589600,
|
||||
'duration': 2692.833,
|
||||
'subtitles': {
|
||||
'en-US': [{
|
||||
'name': 'English Captions',
|
||||
'url': 'https://cbchls.akamaized.net/delivery/news-shows/2024/06/17/NAT_JUN16-00-55-00/NAT_JUN16_cc.vtt',
|
||||
}],
|
||||
},
|
||||
'thumbnail': 'https://i.cbc.ca/ais/6272b5c6-5e78-4c05-915d-0e36672e33d1,1714756287822/full/max/0/default.jpg',
|
||||
'chapters': 'count:5',
|
||||
'upload_date': '20240329',
|
||||
'categories': 'count:4',
|
||||
'upload_date': '20240617',
|
||||
'categories': ['News', 'The National', 'The National Latest Broadcasts'],
|
||||
'series': 'The National - Full Show',
|
||||
'tags': 'count:1',
|
||||
'creators': ['News'],
|
||||
'tags': ['The National'],
|
||||
'location': 'Canada',
|
||||
'media_type': 'Full Program',
|
||||
'genres': ['News'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
|
||||
'md5': '188b96cf6bdcb2540e178a6caa957128',
|
||||
'info_dict': {
|
||||
'id': '2334524995812',
|
||||
'id': '1.7194274',
|
||||
'ext': 'mp4',
|
||||
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
|
||||
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
|
||||
'timestamp': 1714788791,
|
||||
'duration': 77.678,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'chapters': 'count:0',
|
||||
'upload_date': '20240504',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg',
|
||||
'chapters': [],
|
||||
'categories': 'count:3',
|
||||
'series': 'The National',
|
||||
'tags': 'count:15',
|
||||
'creators': ['encoder'],
|
||||
'tags': 'count:17',
|
||||
'location': 'Canada',
|
||||
'media_type': 'Excerpt',
|
||||
'upload_date': '20240504',
|
||||
'genres': ['News'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6427282',
|
||||
'info_dict': {
|
||||
'id': '9.6427282',
|
||||
'ext': 'mp4',
|
||||
'title': 'Men\'s Soccer - Argentina vs Morocco',
|
||||
'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.',
|
||||
'series': 'CBC Sports',
|
||||
'media_type': 'Event Coverage',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg',
|
||||
'timestamp': 1721825400.0,
|
||||
'upload_date': '20240724',
|
||||
'duration': 10568.0,
|
||||
'chapters': [],
|
||||
'genres': [],
|
||||
'tags': ['2024 Paris Olympic Games'],
|
||||
'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'],
|
||||
'location': 'Canada',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6459530',
|
||||
'md5': '6c1bb76693ab321a2e99c347a1d5ecbc',
|
||||
'info_dict': {
|
||||
'id': '9.6459530',
|
||||
'ext': 'mp4',
|
||||
'title': 'Parts of Jasper incinerated as wildfire rages',
|
||||
'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962',
|
||||
'series': 'The National',
|
||||
'media_type': 'Excerpt',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg',
|
||||
'timestamp': 1721964091.012,
|
||||
'upload_date': '20240726',
|
||||
'duration': 952.285,
|
||||
'chapters': [],
|
||||
'genres': [],
|
||||
'tags': 'count:23',
|
||||
'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/9.6420651',
|
||||
'md5': '71a850c2c6ee5e912de169f5311bb533',
|
||||
'info_dict': {
|
||||
'id': '9.6420651',
|
||||
'ext': 'mp4',
|
||||
'title': 'Is it a breath of fresh air? Measuring air quality in Edmonton',
|
||||
'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3',
|
||||
'series': 'CBC News Edmonton',
|
||||
'media_type': 'Excerpt',
|
||||
'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg',
|
||||
'timestamp': 1718220065.768,
|
||||
'upload_date': '20240612',
|
||||
'duration': 286.086,
|
||||
'chapters': [],
|
||||
'genres': ['News'],
|
||||
'categories': ['News', 'Edmonton'],
|
||||
'tags': 'count:7',
|
||||
'location': 'Edmonton',
|
||||
},
|
||||
}, {
|
||||
'url': 'cbcplayer:1.7159484',
|
||||
@@ -307,23 +374,113 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_param(self, asset_data, name):
|
||||
return traverse_obj(asset_data, ('params', lambda _, v: v['name'] == name, 'value', {str}, any))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
if '.' in video_id:
|
||||
webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
|
||||
video_id = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage,
|
||||
'initial state', video_id)['video']['currentClip']['mediaId']
|
||||
webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
|
||||
data = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)['video']['currentClip']
|
||||
assets = traverse_obj(
|
||||
data, ('media', 'assets', lambda _, v: url_or_none(v['key']) and v['type']))
|
||||
|
||||
if not assets and (media_id := traverse_obj(data, ('mediaId', {str}))):
|
||||
# XXX: Deprecated; CBC is migrating off of ThePlatform
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', {
|
||||
'force_smil_url': True,
|
||||
}),
|
||||
'id': media_id,
|
||||
'_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
|
||||
}
|
||||
|
||||
is_live = traverse_obj(data, ('media', 'streamType', {str})) == 'Live'
|
||||
formats, subtitles = [], {}
|
||||
|
||||
for sub in traverse_obj(data, ('media', 'textTracks', lambda _, v: url_or_none(v['src']))):
|
||||
subtitles.setdefault(sub.get('language') or 'und', []).append({
|
||||
'url': sub['src'],
|
||||
'name': sub.get('label'),
|
||||
})
|
||||
|
||||
for asset in assets:
|
||||
asset_key = asset['key']
|
||||
asset_type = asset['type']
|
||||
if asset_type != 'medianet':
|
||||
self.report_warning(f'Skipping unsupported asset type "{asset_type}": {asset_key}')
|
||||
continue
|
||||
asset_data = self._download_json(asset_key, video_id, f'Downloading {asset_type} JSON')
|
||||
ext = mimetype2ext(self._parse_param(asset_data, 'contentType'))
|
||||
if ext == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
asset_data['url'], video_id, 'mp4', m3u8_id='hls', live=is_live)
|
||||
formats.extend(fmts)
|
||||
# Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available
|
||||
if not subtitles:
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
if is_live or not fmts:
|
||||
continue
|
||||
# Check for direct https mp4 format
|
||||
best_video_fmt = traverse_obj(fmts, (
|
||||
lambda _, v: v.get('vcodec') != 'none' and v['tbr'], all,
|
||||
{functools.partial(sorted, key=lambda x: x['tbr'])}, -1, {dict})) or {}
|
||||
base_url = self._search_regex(
|
||||
r'(https?://[^?#]+?/)hdntl=', best_video_fmt.get('url'), 'base url', default=None)
|
||||
if not base_url or '/live/' in base_url:
|
||||
continue
|
||||
mp4_url = base_url + replace_extension(url_basename(best_video_fmt['url']), 'mp4')
|
||||
if self._request_webpage(
|
||||
HEADRequest(mp4_url), video_id, 'Checking for https format',
|
||||
errnote=False, fatal=False):
|
||||
formats.append({
|
||||
**best_video_fmt,
|
||||
'url': mp4_url,
|
||||
'format_id': 'https-mp4',
|
||||
'protocol': 'https',
|
||||
'manifest_url': None,
|
||||
'acodec': None,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': asset_data['url'],
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if self._parse_param(asset_data, 'mediaType') == 'audio' else None,
|
||||
})
|
||||
|
||||
chapters = traverse_obj(data, (
|
||||
'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
|
||||
'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}),
|
||||
'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}),
|
||||
'title': ('name', {str}),
|
||||
}))
|
||||
# Filter out pointless single chapters with start_time==0 and no end_time
|
||||
if len(chapters) == 1 and not (chapters[0].get('start_time') or chapters[0].get('end_time')):
|
||||
chapters = []
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{video_id}?mbr=true&formats=MPEG4,FLV,MP3', {
|
||||
'force_smil_url': True,
|
||||
}),
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str.strip}),
|
||||
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
|
||||
'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}),
|
||||
'media_type': ('media', 'clipType', {str}),
|
||||
'series': ('showName', {str}),
|
||||
'season_number': ('media', 'season', {int_or_none}),
|
||||
'duration': ('media', 'duration', {float_or_none}, {lambda x: None if is_live else x}),
|
||||
'location': ('media', 'region', {str}),
|
||||
'tags': ('tags', ..., 'name', {str}),
|
||||
'genres': ('media', 'genre', all),
|
||||
'categories': ('categories', ..., 'name', {str}),
|
||||
}),
|
||||
'id': video_id,
|
||||
'_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': chapters,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
@@ -365,14 +522,13 @@ class CBCGemIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# This is a normal, public, TV show video
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||
'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s06e01',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoke Signals',
|
||||
'description': 'md5:929868d20021c924020641769eb3e7f1',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
||||
'duration': 1314,
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
|
||||
'duration': 1324,
|
||||
'categories': ['comedy'],
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season': 'Season 6',
|
||||
@@ -380,19 +536,21 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode': 'Smoke Signals',
|
||||
'episode_number': 1,
|
||||
'episode_id': 'schitts-creek/s06e01',
|
||||
'upload_date': '20210618',
|
||||
'timestamp': 1623988800,
|
||||
'release_date': '20200107',
|
||||
'release_timestamp': 1578427200,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# This video requires an account in the browser, but works fine in yt-dlp
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
|
||||
'md5': '297a9600f554f2258aed01514226a697',
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s01e01',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Cup Runneth Over',
|
||||
'description': 'md5:9bca14ea49ab808097530eb05a29e797',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg',
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
@@ -401,9 +559,12 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode_id': 'schitts-creek/s01e01',
|
||||
'duration': 1309,
|
||||
'categories': ['comedy'],
|
||||
'upload_date': '20210617',
|
||||
'timestamp': 1623902400,
|
||||
'release_date': '20151124',
|
||||
'release_timestamp': 1448323200,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
|
||||
'only_matching': True,
|
||||
@@ -455,10 +616,8 @@ class CBCGemIE(InfoExtractor):
|
||||
|
||||
def claims_token_expired(self):
|
||||
exp = self._get_claims_token_expiry()
|
||||
if exp - time.time() < 10:
|
||||
# It will expire in less than 10 seconds, or has already expired
|
||||
return True
|
||||
return False
|
||||
# It will expire in less than 10 seconds, or has already expired
|
||||
return exp - time.time() < 10
|
||||
|
||||
def claims_token_valid(self):
|
||||
return self._claims_token is not None and not self.claims_token_expired()
|
||||
@@ -474,38 +633,6 @@ class CBCGemIE(InfoExtractor):
|
||||
return
|
||||
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
||||
|
||||
def _find_secret_formats(self, formats, video_id):
|
||||
""" Find a valid video url and convert it to the secret variant """
|
||||
base_format = next((f for f in formats if f.get('vcodec') != 'none'), None)
|
||||
if not base_format:
|
||||
return
|
||||
|
||||
base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])
|
||||
url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
|
||||
|
||||
secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
|
||||
if not isinstance(secret_xml, xml.etree.ElementTree.Element):
|
||||
return
|
||||
|
||||
for child in secret_xml:
|
||||
if child.attrib.get('Type') != 'video':
|
||||
continue
|
||||
for video_quality in child:
|
||||
bitrate = int_or_none(video_quality.attrib.get('Bitrate'))
|
||||
if not bitrate or 'Index' not in video_quality.attrib:
|
||||
continue
|
||||
height = int_or_none(video_quality.attrib.get('MaxHeight'))
|
||||
|
||||
yield {
|
||||
**base_format,
|
||||
'format_id': join_nonempty('sec', height),
|
||||
# Note: \g<1> is necessary instead of \1 since bitrate is a number
|
||||
'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
|
||||
'width': int_or_none(video_quality.attrib.get('MaxWidth')),
|
||||
'tbr': bitrate / 1000.0,
|
||||
'height': height,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = self._download_json(
|
||||
@@ -519,7 +646,6 @@ class CBCGemIE(InfoExtractor):
|
||||
else:
|
||||
headers = {}
|
||||
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
|
||||
m3u8_url = m3u8_info.get('url')
|
||||
|
||||
if m3u8_info.get('errorCode') == 1:
|
||||
self.raise_geo_restricted(countries=['CA'])
|
||||
@@ -528,9 +654,9 @@ class CBCGemIE(InfoExtractor):
|
||||
elif m3u8_info.get('errorCode') != 0:
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
|
||||
self._remove_duplicate_formats(formats)
|
||||
formats.extend(self._find_secret_formats(formats, video_id))
|
||||
|
||||
for fmt in formats:
|
||||
if fmt.get('vcodec') == 'none':
|
||||
@@ -546,20 +672,21 @@ class CBCGemIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('description'),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'series': video_info.get('series'),
|
||||
'season_number': video_info.get('season'),
|
||||
'season': f'Season {video_info.get("season")}',
|
||||
'episode_number': video_info.get('episode'),
|
||||
'episode': video_info.get('title'),
|
||||
'episode_id': video_id,
|
||||
'duration': video_info.get('duration'),
|
||||
'categories': [video_info.get('category')],
|
||||
'formats': formats,
|
||||
'release_timestamp': video_info.get('airDate'),
|
||||
'timestamp': video_info.get('availableDate'),
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'series': ('series', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'categories': ('category', {str}, all),
|
||||
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
@@ -649,11 +776,11 @@ class CBCGemLiveIE(InfoExtractor):
|
||||
'title': 'Ottawa',
|
||||
'description': 'The live TV channel and local programming from Ottawa',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
'id': 'AyqZwxRqh8EH',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1492106160,
|
||||
'upload_date': '20170413',
|
||||
'release_timestamp': 1492106160,
|
||||
'release_date': '20170413',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Live might have ended',
|
||||
@@ -682,49 +809,84 @@ class CBCGemLiveIE(InfoExtractor):
|
||||
'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
|
||||
'timestamp': 1679706000,
|
||||
'upload_date': '20230325',
|
||||
'release_timestamp': 1679706000,
|
||||
'release_date': '20230325',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Live might have ended',
|
||||
},
|
||||
{ # event replay (medianetlive)
|
||||
'url': 'https://gem.cbc.ca/live-event/42314',
|
||||
'md5': '297a9600f554f2258aed01514226a697',
|
||||
'info_dict': {
|
||||
'id': '42314',
|
||||
'ext': 'mp4',
|
||||
'live_status': 'was_live',
|
||||
'title': 'Women\'s Soccer - Canada vs New Zealand',
|
||||
'description': 'md5:36200e5f1a70982277b5a6ecea86155d',
|
||||
'thumbnail': r're:https://.+default\.jpg',
|
||||
'release_timestamp': 1721917200,
|
||||
'release_date': '20240725',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Replay might no longer be available',
|
||||
},
|
||||
{ # event replay (medianetlive)
|
||||
'url': 'https://gem.cbc.ca/live-event/43273',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']
|
||||
|
||||
# Two types of metadata JSON
|
||||
# Three types of video_info JSON: info in root, freeTv stream/item, event replay
|
||||
if not video_info.get('formattedIdMedia'):
|
||||
video_info = traverse_obj(
|
||||
video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}),
|
||||
get_all=False, default={})
|
||||
if traverse_obj(video_info, ('event', 'key')) == video_id:
|
||||
video_info = video_info['event']
|
||||
else:
|
||||
video_info = traverse_obj(video_info, (
|
||||
('freeTv', ('streams', ...)), 'items',
|
||||
lambda _, v: v['key'].partition('-')[0] == video_id, any)) or {}
|
||||
|
||||
video_stream_id = video_info.get('formattedIdMedia')
|
||||
if not video_stream_id:
|
||||
raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
|
||||
raise ExtractorError(
|
||||
'Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
|
||||
|
||||
stream_data = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
||||
'appCode': 'mpx',
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'idMedia': video_stream_id,
|
||||
'multibitrate': 'true',
|
||||
'output': 'json',
|
||||
'tech': 'hls',
|
||||
'manifestType': 'desktop',
|
||||
})
|
||||
live_status = 'was_live' if video_info.get('isVodEnabled') else 'is_live'
|
||||
release_timestamp = traverse_obj(video_info, ('airDate', {parse_iso8601}))
|
||||
|
||||
if live_status == 'is_live' and release_timestamp and release_timestamp > time.time():
|
||||
formats = []
|
||||
live_status = 'is_upcoming'
|
||||
self.raise_no_formats('This livestream has not yet started', expected=True)
|
||||
else:
|
||||
stream_data = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
||||
'appCode': 'medianetlive',
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'idMedia': video_stream_id,
|
||||
'multibitrate': 'true',
|
||||
'output': 'json',
|
||||
'tech': 'hls',
|
||||
'manifestType': 'desktop',
|
||||
})
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream_data['url'], video_id, 'mp4', live=live_status == 'is_live')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'live_status': live_status,
|
||||
'release_timestamp': release_timestamp,
|
||||
**traverse_obj(video_info, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('images', 'card', 'url'),
|
||||
'timestamp': ('airDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import base64
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import zlib
|
||||
|
||||
|
||||
@@ -12,53 +12,86 @@ from ..utils import (
|
||||
|
||||
|
||||
class CCMAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
|
||||
IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
|
||||
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
'md5': '7296ca43977c8ea4469e719c609b0871',
|
||||
'info_dict': {
|
||||
'id': '5630208',
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'espot de La Marató de TV3',
|
||||
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||
'timestamp': 1478608140,
|
||||
'upload_date': '20161108',
|
||||
'age_limit': 0,
|
||||
'alt_title': 'EsportMarató2016WEB_PerPublicar',
|
||||
'duration': 79,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
|
||||
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'categories': ['Divulgació'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
'md5': 'fa3e38f269329a278271276330261425',
|
||||
'info_dict': {
|
||||
'id': '943685',
|
||||
'ext': 'mp3',
|
||||
'title': 'El Consell de Savis analitza el derbi',
|
||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||
'upload_date': '20170512',
|
||||
'timestamp': 1494622500,
|
||||
'upload_date': '20161217',
|
||||
'timestamp': 1482011700,
|
||||
'vcodec': 'none',
|
||||
'categories': ['Esports'],
|
||||
'series': 'Tot gira',
|
||||
'duration': 821,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
|
||||
'md5': '27493513d08a3e5605814aee9bb778d2',
|
||||
'info_dict': {
|
||||
'id': '6031387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||
'timestamp': 1582577700,
|
||||
'timestamp': 1582577919,
|
||||
'upload_date': '20200224',
|
||||
'subtitles': 'mincount:4',
|
||||
'age_limit': 16,
|
||||
'subtitles': 'mincount:1',
|
||||
'age_limit': 13,
|
||||
'series': 'Crims',
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
|
||||
'duration': 3203,
|
||||
'categories': ['Divulgació'],
|
||||
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'episode_number': 5,
|
||||
'episode': 'Episode 5',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
|
||||
'info_dict': {
|
||||
'id': '5759227',
|
||||
'ext': 'mp4',
|
||||
'title': 'Una mosca volava per la llum',
|
||||
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
|
||||
'description': 'md5:9ab64276944b0825336f4147f13f7854',
|
||||
'series': 'Mic',
|
||||
'upload_date': '20180411',
|
||||
'timestamp': 1523440105,
|
||||
'duration': 160,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
|
||||
'categories': ['Música'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_type, media_id = self._match_valid_url(url).groups()
|
||||
media_type, media_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
media = self._download_json(
|
||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'format': 'dm',
|
||||
|
||||
@@ -12,6 +12,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_ord
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
@@ -351,3 +352,50 @@ class CDAIE(InfoExtractor):
|
||||
extract_format(webpage, resolution)
|
||||
|
||||
return merge_dicts(info_dict, info)
|
||||
|
||||
|
||||
class CDAFolderIE(InfoExtractor):
|
||||
_MAX_PAGE_SIZE = 36
|
||||
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cda.pl/domino264/folder/31188385',
|
||||
'info_dict': {
|
||||
'id': '31188385',
|
||||
'title': 'SERIA DRUGA',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
|
||||
'info_dict': {
|
||||
'id': '2664592',
|
||||
'title': 'VideoDowcipy - wszystkie odcinki',
|
||||
},
|
||||
'playlist_mincount': 71,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
|
||||
'info_dict': {
|
||||
'id': '19129979',
|
||||
'title': 'TESTY KOSMETYKÓW',
|
||||
},
|
||||
'playlist_mincount': 139,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
folder_id, channel = self._match_valid_url(url).group('id', 'channel')
|
||||
|
||||
webpage = self._download_webpage(url, folder_id)
|
||||
|
||||
def extract_page_entries(page):
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page + 1}', folder_id,
|
||||
f'Downloading page {page + 1}', expected_status=404)
|
||||
items = re.findall(r'<a[^>]+href="/video/([0-9a-z]+)"', webpage)
|
||||
for video_id in items:
|
||||
yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id)
|
||||
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(extract_page_entries, self._MAX_PAGE_SIZE),
|
||||
folder_id, self._og_search_title(webpage))
|
||||
|
||||
@@ -1,63 +1,50 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj
|
||||
from .vidyard import VidyardBaseIE, VidyardIE
|
||||
from ..utils import ExtractorError, make_archive_id, url_basename
|
||||
|
||||
|
||||
class CellebriteIE(InfoExtractor):
|
||||
class CellebriteIE(VidyardBaseIE):
|
||||
_VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
|
||||
'info_dict': {
|
||||
'id': '16025876',
|
||||
'id': 'ZqmUss3dQfEMGpauambPuH',
|
||||
'display_id': '16025876',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:174571cb97083fd1d457d75c684f4e2b',
|
||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
|
||||
'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
|
||||
'duration': 455,
|
||||
'tags': [],
|
||||
'description': 'md5:dee48fe12bbae5c01fe6a053f7676da4',
|
||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
|
||||
'duration': 455.979,
|
||||
'_old_archive_ids': ['cellebrite 16025876'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
|
||||
'info_dict': {
|
||||
'id': '29018255',
|
||||
'id': 'QV1U8a2yzcxigw7VFnqKyg',
|
||||
'display_id': '29018255',
|
||||
'ext': 'mp4',
|
||||
'duration': 134,
|
||||
'tags': [],
|
||||
'description': 'md5:e9a3d124c7287b0b07bad2547061cacf',
|
||||
'title': 'How to Lawfully Collect the Maximum Amount of Data From Android Devices',
|
||||
'description': 'md5:0e943a9ac14c374d5d74faed634d773c',
|
||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
|
||||
'title': 'Android Extractions Explained',
|
||||
'duration': 134.315,
|
||||
'_old_archive_ids': ['cellebrite 29018255'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_formats_and_subtitles(self, json_data, display_id):
|
||||
formats = [{'url': url} for url in traverse_obj(json_data, ('mp4', ..., 'url')) or []]
|
||||
subtitles = {}
|
||||
|
||||
for url in traverse_obj(json_data, ('hls', ..., 'url')) or []:
|
||||
fmt, sub = self._extract_m3u8_formats_and_subtitles(
|
||||
url, display_id, ext='mp4', headers={'Referer': 'https://play.vidyard.com/'})
|
||||
formats.extend(fmt)
|
||||
self._merge_subtitles(sub, target=subtitles)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
slug = self._match_id(url)
|
||||
webpage = self._download_webpage(url, slug)
|
||||
vidyard_url = next(VidyardIE._extract_embed_urls(url, webpage), None)
|
||||
if not vidyard_url:
|
||||
raise ExtractorError('No Vidyard video embeds found on page')
|
||||
|
||||
player_uuid = self._search_regex(
|
||||
r'<img\s[^>]*\bdata-uuid\s*=\s*"([^"\?]+)', webpage, 'player UUID')
|
||||
json_data = self._download_json(
|
||||
f'https://play.vidyard.com/player/{player_uuid}.json', display_id)['payload']['chapters'][0]
|
||||
video_id = url_basename(vidyard_url)
|
||||
info = self._process_video_json(self._fetch_video_json(video_id)['chapters'][0], video_id)
|
||||
if info.get('display_id'):
|
||||
info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])]
|
||||
if thumbnail := self._og_search_thumbnail(webpage, default=None):
|
||||
info.setdefault('thumbnails', []).append({'url': thumbnail})
|
||||
|
||||
formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], display_id)
|
||||
return {
|
||||
'id': str(json_data['videoId']),
|
||||
'title': json_data.get('name') or self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'description': json_data.get('description') or self._og_search_description(webpage),
|
||||
'duration': json_data.get('seconds'),
|
||||
'tags': json_data.get('tags'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'http_headers': {'Referer': 'https://play.vidyard.com/'},
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
**info,
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ class CHZZKLiveIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
live_detail = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
|
||||
f'https://api.chzzk.naver.com/service/v3/channels/{channel_id}/live-detail', channel_id,
|
||||
note='Downloading channel info', errnote='Unable to download channel info')['content']
|
||||
|
||||
if live_detail.get('status') == 'CLOSE':
|
||||
@@ -106,12 +106,45 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
'upload_date': '20231219',
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Replay video is expired',
|
||||
}, {
|
||||
# Manually uploaded video
|
||||
'url': 'https://chzzk.naver.com/video/1980',
|
||||
'info_dict': {
|
||||
'id': '1980',
|
||||
'ext': 'mp4',
|
||||
'title': '※시청주의※한번보면 잊기 힘든 영상',
|
||||
'channel': '라디유radiyu',
|
||||
'channel_id': '68f895c59a1043bc5019b5e08c83a5c5',
|
||||
'channel_is_verified': False,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 95,
|
||||
'timestamp': 1703102631.722,
|
||||
'upload_date': '20231220',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# Partner channel replay video
|
||||
'url': 'https://chzzk.naver.com/video/2458',
|
||||
'info_dict': {
|
||||
'id': '2458',
|
||||
'ext': 'mp4',
|
||||
'title': '첫 방송',
|
||||
'channel': '강지',
|
||||
'channel_id': 'b5ed5db484d04faf4d150aedd362f34b',
|
||||
'channel_is_verified': True,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 4433,
|
||||
'timestamp': 1703307460.214,
|
||||
'upload_date': '20231223',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_meta = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
|
||||
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
|
||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
||||
|
||||
@@ -6,11 +6,11 @@ from .common import InfoExtractor
|
||||
class CloudflareStreamIE(InfoExtractor):
|
||||
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
|
||||
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
|
||||
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_EMBED_REGEX = [
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
|
||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||
]
|
||||
_TESTS = [{
|
||||
@@ -24,6 +24,14 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e',
|
||||
'info_dict': {
|
||||
'id': '0e8e040aec776862e1d632a699edf59e',
|
||||
'ext': 'mp4',
|
||||
'title': '0e8e040aec776862e1d632a699edf59e',
|
||||
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||
'only_matching': True,
|
||||
@@ -36,6 +44,9 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJraWQiOiJmYTA0YjViMzQ2NDkwYTM5NWJiNzQ1NWFhZTA2YzYwZSIsInN1YiI6Ijg4ZDQxMDhhMzY0MjA3M2VhYmFhZjg3ZGExODJkMjYzIiwiZXhwIjoxNjAwNjA5MzE5fQ.xkRJwLGkt0nZ%5F0BlPiwU7iW4pqb4lKkznbKfAhGg0tGcxSS6ZBA3lcTUwu7W%2DyCFbnAl%2Dhqk3Fn%5FqeQS%5FQydP27qTHpB9iIFFsMtk1tqzGZV5v4yrYDnwLSKzEKvVd6QwJnfABtxH2JdpSNuWlMUiVXFxGWgjOw6QeTNDDklTQYXV%5FNLV7sErSn5CeOPeRRkdXb%2D8ip%5FVOcfk1nDsFoOo4fctFtGP0wYMyY5ae8nhhatydHwevuvJCcEvEfh%2D4qjq9mCZOodevmtSQ4YWmggf4BxtWnDWYrGW8Otp6oqezrR8oY4%2DbKdV6PaqBj49aJdcls6xK7PmM8%5Fvjy3xfm0Mg',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -35,6 +37,20 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'duration': 1205,
|
||||
'upload_date': '20221130',
|
||||
},
|
||||
}, {
|
||||
# Video-only m3u8 formats need manual fixup
|
||||
'url': 'https://embed.cloudycdn.services/ltv/media/08j_d24-6000-074',
|
||||
'md5': 'fc472e40f6e6238446509be411c920e2',
|
||||
'info_dict': {
|
||||
'id': '08j_d24-6000-074',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240620',
|
||||
'duration': 1673,
|
||||
'title': 'D24-6000-074-cetstud',
|
||||
'timestamp': 1718902233,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
|
||||
@@ -63,6 +79,9 @@ class CloudyCDNIE(InfoExtractor):
|
||||
formats, subtitles = [], {}
|
||||
for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
|
||||
for fmt in fmts:
|
||||
if re.search(r'chunklist_b\d+_vo_', fmt['url']):
|
||||
fmt['acodec'] = 'none'
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
|
||||
@@ -1,146 +1,226 @@
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import merge_dicts, try_call, url_basename
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
try_call,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_elements, traverse_obj
|
||||
|
||||
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'md5': '3e6121ea48df7e2259fe73a0628605c4',
|
||||
'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'info_dict': {
|
||||
'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
|
||||
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nadal wins 8th French Open title',
|
||||
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
'upload_date': '20240531',
|
||||
'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
|
||||
'duration': 373.0,
|
||||
'timestamp': 1717148586,
|
||||
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
|
||||
'modified_date': '20240531',
|
||||
'modified_timestamp': 1717150140,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'info_dict': {
|
||||
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
|
||||
'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
|
||||
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'ext': 'mp4',
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."',
|
||||
'upload_date': '20130821',
|
||||
'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
|
||||
'title': 'Here’s how some inmates in closely divided state are now able to vote from jail',
|
||||
'timestamp': 1718158269,
|
||||
'upload_date': '20240612',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
|
||||
'duration': 202.0,
|
||||
'modified_date': '20240612',
|
||||
'modified_timestamp': 1718158509,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
|
||||
'info_dict': {
|
||||
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
|
||||
'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
|
||||
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
|
||||
'description': 'md5:19f78338ccec533db0fa8a4511012dae',
|
||||
'title': 'Video shows King Charles\' portrait being vandalized by activists',
|
||||
'timestamp': 1718113852,
|
||||
'upload_date': '20240611',
|
||||
'duration': 51.0,
|
||||
'modified_timestamp': 1718116193,
|
||||
'modified_date': '20240611',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'info_dict': {
|
||||
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
|
||||
'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
|
||||
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'ext': 'mp4',
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
|
||||
'duration': 158.0,
|
||||
'title': 'Robin Meade signs off after HLN\'s last broadcast',
|
||||
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
|
||||
'upload_date': '20221205',
|
||||
'timestamp': 1670284296,
|
||||
'modified_timestamp': 1670332404,
|
||||
'modified_date': '20221206',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'info_dict': {
|
||||
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'timestamp': 1729501452,
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
|
||||
'description': 'md5:256ee7137d161f776cda429654135e52',
|
||||
'upload_date': '20241021',
|
||||
'duration': 31.0,
|
||||
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
|
||||
'modified_date': '20241021',
|
||||
'modified_timestamp': 1729501530,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
|
||||
'only_matching': True,
|
||||
'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
|
||||
'info_dict': {
|
||||
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
|
||||
'info_dict': {
|
||||
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
|
||||
'ext': 'mp4',
|
||||
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
|
||||
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
|
||||
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
|
||||
'duration': 173.0,
|
||||
'timestamp': 1729122182,
|
||||
'upload_date': '20241016',
|
||||
'modified_timestamp': 1729194706,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'md5': '11604ab4af83b650826753f1ccb8ecff',
|
||||
'info_dict': {
|
||||
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
|
||||
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
|
||||
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
|
||||
'duration': 145.0,
|
||||
'timestamp': 1729137765,
|
||||
'upload_date': '20241017',
|
||||
'modified_timestamp': 1729138184,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}],
|
||||
}]
|
||||
|
||||
_CONFIG = {
|
||||
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
|
||||
'edition': {
|
||||
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
|
||||
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
|
||||
},
|
||||
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
|
||||
'money': {
|
||||
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
|
||||
'media_src': 'http://ht3.cdn.turner.com/money/big',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
|
||||
# TODO: fix timestamp extraction
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, path, page_title = self._match_valid_url(url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
display_id = self._match_valid_url(url).group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
app_id = traverse_obj(
|
||||
self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
|
||||
('TOP_AUTH_SERVICE_APP_ID', {str}))
|
||||
|
||||
entries = []
|
||||
for player_data in traverse_obj(webpage, (
|
||||
{find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
|
||||
..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
|
||||
media_id = player_data['data-media-id']
|
||||
parent_uri = player_data.get('data-video-resource-parent-uri')
|
||||
formats, subtitles = [], {}
|
||||
|
||||
video_data = {}
|
||||
if parent_uri:
|
||||
video_data = self._download_json(
|
||||
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
|
||||
query={
|
||||
'id': media_id,
|
||||
'stellarUri': parent_uri,
|
||||
})
|
||||
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
|
||||
resolution, bitrate = None, None
|
||||
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
|
||||
resolution, bitrate = mobj.group('res', 'tbr')
|
||||
formats.append({
|
||||
'url': direct_url,
|
||||
'format_id': 'direct',
|
||||
'quality': 1,
|
||||
'tbr': int_or_none(bitrate),
|
||||
**parse_resolution(resolution),
|
||||
})
|
||||
for sub_data in traverse_obj(video_data, (
|
||||
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
|
||||
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
|
||||
'url': sub_data['url'],
|
||||
'name': sub_data.get('label'),
|
||||
})
|
||||
|
||||
if app_id:
|
||||
media_data = self._download_json(
|
||||
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
|
||||
query={'appId': app_id})
|
||||
m3u8_url = traverse_obj(media_data, (
|
||||
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
|
||||
if m3u8_url:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
entries.append({
|
||||
**traverse_obj(player_data, {
|
||||
'title': ('data-headline', {clean_html}),
|
||||
'description': ('data-description', {clean_html}),
|
||||
'duration': ('data-duration', {parse_duration}),
|
||||
'timestamp': ('data-publish-date', {parse_iso8601}),
|
||||
'thumbnail': (
|
||||
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
|
||||
{functools.partial(update_url, query='c=original')}),
|
||||
'display_id': 'data-video-slug',
|
||||
}),
|
||||
**traverse_obj(video_data, {
|
||||
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
|
||||
'description': ('description', {clean_html}),
|
||||
'title': ('headline', {str}),
|
||||
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
|
||||
'duration': ('trt', {int_or_none}),
|
||||
}),
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**entries[0],
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
|
||||
_TEST = {
|
||||
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
|
||||
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Criminalizing journalism?',
|
||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||
'upload_date': '20140209',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
||||
return self.url_result(cnn_url, CNNIE.ie_key())
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: Cyberattack not an act of war',
|
||||
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
|
||||
class CNNIndonesiaIE(InfoExtractor):
|
||||
|
||||
@@ -35,6 +35,7 @@ from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
TransportError,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
@@ -46,6 +47,7 @@ from ..utils import (
|
||||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
ISO639Utils,
|
||||
LenientJSONDecoder,
|
||||
Popen,
|
||||
RegexNotFoundError,
|
||||
@@ -234,7 +236,14 @@ class InfoExtractor:
|
||||
'maybe' if the format may have DRM and has to be tested before download.
|
||||
* extra_param_to_segment_url A query string to append to each
|
||||
fragment's URL, or to update each existing query string
|
||||
with. Only applied by the native HLS/DASH downloaders.
|
||||
with. If it is an HLS stream with an AES-128 decryption key,
|
||||
the query paramaters will be passed to the key URI as well,
|
||||
unless there is an `extra_param_to_key_url` given,
|
||||
or unless an external key URI is provided via `hls_aes`.
|
||||
Only applied by the native HLS/DASH downloaders.
|
||||
* extra_param_to_key_url A query string to append to the URL
|
||||
of the format's HLS AES-128 decryption key.
|
||||
Only applied by the native HLS downloader.
|
||||
* hls_aes A dictionary of HLS AES-128 decryption information
|
||||
used by the native HLS downloader to override the
|
||||
values in the media playlist when an '#EXT-X-KEY' tag
|
||||
@@ -325,7 +334,7 @@ class InfoExtractor:
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
repost_count: Number of reposts of the video
|
||||
average_rating: Average rating give by users, the scale used depends on the webpage
|
||||
average_rating: Average rating given by users, the scale used depends on the webpage
|
||||
comment_count: Number of comments on the video
|
||||
comments: A list of comments, each with one or more of the following
|
||||
properties (all but one of text or html optional):
|
||||
@@ -512,7 +521,7 @@ class InfoExtractor:
|
||||
or _extract_from_webpage as necessary. While these are normally classmethods,
|
||||
_extract_from_webpage is allowed to be an instance method.
|
||||
|
||||
_extract_from_webpage may raise self.StopExtraction() to stop further
|
||||
_extract_from_webpage may raise self.StopExtraction to stop further
|
||||
processing of the webpage and obtain exclusive rights to it. This is useful
|
||||
when the extractor cannot reliably be matched using just the URL,
|
||||
e.g. invidious/peertube instances
|
||||
@@ -565,13 +574,13 @@ class InfoExtractor:
|
||||
|
||||
def _login_hint(self, method=NO_DEFAULT, netrc=None):
|
||||
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
|
||||
cookies_hint = 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'
|
||||
return {
|
||||
None: '',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}. {cookies_hint}',
|
||||
'password': f'Use {password_hint}',
|
||||
'cookies': (
|
||||
'Use --cookies-from-browser or --cookies for the authentication. '
|
||||
'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
|
||||
'cookies': f'Use --cookies-from-browser or --cookies for the authentication. {cookies_hint}',
|
||||
'session_cookies': f'Use --cookies for the authentication (--cookies-from-browser might not work). {cookies_hint}',
|
||||
}[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
@@ -958,6 +967,9 @@ class InfoExtractor:
|
||||
return False
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data)
|
||||
if content is False:
|
||||
assert not fatal
|
||||
return False
|
||||
return (content, urlh)
|
||||
|
||||
@staticmethod
|
||||
@@ -1032,7 +1044,15 @@ class InfoExtractor:
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||
prefix=None, encoding=None, data=None):
|
||||
webpage_bytes = urlh.read()
|
||||
try:
|
||||
webpage_bytes = urlh.read()
|
||||
except TransportError as err:
|
||||
errmsg = f'{video_id}: Error reading response: {err.msg}'
|
||||
if fatal:
|
||||
raise ExtractorError(errmsg, cause=err)
|
||||
self.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
@@ -1389,6 +1409,13 @@ class InfoExtractor:
|
||||
return None, None
|
||||
|
||||
self.write_debug(f'Using netrc for {netrc_machine} authentication')
|
||||
|
||||
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
|
||||
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
|
||||
if sys.version_info < (3, 11):
|
||||
return tuple(x if x != '""' else '' for x in info[::2])
|
||||
|
||||
return info[0], info[2]
|
||||
|
||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||
@@ -1691,7 +1718,7 @@ class InfoExtractor:
|
||||
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
|
||||
if rating is not None:
|
||||
info['average_rating'] = rating
|
||||
if is_type(e, 'TVEpisode', 'Episode'):
|
||||
if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
|
||||
episode_name = unescapeHTML(e.get('name'))
|
||||
info.update({
|
||||
'episode': episode_name,
|
||||
@@ -2058,7 +2085,7 @@ class InfoExtractor:
|
||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
|
||||
if self.get_param('hls_split_discontinuity', False):
|
||||
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||
@@ -2215,6 +2242,11 @@ class InfoExtractor:
|
||||
'quality': quality,
|
||||
'has_drm': has_drm,
|
||||
}
|
||||
|
||||
# YouTube-specific
|
||||
if yt_audio_content_id := last_stream_inf.get('YT-EXT-AUDIO-CONTENT-ID'):
|
||||
f['language'] = yt_audio_content_id.split('.')[0]
|
||||
|
||||
resolution = last_stream_inf.get('RESOLUTION')
|
||||
if resolution:
|
||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||
@@ -2788,11 +2820,11 @@ class InfoExtractor:
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if try_call(lambda: base_url_e.text) is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
if re.match(r'https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and base_url.startswith('/'):
|
||||
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
|
||||
elif mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
elif mpd_base_url and not re.match(r'https?://', base_url):
|
||||
if not mpd_base_url.endswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
@@ -2882,7 +2914,7 @@ class InfoExtractor:
|
||||
}
|
||||
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
return 'url' if re.match(r'https?://', location) else 'path'
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
@@ -3047,7 +3079,11 @@ class InfoExtractor:
|
||||
url_pattern = stream.attrib['Url']
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
stream_language = stream.get('Language', 'und')
|
||||
# IsmFD expects ISO 639 Set 2 language codes (3-character length)
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
|
||||
stream_language = stream.get('Language') or 'und'
|
||||
if len(stream_language) != 3:
|
||||
stream_language = ISO639Utils.short2long(stream_language) or 'und'
|
||||
for track in stream.findall('QualityLevel'):
|
||||
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
|
||||
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
|
||||
@@ -3138,7 +3174,7 @@ class InfoExtractor:
|
||||
})
|
||||
return formats, subtitles
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None):
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None, _headers=None):
|
||||
def absolute_url(item_url):
|
||||
return urljoin(base_url, item_url)
|
||||
|
||||
@@ -3162,11 +3198,11 @@ class InfoExtractor:
|
||||
formats = self._extract_m3u8_formats(
|
||||
full_url, video_id, ext='mp4',
|
||||
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
|
||||
preference=preference, quality=quality, fatal=False)
|
||||
preference=preference, quality=quality, fatal=False, headers=_headers)
|
||||
elif ext == 'mpd':
|
||||
is_plain_url = False
|
||||
formats = self._extract_mpd_formats(
|
||||
full_url, video_id, mpd_id=mpd_id, fatal=False)
|
||||
full_url, video_id, mpd_id=mpd_id, fatal=False, headers=_headers)
|
||||
else:
|
||||
is_plain_url = True
|
||||
formats = [{
|
||||
@@ -3260,6 +3296,8 @@ class InfoExtractor:
|
||||
})
|
||||
for f in media_info['formats']:
|
||||
f.setdefault('http_headers', {})['Referer'] = base_url
|
||||
if _headers:
|
||||
f['http_headers'].update(_headers)
|
||||
if media_info['formats'] or media_info['subtitles']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
@@ -3475,7 +3513,7 @@ class InfoExtractor:
|
||||
continue
|
||||
urls.add(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
ext = determine_ext(source_url, default_ext=mimetype2ext(source_type))
|
||||
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
|
||||
@@ -6,12 +6,37 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cwtv.com/shows/all-american-homecoming/ready-or-not/?play=d848488f-f62a-40fd-af1f-6440b1821aab',
|
||||
'info_dict': {
|
||||
'id': 'd848488f-f62a-40fd-af1f-6440b1821aab',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ready Or Not',
|
||||
'description': 'Simone is concerned about changes taking place at Bringston; JR makes a decision about his future.',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 2547,
|
||||
'timestamp': 1720519200,
|
||||
'uploader': 'CWTV',
|
||||
'chapters': 'count:6',
|
||||
'series': 'All American: Homecoming',
|
||||
'season_number': 3,
|
||||
'episode_number': 1,
|
||||
'age_limit': 0,
|
||||
'upload_date': '20240709',
|
||||
'season': 'Season 3',
|
||||
'episode': 'Episode 1',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'info_dict': {
|
||||
'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
@@ -69,13 +94,14 @@ class CWTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)
|
||||
f'https://images.cwtv.com/feed/mobileapp/video-meta/apiversion_12/guid_{video_id}', video_id)
|
||||
if data.get('result') != 'ok':
|
||||
raise ExtractorError(data['msg'], expected=True)
|
||||
video_data = data['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or f'http://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}?formats=M3U'
|
||||
mpx_url = update_url_query(
|
||||
video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}',
|
||||
{'formats': 'M3U+none'})
|
||||
|
||||
season = str_or_none(video_data.get('season'))
|
||||
episode = str_or_none(video_data.get('episode'))
|
||||
|
||||
@@ -2,6 +2,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
@@ -52,7 +53,7 @@ class DailyMailIE(InfoExtractor):
|
||||
is_hls = container == 'M2TS'
|
||||
protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
|
||||
formats.append({
|
||||
'format_id': ('hls' if is_hls else protocol) + (f'-{tbr}' if tbr else ''),
|
||||
'format_id': join_nonempty('hls' if is_hls else protocol, tbr),
|
||||
'url': rendition_url,
|
||||
'width': int_or_none(rendition.get('frameWidth')),
|
||||
'height': int_or_none(rendition.get('frameHeight')),
|
||||
|
||||
@@ -1,17 +1,20 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_resolution,
|
||||
traverse_obj,
|
||||
parse_codecs,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DigitalConcertHallIE(InfoExtractor):
|
||||
IE_DESC = 'DigitalConcertHall extractor'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
|
||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
|
||||
_ACCESS_TOKEN = None
|
||||
_NETRC_MACHINE = 'digitalconcerthall'
|
||||
_TESTS = [{
|
||||
@@ -26,7 +29,8 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'upload_date': '20210624',
|
||||
'timestamp': 1624548600,
|
||||
'duration': 2798,
|
||||
'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler',
|
||||
'album_artists': ['Members of the Berliner Philharmoniker', 'Simon Rössler'],
|
||||
'composers': ['Kurt Weill'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
@@ -34,8 +38,9 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'url': 'https://www.digitalconcerthall.com/en/concert/53785',
|
||||
'info_dict': {
|
||||
'id': '53785',
|
||||
'album_artist': 'Berliner Philharmoniker / Kirill Petrenko',
|
||||
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
|
||||
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
|
||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'playlist_count': 3,
|
||||
@@ -49,39 +54,59 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||
'upload_date': '20220714',
|
||||
'timestamp': 1657785600,
|
||||
'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff',
|
||||
'album_artists': ['Frank Peter Zimmermann', 'Benedikt von Bernstorff', 'Jakob von Bernstorff'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'Concert with several works and an interview',
|
||||
'url': 'https://www.digitalconcerthall.com/en/work/53785-1',
|
||||
'info_dict': {
|
||||
'id': '53785',
|
||||
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
|
||||
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
|
||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
token_response = self._download_json(
|
||||
login_token = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
|
||||
'affiliate': 'none',
|
||||
'grant_type': 'device',
|
||||
'device_vendor': 'unknown',
|
||||
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio
|
||||
'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari',
|
||||
'app_id': 'dch.webapp',
|
||||
'app_version': '1.0.0',
|
||||
'app_distributor': 'berlinphil',
|
||||
'app_version': '1.84.0',
|
||||
'client_secret': '2ySLN+2Fwb',
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
self._ACCESS_TOKEN = token_response['access_token']
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})['access_token']
|
||||
try:
|
||||
self._download_json(
|
||||
login_response = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
|
||||
'grant_type': 'password',
|
||||
'username': username,
|
||||
'password': password,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'Referer': 'https://www.digitalconcerthall.com',
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
'Authorization': f'Bearer {login_token}',
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})
|
||||
except ExtractorError:
|
||||
self.raise_login_required(msg='Login info incorrect')
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
raise
|
||||
self._ACCESS_TOKEN = login_response['access_token']
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._ACCESS_TOKEN:
|
||||
@@ -95,17 +120,20 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})
|
||||
|
||||
m3u8_url = traverse_obj(
|
||||
stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
|
||||
formats = []
|
||||
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
for fmt in formats:
|
||||
if fmt.get('format_note') and fmt.get('vcodec') == 'none':
|
||||
fmt.update(parse_codecs(fmt['format_note']))
|
||||
|
||||
yield {
|
||||
'id': video_id,
|
||||
'title': item.get('title'),
|
||||
'composer': item.get('name_composer'),
|
||||
'url': m3u8_url,
|
||||
'formats': formats,
|
||||
'duration': item.get('duration_total'),
|
||||
'timestamp': traverse_obj(item, ('date', 'published')),
|
||||
@@ -119,31 +147,34 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id')
|
||||
language, type_, video_id, part = self._match_valid_url(url).group('language', 'type', 'id', 'part')
|
||||
if not language:
|
||||
language = 'en'
|
||||
|
||||
thumbnail_url = self._html_search_regex(
|
||||
r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)',
|
||||
self._download_webpage(url, video_id), 'thumbnail')
|
||||
thumbnails = [{
|
||||
'url': thumbnail_url,
|
||||
**parse_resolution(thumbnail_url),
|
||||
}]
|
||||
|
||||
api_type = 'concert' if type_ == 'work' else type_
|
||||
vid_info = self._download_json(
|
||||
f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={
|
||||
f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
})
|
||||
album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')
|
||||
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
|
||||
|
||||
if type_ == 'work':
|
||||
videos = [videos[int(part) - 1]]
|
||||
|
||||
album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name', {str}))
|
||||
thumbnail = traverse_obj(vid_info, (
|
||||
'image', ..., {self._proto_relative_url}, {url_or_none},
|
||||
{lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': video_id,
|
||||
'title': vid_info.get('title'),
|
||||
'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_),
|
||||
'thumbnails': thumbnails,
|
||||
'album_artist': album_artist,
|
||||
'entries': self._entries(
|
||||
videos, language, type_, thumbnail=thumbnail, album_artists=album_artists),
|
||||
'thumbnail': thumbnail,
|
||||
'album_artists': album_artists,
|
||||
}
|
||||
|
||||
@@ -1,115 +0,0 @@
|
||||
import random
|
||||
import string
|
||||
import urllib.parse
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<site>
|
||||
go\.discovery|
|
||||
www\.
|
||||
(?:
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
ahctv|
|
||||
destinationamerica|
|
||||
sciencechannel|
|
||||
tlc
|
||||
)|
|
||||
watch\.
|
||||
(?:
|
||||
hgtv|
|
||||
foodnetwork|
|
||||
travelchannel|
|
||||
diynetwork|
|
||||
cookingchanneltv|
|
||||
motortrend
|
||||
)
|
||||
)\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
|
||||
'info_dict': {
|
||||
'id': '5a2f35ce6b66d17a5026e29e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Riding with Matthew Perry',
|
||||
'description': 'md5:a34333153e79bc4526019a5129e7f878',
|
||||
'duration': 84,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# using `show_slug` is important to get the correct video data
|
||||
'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_GEO_BYPASS = False
|
||||
_API_BASE_URL = 'https://api.discovery.com/v1/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, show_slug, display_id = self._match_valid_url(url).groups()
|
||||
|
||||
access_token = None
|
||||
cookies = self._get_cookies(url)
|
||||
|
||||
# prefer Affiliate Auth Token over Anonymous Auth Token
|
||||
auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
|
||||
if auth_storage_cookie and auth_storage_cookie.value:
|
||||
auth_storage = self._parse_json(urllib.parse.unquote(
|
||||
urllib.parse.unquote(auth_storage_cookie.value)),
|
||||
display_id, fatal=False) or {}
|
||||
access_token = auth_storage.get('a') or auth_storage.get('access_token')
|
||||
|
||||
if not access_token:
|
||||
access_token = self._download_json(
|
||||
f'https://{site}.com/anonymous', display_id,
|
||||
'Downloading token JSON metadata', query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': '3020a40c2356a645b4b4',
|
||||
'nonce': ''.join(random.choices(string.ascii_letters, k=32)),
|
||||
'redirectUri': 'https://www.discovery.com/',
|
||||
})['access_token']
|
||||
|
||||
headers = self.geo_verification_headers()
|
||||
headers['Authorization'] = 'Bearer ' + access_token
|
||||
|
||||
try:
|
||||
video = self._download_json(
|
||||
self._API_BASE_URL + 'content/videos',
|
||||
display_id, 'Downloading content JSON metadata',
|
||||
headers=headers, query={
|
||||
'embed': 'show.name',
|
||||
'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
|
||||
'slug': display_id,
|
||||
'show_slug': show_slug,
|
||||
})[0]
|
||||
video_id = video['id']
|
||||
stream = self._download_json(
|
||||
self._API_BASE_URL + 'streaming/video/' + video_id,
|
||||
display_id, 'Downloading streaming JSON metadata', headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
e.cause.response.read().decode(), display_id)['description']
|
||||
if 'resource not available for country' in e_description:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
if 'Authorized Networks' in e_description:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.', expected=True)
|
||||
raise ExtractorError(e_description)
|
||||
raise
|
||||
|
||||
return self._extract_video_info(video, stream, display_id)
|
||||
@@ -1,171 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DiscoveryGoBaseIE(InfoExtractor):
|
||||
_VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
ahctv|
|
||||
destinationamerica|
|
||||
sciencechannel|
|
||||
tlc|
|
||||
velocitychannel
|
||||
)go\.com/%s(?P<id>[^/?#&]+)'''
|
||||
|
||||
def _extract_video_info(self, video, stream, display_id):
|
||||
title = video['name']
|
||||
|
||||
if not stream:
|
||||
if video.get('authenticated') is True:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.', expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find stream')
|
||||
STREAM_URL_SUFFIX = 'streamUrl'
|
||||
formats = []
|
||||
for stream_kind in ('', 'hds'):
|
||||
suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX
|
||||
stream_url = stream.get(f'{stream_kind}{suffix}')
|
||||
if not stream_url:
|
||||
continue
|
||||
if stream_kind == '':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif stream_kind == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url, display_id, f4m_id=stream_kind, fatal=False))
|
||||
|
||||
video_id = video.get('id') or display_id
|
||||
description = video.get('description', {}).get('detailed')
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
series = video.get('show', {}).get('name')
|
||||
season_number = int_or_none(video.get('season', {}).get('number'))
|
||||
episode_number = int_or_none(video.get('episodeNumber'))
|
||||
|
||||
tags = video.get('tags')
|
||||
age_limit = parse_age_limit(video.get('parental', {}).get('rating'))
|
||||
|
||||
subtitles = {}
|
||||
captions = stream.get('captions')
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
subtitle_url = url_or_none(caption.get('fileUrl'))
|
||||
if not subtitle_url or not subtitle_url.startswith('http'):
|
||||
continue
|
||||
lang = caption.get('fileLang', 'en')
|
||||
ext = determine_ext(subtitle_url)
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': 'ttml' if ext == 'xml' else ext,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'tags': tags,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class DiscoveryGoIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_TEST = {
|
||||
'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
|
||||
'info_dict': {
|
||||
'id': '58c167d86b66d12f2addeb01',
|
||||
'ext': 'mp4',
|
||||
'title': 'Reaper Madness',
|
||||
'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
|
||||
'duration': 2519,
|
||||
'series': 'Bering Sea Gold',
|
||||
'season_number': 8,
|
||||
'episode_number': 6,
|
||||
'age_limit': 14,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
container = extract_attributes(
|
||||
self._search_regex(
|
||||
r'(<div[^>]+class=["\']video-player-container[^>]+>)',
|
||||
webpage, 'video container'))
|
||||
|
||||
video = self._parse_json(
|
||||
container.get('data-video') or container.get('data-json'),
|
||||
display_id)
|
||||
|
||||
stream = video.get('stream')
|
||||
|
||||
return self._extract_video_info(video, stream, display_id)
|
||||
|
||||
|
||||
class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
|
||||
_TEST = {
|
||||
'url': 'https://www.discoverygo.com/bering-sea-gold/',
|
||||
'info_dict': {
|
||||
'id': 'bering-sea-gold',
|
||||
'title': 'Bering Sea Gold',
|
||||
'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if DiscoveryGoIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
entries = []
|
||||
for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
|
||||
data = self._parse_json(
|
||||
mobj.group('json'), display_id,
|
||||
transform_source=unescapeHTML, fatal=False)
|
||||
if not isinstance(data, dict) or data.get('type') != 'episode':
|
||||
continue
|
||||
episode_url = data.get('socialUrl')
|
||||
if not episode_url:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
episode_url, ie=DiscoveryGoIE.ie_key(),
|
||||
video_id=data.get('id')))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, display_id,
|
||||
remove_end(self._og_search_title(
|
||||
webpage, fatal=False), ' | Discovery GO'),
|
||||
self._og_search_description(webpage))
|
||||
@@ -24,8 +24,9 @@ from ..utils import (
|
||||
class DouyuBaseIE(InfoExtractor):
|
||||
def _download_cryptojs_md5(self, video_id):
|
||||
for url in [
|
||||
# XXX: Do NOT use cdn.bootcdn.net; ref: https://sansec.io/research/polyfill-supply-chain-attack
|
||||
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
'https://unpkg.com/cryptojslib@3.1.2/rollups/md5.js',
|
||||
]:
|
||||
js_code = self._download_webpage(
|
||||
url, video_id, note='Downloading signing dependency', fatal=False)
|
||||
@@ -35,7 +36,8 @@ class DouyuBaseIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
|
||||
|
||||
def _get_cryptojs_md5(self, video_id):
|
||||
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
|
||||
return self.cache.load(
|
||||
'douyu', 'crypto-js-md5', min_ver='2024.07.04') or self._download_cryptojs_md5(video_id)
|
||||
|
||||
def _calc_sign(self, sign_func, video_id, a):
|
||||
b = uuid.uuid4().hex
|
||||
|
||||
@@ -319,35 +319,17 @@ class DPlayIE(DPlayBaseIE):
|
||||
url, display_id, host, 'dplay' + country, country, domain)
|
||||
|
||||
|
||||
class HGTVDeIE(DPlayBaseIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
|
||||
'info_dict': {
|
||||
'id': '151205',
|
||||
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wer braucht schon eine Toilette',
|
||||
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
|
||||
'duration': 1177.024,
|
||||
'timestamp': 1595705400,
|
||||
'upload_date': '20200725',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Tiny House - klein, aber oho',
|
||||
'season_number': 3,
|
||||
'episode_number': 3,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||
|
||||
|
||||
class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
|
||||
|
||||
_DISCO_CLIENT_VER = '27.43.0'
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6'
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:{self._DISCO_CLIENT_VER}',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
return self._download_json(
|
||||
@@ -365,9 +347,68 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
|
||||
|
||||
|
||||
class HGTVDeIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
|
||||
'info_dict': {
|
||||
'id': '7332936',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
|
||||
'title': 'Vom Landleben ins Loft',
|
||||
'description': 'md5:e5f72c02c853970796dd3818f2e25745',
|
||||
'episode': 'Episode 7',
|
||||
'episode_number': 7,
|
||||
'season': 'Season 7',
|
||||
'season_number': 7,
|
||||
'series': 'Mein Kleinstadt-Traumhaus',
|
||||
'duration': 2645.0,
|
||||
'timestamp': 1725998100,
|
||||
'upload_date': '20240910',
|
||||
'creators': ['HGTV'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/08/09/82a386b9-c688-32c7-b9ff-0b13865f0bae.jpeg',
|
||||
},
|
||||
}]
|
||||
|
||||
_PRODUCT = 'hgtv'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'eu1-prod.disco-api.com',
|
||||
'realm': 'hgtv',
|
||||
'country': 'de',
|
||||
}
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
|
||||
class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://go.discovery.com/video/in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister',
|
||||
'info_dict': {
|
||||
'id': '5352642',
|
||||
'display_id': 'in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trapped in a Twister',
|
||||
'description': 'Twisters destroy Midwest towns, trapping spotters in the eye of the storm.',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'series': 'In The Eye Of The Storm',
|
||||
'duration': 2490.237,
|
||||
'upload_date': '20240715',
|
||||
'timestamp': 1721008800,
|
||||
'tags': [],
|
||||
'creators': ['Discovery'],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/10/5e39637d-cabf-3ab3-8e9a-f4e9d37bc036.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
|
||||
'info_dict': {
|
||||
'id': '4164906',
|
||||
@@ -395,6 +436,26 @@ class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
class TravelChannelIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.travelchannel.com/video/the-dead-files-travel-channel/protect-the-children',
|
||||
'info_dict': {
|
||||
'id': '4710177',
|
||||
'display_id': 'the-dead-files-travel-channel/protect-the-children',
|
||||
'ext': 'mp4',
|
||||
'title': 'Protect the Children',
|
||||
'description': 'An evil presence threatens an Ohio woman\'s children and marriage.',
|
||||
'season_number': 14,
|
||||
'season': 'Season 14',
|
||||
'episode_number': 10,
|
||||
'episode': 'Episode 10',
|
||||
'series': 'The Dead Files',
|
||||
'duration': 2550.481,
|
||||
'timestamp': 1664510400,
|
||||
'upload_date': '20220930',
|
||||
'tags': [],
|
||||
'creators': ['Travel Channel'],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/17/5e45eace-de5d-343a-9293-f400a2aa77d5.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
|
||||
'info_dict': {
|
||||
'id': '2220256',
|
||||
@@ -422,6 +483,26 @@ class TravelChannelIE(DiscoveryPlusBaseIE):
|
||||
class CookingChannelIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.cookingchanneltv.com/video/bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson',
|
||||
'info_dict': {
|
||||
'id': '5350005',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson',
|
||||
'title': 'Titans vs Marcus Samuelsson',
|
||||
'description': 'Marcus Samuelsson throws his legendary global tricks at the Titans.',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'season_number': 3,
|
||||
'season': 'Season 3',
|
||||
'series': 'Bobby\'s Triple Threat',
|
||||
'duration': 2520.851,
|
||||
'upload_date': '20240710',
|
||||
'timestamp': 1720573200,
|
||||
'tags': [],
|
||||
'creators': ['Food Network'],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/04/529cd095-27ec-35c5-84e9-90ebd3e5d2da.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
|
||||
'info_dict': {
|
||||
'id': '2348634',
|
||||
@@ -449,6 +530,22 @@ class CookingChannelIE(DiscoveryPlusBaseIE):
|
||||
class HGTVUsaIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.hgtv.com/video/flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip',
|
||||
'info_dict': {
|
||||
'id': '5025585',
|
||||
'display_id': 'flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flip or Flop: The Final Flip',
|
||||
'description': 'Tarek and Christina are going their separate ways after one last flip!',
|
||||
'series': 'Flip or Flop: The Final Flip',
|
||||
'duration': 2580.644,
|
||||
'upload_date': '20231101',
|
||||
'timestamp': 1698811200,
|
||||
'tags': [],
|
||||
'creators': ['HGTV'],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/11/27/455caa6c-1462-3f14-b63d-a026d7a5e6d3.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
|
||||
'info_dict': {
|
||||
'id': '4289736',
|
||||
@@ -476,6 +573,26 @@ class HGTVUsaIE(DiscoveryPlusBaseIE):
|
||||
class FoodNetworkIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.foodnetwork.com/video/guys-grocery-games-food-network/wild-in-the-aisles',
|
||||
'info_dict': {
|
||||
'id': '2152549',
|
||||
'display_id': 'guys-grocery-games-food-network/wild-in-the-aisles',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wild in the Aisles',
|
||||
'description': 'The chefs make spaghetti and meatballs with "Out of Stock" ingredients.',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': 'Guy\'s Grocery Games',
|
||||
'tags': [],
|
||||
'creators': ['Food Network'],
|
||||
'duration': 2520.651,
|
||||
'upload_date': '20230623',
|
||||
'timestamp': 1687492800,
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/06/15/37fb5333-cad2-3dbb-af7c-c20ec77c89c6.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
|
||||
'info_dict': {
|
||||
'id': '4116449',
|
||||
@@ -503,6 +620,26 @@ class FoodNetworkIE(DiscoveryPlusBaseIE):
|
||||
class DestinationAmericaIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.destinationamerica.com/video/bbq-pit-wars-destination-america/smoke-on-the-water',
|
||||
'info_dict': {
|
||||
'id': '2218409',
|
||||
'display_id': 'bbq-pit-wars-destination-america/smoke-on-the-water',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoke on the Water',
|
||||
'description': 'The pitmasters head to Georgia for the Smoke on the Water BBQ Festival.',
|
||||
'season_number': 2,
|
||||
'season': 'Season 2',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': 'BBQ Pit Wars',
|
||||
'tags': [],
|
||||
'creators': ['Destination America'],
|
||||
'duration': 2614.878,
|
||||
'upload_date': '20230623',
|
||||
'timestamp': 1687492800,
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/c0f8e85d-9a10-3e6f-8e43-f6faafa81ba2.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
|
||||
'info_dict': {
|
||||
'id': '4210904',
|
||||
@@ -530,6 +667,26 @@ class DestinationAmericaIE(DiscoveryPlusBaseIE):
|
||||
class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.investigationdiscovery.com/video/deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca',
|
||||
'info_dict': {
|
||||
'id': '5341132',
|
||||
'display_id': 'deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca',
|
||||
'ext': 'mp4',
|
||||
'title': 'RIP Bianca',
|
||||
'description': 'A teenage influencer discovers an online world of threat, harm and danger.',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 3,
|
||||
'episode': 'Episode 3',
|
||||
'series': 'Deadly Influence: The Social Media Murders',
|
||||
'creators': ['Investigation Discovery'],
|
||||
'tags': [],
|
||||
'duration': 2490.888,
|
||||
'upload_date': '20240618',
|
||||
'timestamp': 1718672400,
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2024/06/15/b567c774-9e44-3c6c-b0ba-db860a73e812.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
|
||||
'info_dict': {
|
||||
'id': '2139409',
|
||||
@@ -557,6 +714,26 @@ class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
class AmHistoryChannelIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ahctv.com/video/blood-and-fury-americas-civil-war-ahc/battle-of-bull-run',
|
||||
'info_dict': {
|
||||
'id': '2139199',
|
||||
'display_id': 'blood-and-fury-americas-civil-war-ahc/battle-of-bull-run',
|
||||
'ext': 'mp4',
|
||||
'title': 'Battle of Bull Run',
|
||||
'description': 'Two untested armies clash in the first real battle of the Civil War.',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': 'Blood and Fury: America\'s Civil War',
|
||||
'duration': 2612.509,
|
||||
'upload_date': '20220923',
|
||||
'timestamp': 1663905600,
|
||||
'creators': ['AHC'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/4af61bd7-d705-3108-82c4-1a6e541e20fa.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
|
||||
'info_dict': {
|
||||
'id': '2309730',
|
||||
@@ -584,6 +761,26 @@ class AmHistoryChannelIE(DiscoveryPlusBaseIE):
|
||||
class ScienceChannelIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.sciencechannel.com/video/spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets',
|
||||
'info_dict': {
|
||||
'id': '2347335',
|
||||
'display_id': 'spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mystery of the Dead Planets',
|
||||
'description': 'Astronomers unmask the truly destructive nature of the cosmos.',
|
||||
'season_number': 7,
|
||||
'season': 'Season 7',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': 'Space\'s Deepest Secrets',
|
||||
'duration': 2524.989,
|
||||
'upload_date': '20230128',
|
||||
'timestamp': 1674882000,
|
||||
'creators': ['Science'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/30/3796829d-aead-3f9a-bd8d-e49048b3cdca.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
|
||||
'info_dict': {
|
||||
'id': '2842849',
|
||||
@@ -608,36 +805,29 @@ class ScienceChannelIE(DiscoveryPlusBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class DIYNetworkIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
|
||||
'info_dict': {
|
||||
'id': '2309730',
|
||||
'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bringing Beach Life to Texas',
|
||||
'description': 'The Pool Kings give a family a day at the beach in their own backyard.',
|
||||
'season_number': 10,
|
||||
'episode_number': 2,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}, {
|
||||
'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'diy'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'us1-prod-direct.watch.diynetwork.com',
|
||||
'realm': 'go',
|
||||
'country': 'us',
|
||||
}
|
||||
|
||||
|
||||
class DiscoveryLifeIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoverylife.com/video/er-files-discovery-life-atve-us/sweet-charity',
|
||||
'info_dict': {
|
||||
'id': '2347614',
|
||||
'display_id': 'er-files-discovery-life-atve-us/sweet-charity',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sweet Charity',
|
||||
'description': 'The staff at Charity Hospital treat a serious foot infection.',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': 'ER Files',
|
||||
'duration': 2364.261,
|
||||
'upload_date': '20230721',
|
||||
'timestamp': 1689912000,
|
||||
'creators': ['Discovery Life'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/16/4b6f0124-360b-3546-b6a4-5552db886b86.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
|
||||
'info_dict': {
|
||||
'id': '2218238',
|
||||
@@ -665,6 +855,26 @@ class DiscoveryLifeIE(DiscoveryPlusBaseIE):
|
||||
class AnimalPlanetIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.animalplanet.com/video/mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru',
|
||||
'info_dict': {
|
||||
'id': '4650835',
|
||||
'display_id': 'mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Demon of Peru',
|
||||
'description': 'In Peru, a farming village is being terrorized by a “man-like beast.”',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 4,
|
||||
'episode': 'Episode 4',
|
||||
'series': 'Mysterious Creatures with Forrest Galante',
|
||||
'duration': 2490.488,
|
||||
'upload_date': '20230111',
|
||||
'timestamp': 1673413200,
|
||||
'creators': ['Animal Planet'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/01/6dbaa833-9a2e-3fee-9381-c19eddf67c0c.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
|
||||
'info_dict': {
|
||||
'id': '3338923',
|
||||
@@ -692,6 +902,26 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE):
|
||||
class TLCIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://go.tlc.com/video/90-day-the-last-resort-tlc-atve-us/the-last-chance',
|
||||
'info_dict': {
|
||||
'id': '5186422',
|
||||
'display_id': '90-day-the-last-resort-tlc-atve-us/the-last-chance',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Last Chance',
|
||||
'description': 'Infidelity shakes Kalani and Asuelu\'s world, and Angela threatens divorce.',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'series': '90 Day: The Last Resort',
|
||||
'duration': 5123.91,
|
||||
'upload_date': '20230815',
|
||||
'timestamp': 1692061200,
|
||||
'creators': ['TLC'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2023/08/08/0ee367e2-ac76-334d-bf23-dbf796696a24.jpeg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
|
||||
'info_dict': {
|
||||
'id': '2206540',
|
||||
@@ -716,93 +946,8 @@ class TLCIE(DiscoveryPlusBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class MotorTrendIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
|
||||
'info_dict': {
|
||||
'id': '"4859182"',
|
||||
'display_id': 'double-dakotas',
|
||||
'ext': 'mp4',
|
||||
'title': 'Double Dakotas',
|
||||
'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.',
|
||||
'season_number': 2,
|
||||
'episode_number': 3,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}, {
|
||||
'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'vel'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'us1-prod-direct.watch.motortrend.com',
|
||||
'realm': 'go',
|
||||
'country': 'us',
|
||||
}
|
||||
|
||||
|
||||
class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
|
||||
'info_dict': {
|
||||
'id': '37699',
|
||||
'display_id': 'wheelstanding-dump-truck-stubby-bobs-comeback/37699',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wheelstanding Dump Truck! Stubby Bob’s Comeback',
|
||||
'description': 'md5:996915abe52a1c3dfc83aecea3cce8e7',
|
||||
'season_number': 5,
|
||||
'episode_number': 52,
|
||||
'episode': 'Episode 52',
|
||||
'season': 'Season 5',
|
||||
'thumbnail': r're:^https?://.+\.jpe?g$',
|
||||
'timestamp': 1388534401,
|
||||
'duration': 1887.345,
|
||||
'creator': 'Originals',
|
||||
'series': 'Roadkill',
|
||||
'upload_date': '20140101',
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/',
|
||||
'info_dict': {
|
||||
'id': '4922860',
|
||||
'ext': 'mp4',
|
||||
'title': 'Roadworthy Rescues | Teaser Trailer',
|
||||
'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.',
|
||||
'display_id': 'roadworthy-rescues-teaser-trailer/4922860',
|
||||
'creator': 'Originals',
|
||||
'series': 'Roadworthy Rescues',
|
||||
'thumbnail': r're:^https?://.+\.jpe?g$',
|
||||
'upload_date': '20220907',
|
||||
'timestamp': 1662523200,
|
||||
'duration': 1066.356,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'MTOD'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'us1-prod-direct.motortrendondemand.com',
|
||||
'realm': 'motortrend',
|
||||
'country': 'us',
|
||||
}
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:4.39.1-gi1',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
|
||||
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||
'info_dict': {
|
||||
@@ -823,14 +968,45 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
||||
}, {
|
||||
'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/gb/video/olympics/dplus-sport-dplus-sport-sport/rugby-sevens-australia-samoa',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'dplus_us'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'us1-prod-direct.discoveryplus.com',
|
||||
'realm': 'go',
|
||||
'country': 'us',
|
||||
}
|
||||
_PRODUCT = None
|
||||
_DISCO_API_PARAMS = None
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, country = self._match_valid_url(url).group('id', 'country')
|
||||
if not country:
|
||||
country = 'us'
|
||||
|
||||
self._PRODUCT = f'dplus_{country}'
|
||||
|
||||
if country in ('br', 'ca', 'us'):
|
||||
self._DISCO_API_PARAMS = {
|
||||
'disco_host': 'us1-prod-direct.discoveryplus.com',
|
||||
'realm': 'go',
|
||||
'country': country,
|
||||
}
|
||||
else:
|
||||
self._DISCO_API_PARAMS = {
|
||||
'disco_host': 'eu1-prod-direct.discoveryplus.com',
|
||||
'realm': 'dplay',
|
||||
'country': country,
|
||||
}
|
||||
|
||||
return self._get_disco_api_info(url, video_id, **self._DISCO_API_PARAMS)
|
||||
|
||||
|
||||
class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
|
||||
@@ -984,16 +1160,22 @@ class DiscoveryPlusShowBaseIE(DPlayBaseIE):
|
||||
|
||||
|
||||
class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video' + DPlayBaseIE._PATH_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/it/video/olympics/dplus-sport-dplus-sport-sport/water-polo-greece-italy',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.com/it/video/sport/dplus-sport-dplus-sport-sport/lisa-vittozzi-allinferno-e-ritorno',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'dplus_us'
|
||||
_PRODUCT = 'dplus_it'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'eu1-prod-direct.discoveryplus.com',
|
||||
'realm': 'dplay',
|
||||
@@ -1002,8 +1184,8 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6',
|
||||
'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
@@ -1044,39 +1226,3 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
|
||||
_SHOW_STR = 'show'
|
||||
_INDEX = 4
|
||||
_VIDEO_IE = DiscoveryPlusIndiaIE
|
||||
|
||||
|
||||
class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://plus.globalcyclingnetwork.com/watch/1397691',
|
||||
'info_dict': {
|
||||
'id': '1397691',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Athertons: Mountain Biking\'s Fastest Family',
|
||||
'description': 'md5:75a81937fcd8b989eec6083a709cd837',
|
||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png',
|
||||
'series': 'gcn',
|
||||
'creator': 'Gcn',
|
||||
'upload_date': '20210309',
|
||||
'timestamp': 1615248000,
|
||||
'duration': 2531.0,
|
||||
'tags': [],
|
||||
},
|
||||
'skip': 'Subscription required',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
_PRODUCT = 'web'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'disco-api-prod.globalcyclingnetwork.com',
|
||||
'realm': 'gcn',
|
||||
'country': 'us',
|
||||
}
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
@@ -6,8 +6,10 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,43 +38,58 @@ class DropboxIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
def _yield_decoded_parts(self, webpage):
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
yield base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
fn = urllib.parse.unquote(url_basename(url))
|
||||
title = os.path.splitext(fn)[0]
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if (self._og_search_title(webpage) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if '/sm/password' in part:
|
||||
webpage = self._download_webpage(
|
||||
update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
|
||||
break
|
||||
|
||||
if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
if password:
|
||||
content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
|
||||
payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(),
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
|
||||
data=urlencode_postdata({
|
||||
'is_xhr': 'true',
|
||||
't': self._get_cookies('https://www.dropbox.com')['t'].value,
|
||||
'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
|
||||
'password': password,
|
||||
'url': url,
|
||||
}))
|
||||
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Authentication failed!', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
elif self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
else:
|
||||
raise ExtractorError('Invalid password', expected=True)
|
||||
elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats, subtitles, has_anonymous_download = [], {}, False
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
formats, subtitles = [], {}
|
||||
has_anonymous_download = False
|
||||
thumbnail = None
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if not has_anonymous_download:
|
||||
has_anonymous_download = self._search_regex(
|
||||
r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
|
||||
r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
|
||||
transcode_url = self._search_regex(
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
|
||||
if not transcode_url:
|
||||
continue
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
|
||||
thumbnail = self._search_regex(
|
||||
r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
|
||||
break
|
||||
|
||||
# downloads enabled we can get the original file
|
||||
@@ -89,4 +106,5 @@ class DropboxIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
@@ -139,12 +139,11 @@ class DRTVIE(InfoExtractor):
|
||||
return
|
||||
|
||||
token_response = self._download_json(
|
||||
'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||
'https://isl.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||
note='Downloading anonymous token', headers={
|
||||
'content-type': 'application/json',
|
||||
}, query={
|
||||
'device': 'web_browser',
|
||||
'ff': 'idp,ldp,rpt',
|
||||
'device': 'phone_android',
|
||||
'lang': 'da',
|
||||
'supportFallbackToken': 'true',
|
||||
}, data=json.dumps({
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..utils import float_or_none, int_or_none, parse_iso8601
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
|
||||
@@ -37,12 +42,9 @@ class EitbIE(InfoExtractor):
|
||||
if not video_url:
|
||||
continue
|
||||
tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
|
||||
format_id = 'http'
|
||||
if tbr:
|
||||
format_id += f'-{int(tbr)}'
|
||||
formats.append({
|
||||
'url': rendition['PMD_URL'],
|
||||
'format_id': format_id,
|
||||
'format_id': join_nonempty('http', int_or_none(tbr)),
|
||||
'width': int_or_none(rendition.get('FRAME_WIDTH')),
|
||||
'height': int_or_none(rendition.get('FRAME_HEIGHT')),
|
||||
'tbr': tbr,
|
||||
|
||||
@@ -2,6 +2,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
@@ -13,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class EpidemicSoundIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/(?:(?P<sfx>sound-effects/tracks)|track)/(?P<id>[0-9a-zA-Z-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
|
||||
'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
|
||||
@@ -47,6 +48,20 @@ class EpidemicSoundIE(InfoExtractor):
|
||||
'release_timestamp': 1700535606,
|
||||
'release_date': '20231121',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.epidemicsound.com/sound-effects/tracks/2f02f54b-9faa-4daf-abac-1cfe9e9cef69/',
|
||||
'md5': '35d7cf05bd8b614a84f0495a05de9388',
|
||||
'info_dict': {
|
||||
'id': '208931',
|
||||
'ext': 'mp3',
|
||||
'upload_date': '20240603',
|
||||
'timestamp': 1717436529,
|
||||
'categories': ['appliance'],
|
||||
'display_id': '6b2NXLURPr',
|
||||
'duration': 1.0,
|
||||
'title': 'Oven, Grill, Door Open 01',
|
||||
'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -77,8 +92,10 @@ class EpidemicSoundIE(InfoExtractor):
|
||||
return f
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json(f'https://www.epidemicsound.com/json/track/{video_id}', video_id)
|
||||
video_id, is_sfx = self._match_valid_url(url).group('id', 'sfx')
|
||||
json_data = self._download_json(join_nonempty(
|
||||
'https://www.epidemicsound.com/json/track',
|
||||
is_sfx and 'kosmos-id', video_id, delim='/'), video_id)
|
||||
|
||||
thumbnails = traverse_obj(json_data, [('imageUrl', 'cover')])
|
||||
thumb_base_url = traverse_obj(json_data, ('coverArt', 'baseUrl', {url_or_none}))
|
||||
|
||||
@@ -29,9 +29,6 @@ class EpornerIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'proxy': '127.0.0.1:8118',
|
||||
},
|
||||
}, {
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
|
||||
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ERTFlixBaseIE(InfoExtractor):
|
||||
@@ -74,29 +75,28 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
|
||||
|
||||
def _extract_formats_and_subs(self, video_id):
|
||||
media_info = self._call_api(video_id, codename=video_id)
|
||||
formats, subs = [], {}
|
||||
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
|
||||
for media in try_get(media_file, lambda x: x['Formats'], list) or []:
|
||||
fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
|
||||
if not fmt_url:
|
||||
continue
|
||||
ext = determine_ext(fmt_url)
|
||||
if ext == 'm3u8':
|
||||
formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
|
||||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
formats_, subs_ = self._extract_mpd_formats_and_subtitles(
|
||||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': str_or_none(media.get('Id')),
|
||||
})
|
||||
continue
|
||||
formats.extend(formats_)
|
||||
self._merge_subtitles(subs_, target=subs)
|
||||
formats, subtitles = [], {}
|
||||
for media in traverse_obj(media_info, (
|
||||
'MediaFiles', lambda _, v: v['RoleCodename'] == 'main',
|
||||
'Formats', lambda _, v: url_or_none(v['Url']))):
|
||||
fmt_url = media['Url']
|
||||
ext = determine_ext(fmt_url)
|
||||
if ext == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': str_or_none(media.get('Id')),
|
||||
})
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return formats, subs
|
||||
return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -294,37 +294,37 @@ class ESPNCricInfoIE(InfoExtractor):
|
||||
class WatchESPNIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'info_dict': {
|
||||
'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'id': '11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'ext': 'mp4',
|
||||
'title': 'Huddersfield vs. Burnley',
|
||||
'duration': 7500,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
'title': 'Abilene Chrstn vs. Texas Tech',
|
||||
'duration': 14166,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/11ce417a-6ac9-42b6-8a15-46aeb9ad5710/16x9.jpg?timestamp=202407252343&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'info_dict': {
|
||||
'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'id': '90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
|
||||
'duration': 8335,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'UC Davis vs. California',
|
||||
'duration': 9547,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'info_dict': {
|
||||
'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'id': 'c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Wheel - Episode 10',
|
||||
'duration': 3352,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'The College Football Show',
|
||||
'duration': 3639,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/c4313bbe-95b5-4bb8-b251-ac143ea0fc54/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -353,6 +353,13 @@ class WatchESPNIE(AdobePassIE):
|
||||
if not cookie:
|
||||
self.raise_login_required(method='cookies')
|
||||
|
||||
jwt = self._search_regex(r'=([^|]+)\|', cookie.value, 'cookie jwt')
|
||||
id_token = self._download_json(
|
||||
'https://registerdisney.go.com/jgc/v6/client/ESPN-ONESITE.WEB-PROD/guest/refresh-auth',
|
||||
None, 'Refreshing token', headers={'Content-Type': 'application/json'}, data=json.dumps({
|
||||
'refreshToken': json.loads(base64.urlsafe_b64decode(f'{jwt}==='))['refresh_token'],
|
||||
}).encode())['data']['token']['id_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'devices', video_id,
|
||||
headers={'Content-Type': 'application/json; charset=UTF-8'},
|
||||
@@ -371,7 +378,7 @@ class WatchESPNIE(AdobePassIE):
|
||||
})['access_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
|
||||
'accounts/grant', video_id, payload={'id_token': id_token},
|
||||
headers={
|
||||
'Authorization': token,
|
||||
'Content-Type': 'application/json; charset=UTF-8',
|
||||
|
||||
@@ -3,7 +3,12 @@ from ..utils import traverse_obj
|
||||
|
||||
|
||||
class EurosportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:
|
||||
(?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
|
||||
eurosport\.tvn24\.pl
|
||||
)/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
|
||||
'info_dict': {
|
||||
@@ -70,6 +75,42 @@ class EurosportIE(InfoExtractor):
|
||||
'duration': 105.0,
|
||||
'upload_date': '20230518',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TOKEN = None
|
||||
@@ -77,6 +118,7 @@ class EurosportIE(InfoExtractor):
|
||||
# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
|
||||
# but this method require to get sha256 hash
|
||||
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_initialize(self):
|
||||
if EurosportIE._TOKEN is None:
|
||||
@@ -98,13 +140,13 @@ class EurosportIE(InfoExtractor):
|
||||
for stream_type in json_data['attributes']['streaming']:
|
||||
if stream_type == 'hls':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
|
||||
elif stream_type == 'dash':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
|
||||
elif stream_type == 'mss':
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
@@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1692346159,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': '100063551323670',
|
||||
'duration': 3132.184,
|
||||
'duration': 3133.583,
|
||||
'view_count': int,
|
||||
'concurrent_view_count': 0,
|
||||
},
|
||||
@@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
|
||||
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
|
||||
'duration': 131.03,
|
||||
'concurrent_view_count': int,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
@@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': 'ca63897a90c9452efee5f8c40d080e25',
|
||||
'md5': '1659aa21fb3dd1585874f668e81a72c8',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'uploader_id': '100059479812265',
|
||||
'concurrent_view_count': int,
|
||||
'duration': 44.478,
|
||||
'duration': 44.181,
|
||||
},
|
||||
}, {
|
||||
# FIXME: unable to extract uploader, no formats found
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||
@@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1511548260,
|
||||
'upload_date': '20171124',
|
||||
'uploader': 'Vickie Gentry',
|
||||
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
|
||||
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 148.435,
|
||||
'duration': 148.224,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
@@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
|
||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Lela Evans',
|
||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
||||
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
|
||||
'upload_date': '20231228',
|
||||
'timestamp': 1703804085,
|
||||
'duration': 394.347,
|
||||
@@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20180523',
|
||||
'uploader': 'ESL One Dota 2',
|
||||
'uploader_id': '100066514874195',
|
||||
'duration': 4524.212,
|
||||
'duration': 4524.001,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
@@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
|
||||
'title': 'Josef',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
|
||||
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
|
||||
'timestamp': 1549275572,
|
||||
'duration': 3.413,
|
||||
'duration': 3.283,
|
||||
'uploader': 'Josef Novak',
|
||||
'description': '',
|
||||
'upload_date': '20190204',
|
||||
@@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# FIXME: Cannot parse data error
|
||||
# data.event.cover_media_renderer.cover_video
|
||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||
'info_dict': {
|
||||
@@ -498,7 +501,8 @@ class FacebookIE(InfoExtractor):
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||
or get_first(post, ('event', 'event_creator', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
|
||||
uploader = uploader_data.get('name') or (
|
||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
or self._search_regex(
|
||||
@@ -524,6 +528,11 @@ class FacebookIE(InfoExtractor):
|
||||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
|
||||
'like_count': ('likers', 'count', {int}),
|
||||
'comment_count': ('total_comment_count', {int}),
|
||||
'repost_count': ('share_count_reduced', {parse_count}),
|
||||
}), get_all=False),
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
@@ -555,11 +564,12 @@ class FacebookIE(InfoExtractor):
|
||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
|
||||
dash_manifest = traverse_obj(
|
||||
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str)
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
mpd_url=video.get('dash_manifest_url')))
|
||||
mpd_url=url_or_none(video.get('dash_manifest_url'))))
|
||||
|
||||
def process_formats(info):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
@@ -571,16 +581,21 @@ class FacebookIE(InfoExtractor):
|
||||
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
||||
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
rf'data-sjs>({{.*?{_filter}.*?}})</script>',
|
||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||
def yield_all_relay_data(_filter):
|
||||
for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})</script>', webpage):
|
||||
yield self._parse_json(relay_data, video_id, fatal=False) or {}
|
||||
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
return traverse_obj(extract_relay_data(_filter), (
|
||||
'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||
def extract_relay_data(_filter):
|
||||
return next(filter(None, yield_all_relay_data(_filter)), {})
|
||||
|
||||
def extract_relay_prefetched_data(_filter, target_keys=None):
|
||||
path = 'data'
|
||||
if target_keys is not None:
|
||||
path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys))
|
||||
return traverse_obj(yield_all_relay_data(_filter), (
|
||||
..., 'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
|
||||
..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||
..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
@@ -591,7 +606,8 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
if not video_data:
|
||||
data = extract_relay_prefetched_data(
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)',
|
||||
target_keys=('video', 'event', 'nodes', 'node', 'mediaset'))
|
||||
if data:
|
||||
entries = []
|
||||
|
||||
@@ -603,12 +619,13 @@ class FacebookIE(InfoExtractor):
|
||||
video = video['creation_story']
|
||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||
video.update(reel_info)
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||
('browser_native_sd_url', 'sd')):
|
||||
playable_url = video.get(key)
|
||||
playable_url = fmt_data.get(key)
|
||||
if not playable_url:
|
||||
continue
|
||||
if determine_ext(playable_url) == 'mpd':
|
||||
@@ -620,7 +637,10 @@ class FacebookIE(InfoExtractor):
|
||||
'quality': q(format_id) - 3,
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
extract_dash_manifest(fmt_data, formats)
|
||||
if not formats:
|
||||
# Do not append false positive entry w/o any formats
|
||||
return
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
|
||||
@@ -923,18 +943,21 @@ class FacebookReelIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||
'md5': 'f13dd37f2633595982db5ed8765474d3',
|
||||
'md5': 'a53256d10fc2105441fe0c4212ed8cea',
|
||||
'info_dict': {
|
||||
'id': '1195289147628387',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
|
||||
'description': 'md5:22f03309b216ac84720183961441d8db',
|
||||
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
|
||||
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'uploader': 'Beast Camp Training',
|
||||
'uploader_id': '100040874179269',
|
||||
'duration': 9.579,
|
||||
'timestamp': 1637502609,
|
||||
'upload_date': '20211121',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -954,6 +977,7 @@ class FacebookAdsIE(InfoExtractor):
|
||||
'id': '899206155126718',
|
||||
'ext': 'mp4',
|
||||
'title': 'video by Kandao',
|
||||
'description': 'md5:0822724069e3aca97cbed5dabbab282e',
|
||||
'uploader': 'Kandao',
|
||||
'uploader_id': '774114102743284',
|
||||
'uploader_url': r're:^https?://.*',
|
||||
@@ -962,6 +986,22 @@ class FacebookAdsIE(InfoExtractor):
|
||||
'upload_date': '20231214',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# key 'watermarked_video_sd_url' missing
|
||||
'url': 'https://www.facebook.com/ads/library/?id=501152689226254',
|
||||
'info_dict': {
|
||||
'id': '501152689226254',
|
||||
'ext': 'mp4',
|
||||
'title': 'video by mat.nawrocki',
|
||||
'description': 'md5:02a446ace7ff8c3c37a2892922492490',
|
||||
'uploader': 'mat.nawrocki',
|
||||
'uploader_id': '148586968341456',
|
||||
'uploader_url': r're:^https?://.*',
|
||||
'timestamp': 1723452305,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'upload_date': '20240812',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
|
||||
'info_dict': {
|
||||
@@ -1008,34 +1048,42 @@ class FacebookAdsIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
post_data = [self._parse_json(j, video_id, fatal=False)
|
||||
for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
|
||||
data = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
|
||||
post_data = traverse_obj(
|
||||
re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads}))
|
||||
data = get_first(post_data, (
|
||||
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ...,
|
||||
'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict}))
|
||||
if not data:
|
||||
raise ExtractorError('Unable to extract ad data')
|
||||
|
||||
title = data.get('title')
|
||||
if not title or title == '{{product.name}}':
|
||||
title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)
|
||||
markup_id = traverse_obj(data, ('body', '__m', {str}))
|
||||
markup = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'markup', lambda _, v: v[0].startswith(markup_id),
|
||||
..., '__html', {clean_html}, {lambda x: not x.startswith('{{product.') and x}, any))
|
||||
|
||||
info_dict = traverse_obj(data, {
|
||||
'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
|
||||
info_dict = merge_dicts({
|
||||
'title': title,
|
||||
'description': markup or None,
|
||||
}, traverse_obj(data, {
|
||||
'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}),
|
||||
'uploader': ('page_name', {str}),
|
||||
'uploader_id': ('page_id', {str_or_none}),
|
||||
'uploader_url': ('page_profile_uri', {url_or_none}),
|
||||
'timestamp': ('creation_time', {int_or_none}),
|
||||
'like_count': ('page_like_count', {int_or_none}),
|
||||
})
|
||||
}))
|
||||
|
||||
entries = []
|
||||
for idx, entry in enumerate(traverse_obj(
|
||||
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v[f]) for f in self._FORMATS_MAP))), 1,
|
||||
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1,
|
||||
):
|
||||
entries.append({
|
||||
'id': f'{video_id}_{idx}',
|
||||
'title': entry.get('title') or title,
|
||||
'description': entry.get('link_description') or info_dict.get('description'),
|
||||
'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'),
|
||||
'thumbnail': url_or_none(entry.get('video_preview_image_url')),
|
||||
'formats': self._extract_formats(entry),
|
||||
})
|
||||
|
||||
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
format_field,
|
||||
@@ -33,6 +34,7 @@ class FranceTVIE(InfoExtractor):
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
# tokenized url is in dinfo['video']['token']
|
||||
'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'info_dict': {
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
@@ -44,6 +46,19 @@ class FranceTVIE(InfoExtractor):
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# tokenized url is in dinfo['video']['token']['akamai']
|
||||
'url': 'francetv:c5bda21d-2c6f-4470-8849-3d8327adb2ba',
|
||||
'info_dict': {
|
||||
'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'timestamp': 1514118300,
|
||||
'duration': 2880,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20171224',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'francetv:162311093',
|
||||
'only_matching': True,
|
||||
@@ -68,6 +83,7 @@ class FranceTVIE(InfoExtractor):
|
||||
def _extract_video(self, video_id, hostname=None):
|
||||
is_live = None
|
||||
videos = []
|
||||
drm_formats = False
|
||||
title = None
|
||||
subtitle = None
|
||||
episode_number = None
|
||||
@@ -85,13 +101,12 @@ class FranceTVIE(InfoExtractor):
|
||||
'device_type': device_type,
|
||||
'browser': browser,
|
||||
'domain': hostname,
|
||||
}), fatal=False)
|
||||
}), fatal=False, expected_status=422) # 422 json gives detailed error code/message
|
||||
|
||||
if not dinfo:
|
||||
continue
|
||||
|
||||
video = traverse_obj(dinfo, ('video', {dict}))
|
||||
if video:
|
||||
if video := traverse_obj(dinfo, ('video', {dict})):
|
||||
videos.append(video)
|
||||
if duration is None:
|
||||
duration = video.get('duration')
|
||||
@@ -99,9 +114,19 @@ class FranceTVIE(InfoExtractor):
|
||||
is_live = video.get('is_live')
|
||||
if spritesheets is None:
|
||||
spritesheets = video.get('spritesheets')
|
||||
elif code := traverse_obj(dinfo, ('code', {int})):
|
||||
if code == 2009:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
elif code in (2015, 2017):
|
||||
# 2015: L'accès à cette vidéo est impossible. (DRM-only)
|
||||
# 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM)
|
||||
drm_formats = True
|
||||
continue
|
||||
self.report_warning(
|
||||
f'{self.IE_NAME} said: {code} "{clean_html(dinfo.get("message"))}"')
|
||||
continue
|
||||
|
||||
meta = traverse_obj(dinfo, ('meta', {dict}))
|
||||
if meta:
|
||||
if meta := traverse_obj(dinfo, ('meta', {dict})):
|
||||
if title is None:
|
||||
title = meta.get('title')
|
||||
# meta['pre_title'] contains season and episode number for series in format "S<ID> E<ID>"
|
||||
@@ -114,12 +139,15 @@ class FranceTVIE(InfoExtractor):
|
||||
if timestamp is None:
|
||||
timestamp = parse_iso8601(meta.get('broadcasted_at'))
|
||||
|
||||
if not videos and drm_formats:
|
||||
self.report_drm(video_id)
|
||||
|
||||
formats, subtitles, video_url = [], {}, None
|
||||
for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])):
|
||||
video_url = video['url']
|
||||
format_id = video.get('format')
|
||||
|
||||
if token_url := url_or_none(video.get('token')):
|
||||
if token_url := traverse_obj(video, ('token', (None, 'akamai'), {url_or_none}, any)):
|
||||
tokenized_url = traverse_obj(self._download_json(
|
||||
token_url, video_id, f'Downloading signed {format_id} manifest URL',
|
||||
fatal=False, query={
|
||||
@@ -225,13 +253,13 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||
'info_dict': {
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'timestamp': 1502623500,
|
||||
'duration': 2580,
|
||||
'timestamp': 1514118300,
|
||||
'duration': 2880,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170813',
|
||||
'upload_date': '20171224',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
||||
@@ -3,7 +3,7 @@ from .nexx import NexxIE
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
|
||||
'md5': '8610449476156f338761a75391b0017d',
|
||||
@@ -27,6 +27,9 @@ class FunkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -8,6 +8,9 @@ from .common import InfoExtractor
|
||||
from .commonprotocols import RtmpIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
MEDIA_EXTENSIONS,
|
||||
@@ -43,6 +46,7 @@ from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
from ..utils._utils import _UnsafeExtensionError
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -2167,7 +2171,15 @@ class GenericIE(InfoExtractor):
|
||||
urllib.parse.urlparse(fragment_query).query or fragment_query
|
||||
or urllib.parse.urlparse(manifest_url).query or None)
|
||||
|
||||
hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
|
||||
key_query = self._configuration_arg('key_query', [None], casesense=True)[0]
|
||||
if key_query is not None:
|
||||
info['extra_param_to_key_url'] = (
|
||||
urllib.parse.urlparse(key_query).query or key_query
|
||||
or urllib.parse.urlparse(manifest_url).query or None)
|
||||
|
||||
def hex_or_none(value):
|
||||
return value if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE) else None
|
||||
|
||||
info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
|
||||
'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
|
||||
}) or None
|
||||
@@ -2331,7 +2343,7 @@ class GenericIE(InfoExtractor):
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
if re.match(r'[^\s/]+\.[^\s/]+/', url):
|
||||
self.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
@@ -2364,6 +2376,11 @@ class GenericIE(InfoExtractor):
|
||||
else:
|
||||
video_id = self._generic_id(url)
|
||||
|
||||
# Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
|
||||
impersonate = self._configuration_arg('impersonate', ['false'])
|
||||
if 'false' in impersonate:
|
||||
impersonate = None
|
||||
|
||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
||||
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
|
||||
@@ -2372,10 +2389,29 @@ class GenericIE(InfoExtractor):
|
||||
# to accept raw bytes and being able to download only a chunk.
|
||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||
# after a HEAD request, but not sure if we can rely on this.
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}))
|
||||
try:
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}), impersonate=impersonate)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
|
||||
and e.cause.response.get_header('cf-mitigated') == 'challenge'
|
||||
and e.cause.response.extensions.get('impersonate') is None):
|
||||
raise
|
||||
cf_cookie_domain = traverse_obj(
|
||||
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
|
||||
('__cf_bm', 'domain'))
|
||||
if cf_cookie_domain:
|
||||
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
|
||||
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
|
||||
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
|
||||
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
|
||||
'how to install the required impersonation dependency, and ')
|
||||
raise ExtractorError(
|
||||
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
|
||||
|
||||
new_url = full_response.url
|
||||
if new_url != extract_basic_auth(url)[0]:
|
||||
self.report_following_redirect(new_url)
|
||||
@@ -2391,7 +2427,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = full_response.headers.get('Content-Type', '').lower()
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
self.report_detected('direct video link')
|
||||
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
||||
@@ -2438,9 +2474,13 @@ class GenericIE(InfoExtractor):
|
||||
if not is_html(first_bytes):
|
||||
self.report_warning(
|
||||
'URL could be a direct video link, returning it as such.')
|
||||
ext = determine_ext(url)
|
||||
if ext not in _UnsafeExtensionError.ALLOWED_EXTENSIONS:
|
||||
ext = 'unknown_video'
|
||||
info_dict.update({
|
||||
'direct': True,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
})
|
||||
return info_dict
|
||||
|
||||
|
||||
91
yt_dlp/extractor/germanupa.py
Normal file
91
yt_dlp/extractor/germanupa.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GermanupaIE(InfoExtractor):
|
||||
IE_DESC = 'germanupa.de'
|
||||
_VALID_URL = r'https?://germanupa\.de/mediathek/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://germanupa.de/mediathek/4-figma-beratung-deine-sprechstunde-fuer-figma-fragen',
|
||||
'info_dict': {
|
||||
'id': '909179246',
|
||||
'title': 'Tutorial: #4 Figma Beratung - Deine Sprechstunde für Figma-Fragen',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'German UPA',
|
||||
'uploader_id': 'germanupa',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1792564420-7415283ccef8bf8702dab8c6b7515555ceeb7a1c11371ffcc133b8e887dbf70e-d_1280',
|
||||
'uploader_url': 'https://vimeo.com/germanupa',
|
||||
'duration': 3987,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'audio, uses GenericIE',
|
||||
'url': 'https://germanupa.de/mediathek/live-vom-ux-festival-neuigkeiten-von-figma-jobmarkt-agenturszene-interview-zu-sustainable',
|
||||
'info_dict': {
|
||||
'id': '1867346676',
|
||||
'title': 'Live vom UX Festival: Neuigkeiten von Figma, Jobmarkt, Agenturszene & Interview zu Sustainable UX',
|
||||
'ext': 'opus',
|
||||
'timestamp': 1720545088,
|
||||
'upload_date': '20240709',
|
||||
'duration': 3910.557,
|
||||
'like_count': int,
|
||||
'description': 'md5:db2aed5ff131e177a7b33901e9a8db05',
|
||||
'uploader': 'German UPA',
|
||||
'repost_count': int,
|
||||
'genres': ['Science'],
|
||||
'license': 'all-rights-reserved',
|
||||
'uploader_url': 'https://soundcloud.com/user-80097677',
|
||||
'uploader_id': '471579486',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-oCti2e9GhaZFWBqY-48ybGw-original.jpg',
|
||||
},
|
||||
}, {
|
||||
'note': 'Nur für Mitglieder/Just for members',
|
||||
'url': 'https://germanupa.de/mediathek/ux-festival-2024-usability-tests-und-ai',
|
||||
'info_dict': {
|
||||
'id': '986994430',
|
||||
'title': 'UX Festival 2024 "Usability Tests und AI" von Lennart Weber',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20240719',
|
||||
'uploader_url': 'https://vimeo.com/germanupa',
|
||||
'timestamp': 1721373980,
|
||||
'license': 'by-sa',
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1904187064-2a672630c30f9ad787bd390bff3f51d7506a3e8416763ba6dbf465732b165c5c-d_1280',
|
||||
'duration': 2146,
|
||||
'release_timestamp': 1721373980,
|
||||
'uploader': 'German UPA',
|
||||
'uploader_id': 'germanupa',
|
||||
'upload_date': '20240719',
|
||||
'comment_count': int,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
param_url = traverse_obj(
|
||||
self._search_regex(
|
||||
r'<iframe[^>]+data-src\s*?=\s*?([\'"])(?P<url>https://germanupa\.de/media/oembed\?url=(?:(?!\1).)+)\1',
|
||||
webpage, 'embedded video', default=None, group='url'),
|
||||
({parse_qs}, 'url', 0, {url_or_none}))
|
||||
|
||||
if not param_url:
|
||||
if self._search_regex(
|
||||
r'<div[^>]+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1',
|
||||
webpage, 'login wrapper', default=None):
|
||||
self.raise_login_required('This video is only available for members')
|
||||
return self.url_result(url, 'Generic') # Fall back to generic to extract audio
|
||||
|
||||
real_url = param_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/')
|
||||
return self.url_result(VimeoIE._smuggle_referrer(real_url, url), VimeoIE, video_id)
|
||||
@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
|
||||
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
|
||||
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://academymel.online/3video_1',
|
||||
|
||||
@@ -5,6 +5,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
remove_start,
|
||||
@@ -287,7 +288,7 @@ class GoIE(AdobePassIE):
|
||||
if mobj:
|
||||
height = int(mobj.group(2))
|
||||
f.update({
|
||||
'format_id': (f'{format_id}-' if format_id else '') + f'{height}P',
|
||||
'format_id': join_nonempty(format_id, f'{height}P'),
|
||||
'width': int(mobj.group(1)),
|
||||
'height': height,
|
||||
})
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GolemIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_TEST = {
|
||||
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||
|
||||
32
yt_dlp/extractor/graspop.py
Normal file
32
yt_dlp/extractor/graspop.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import update_url, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GraspopIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vod\.graspop\.be/[a-z]{2}/(?P<id>\d+)/'
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.graspop.be/fr/101556/thy-art-is-murder-concert/',
|
||||
'info_dict': {
|
||||
'id': '101556',
|
||||
'ext': 'mp4',
|
||||
'title': 'Thy Art Is Murder',
|
||||
'thumbnail': r're:https://cdn-mds\.pickx\.be/festivals/v3/global/original/.+\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
metadata = self._download_json(
|
||||
f'https://tv.proximus.be/MWC/videocenter/festivals/{video_id}/stream', video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(
|
||||
# Downgrade manifest request to avoid incomplete certificate chain error
|
||||
update_url(metadata['source']['assetUri'], scheme='http'), video_id, 'mp4'),
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('name', {str}),
|
||||
'thumbnail': ('source', 'poster', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
@@ -3,6 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
urljoin,
|
||||
xpath_element,
|
||||
@@ -69,7 +70,7 @@ class HBOBaseIE(InfoExtractor):
|
||||
height = format_info.get('height')
|
||||
fmt = {
|
||||
'url': path,
|
||||
'format_id': 'http{}'.format(f'-{height}p' if height else ''),
|
||||
'format_id': join_nonempty('http'. height and f'{height}p'),
|
||||
'width': format_info.get('width'),
|
||||
'height': height,
|
||||
}
|
||||
|
||||
@@ -44,9 +44,6 @@ class HKETVIE(InfoExtractor):
|
||||
'duration': 907,
|
||||
'subtitles': {},
|
||||
},
|
||||
'params': {
|
||||
'geo_verification_proxy': '<HK proxy here>',
|
||||
},
|
||||
'skip': 'Geo restricted to HK',
|
||||
}]
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class HRFernsehenIE(InfoExtractor):
|
||||
IE_NAME = 'hrfernsehen'
|
||||
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
|
||||
'md5': '5c4e0ba94677c516a2f65a84110fc536',
|
||||
|
||||
@@ -8,15 +8,19 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class HuyaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
IE_NAME = 'huya:live'
|
||||
IE_DESC = 'huya.com'
|
||||
TESTS = [{
|
||||
@@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '572329',
|
||||
'title': str,
|
||||
'ext': 'flv',
|
||||
'description': str,
|
||||
'is_live': True,
|
||||
'view_count': int,
|
||||
@@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor):
|
||||
fm = base64.b64decode(params['fm']).decode().split('_', 1)[0]
|
||||
ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
|
||||
return fm, ss
|
||||
|
||||
|
||||
class HuyaVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P<id>\d+)\.html'
|
||||
IE_NAME = 'huya:video'
|
||||
IE_DESC = '虎牙视频'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.huya.com/video/play/1002412640.html',
|
||||
'info_dict': {
|
||||
'id': '1002412640',
|
||||
'ext': 'mp4',
|
||||
'title': '8月3日',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 14,
|
||||
'uploader': '虎牙-ATS欧卡车队青木',
|
||||
'uploader_id': '1564376151',
|
||||
'upload_date': '20240803',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.huya.com/video/play/556054543.html',
|
||||
'info_dict': {
|
||||
'id': '556054543',
|
||||
'ext': 'mp4',
|
||||
'title': '我不挑事 也不怕事',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 1864,
|
||||
'uploader': '卡尔',
|
||||
'uploader_id': '367138632',
|
||||
'upload_date': '20210811',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url: str):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://liveapi.huya.com/moment/getMomentContent', video_id,
|
||||
query={'videoId': video_id})['data']['moment']['videoInfo']
|
||||
|
||||
formats = []
|
||||
for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append({
|
||||
'url': definition['url'],
|
||||
**traverse_obj(definition, {
|
||||
'format_id': ('defName', {str}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('videoCover', {url_or_none}),
|
||||
'duration': ('videoDuration', {parse_duration}),
|
||||
'uploader': ('nickName', {str}),
|
||||
'uploader_id': ('uid', {str_or_none}),
|
||||
'upload_date': ('videoUploadTime', {unified_strdate}),
|
||||
'view_count': ('videoPlayNum', {int_or_none}),
|
||||
'comment_count': ('videoCommentNum', {int_or_none}),
|
||||
'like_count': ('favorCount', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ class ImgurBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ImgurIE(ImgurBaseIE):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://imgur.com/A61SaA1',
|
||||
@@ -54,6 +54,22 @@ class ImgurIE(ImgurBaseIE):
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||
},
|
||||
}, {
|
||||
# Test with URL slug
|
||||
'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1',
|
||||
'info_dict': {
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 'MRW gifv is up and running without any bugs',
|
||||
'timestamp': 1416446068,
|
||||
'upload_date': '20141120',
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1416446068,
|
||||
'release_date': '20141120',
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
'only_matching': True,
|
||||
@@ -92,6 +108,7 @@ class ImgurIE(ImgurBaseIE):
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1710491255,
|
||||
'release_date': '20240315',
|
||||
'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -208,7 +225,10 @@ class ImgurIE(ImgurBaseIE):
|
||||
}), get_all=False),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'thumbnail': url_or_none(search('thumbnailUrl')),
|
||||
'thumbnails': [{
|
||||
'url': thumbnail_url,
|
||||
'http_headers': {'Accept': '*/*'},
|
||||
}] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None,
|
||||
'http_headers': {'Accept': '*/*'},
|
||||
}
|
||||
|
||||
@@ -252,17 +272,9 @@ class ImgurGalleryBaseIE(ImgurBaseIE):
|
||||
|
||||
class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
IE_NAME = 'imgur:gallery'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://imgur.com/gallery/Q95ko',
|
||||
'info_dict': {
|
||||
'id': 'Q95ko',
|
||||
'title': 'Adding faces make every GIF better',
|
||||
},
|
||||
'playlist_count': 25,
|
||||
'skip': 'Zoinks! You\'ve taken a wrong turn.',
|
||||
}, {
|
||||
# TODO: static images - replace with animated/video gallery
|
||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||
'only_matching': True,
|
||||
@@ -280,7 +292,27 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
'release_timestamp': 1358554297,
|
||||
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||
'release_date': '20130119',
|
||||
'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
|
||||
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# Test with slug
|
||||
'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx',
|
||||
'add_ies': ['Imgur'],
|
||||
'info_dict': {
|
||||
'id': 'YcAQlkx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||
'timestamp': 1358554297,
|
||||
'upload_date': '20130119',
|
||||
'uploader_id': '1648642',
|
||||
'uploader': 'wittyusernamehere',
|
||||
'release_timestamp': 1358554297,
|
||||
'release_date': '20130119',
|
||||
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
@@ -317,6 +349,13 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
'title': 'Penguins !',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ',
|
||||
'info_dict': {
|
||||
'id': '6lAn9VQ',
|
||||
'title': 'Penguins !',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
|
||||
'add_ies': ['Imgur'],
|
||||
@@ -357,7 +396,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
|
||||
class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||
IE_NAME = 'imgur:album'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
_GALLERY = False
|
||||
_TESTS = [{
|
||||
# TODO: only static images - replace with animated/video gallery
|
||||
@@ -372,6 +411,14 @@ class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||
'title': 'enen-no-shouboutai',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# Test with URL slug
|
||||
'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX',
|
||||
'info_dict': {
|
||||
'id': 'iX265HX',
|
||||
'title': 'enen-no-shouboutai',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://imgur.com/a/8pih2Ed',
|
||||
'info_dict': {
|
||||
|
||||
@@ -48,7 +48,6 @@ class InstagramBaseIE(InfoExtractor):
|
||||
'X-IG-WWW-Claim': '0',
|
||||
'Origin': 'https://www.instagram.com',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@@ -435,10 +434,10 @@ class InstagramIE(InstagramBaseIE):
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'query_hash': '9f8827793ef34641b2fb195d4d41151c',
|
||||
'doc_id': '8845758582119845',
|
||||
'variables': json.dumps(variables, separators=(',', ':')),
|
||||
})
|
||||
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
|
||||
media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
|
||||
|
||||
if not general_info:
|
||||
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
||||
@@ -453,7 +452,7 @@ class InstagramIE(InstagramBaseIE):
|
||||
else:
|
||||
self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
|
||||
webpage = self._download_webpage(
|
||||
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False)
|
||||
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False) or ''
|
||||
additional_data = self._search_json(
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
|
||||
if not additional_data and not media:
|
||||
|
||||
@@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor):
|
||||
'id': 'p51388',
|
||||
'ext': 'mp4',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
|
||||
'upload_date': '20201103',
|
||||
'timestamp': 1604437480,
|
||||
'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
|
||||
'episode': 'Partička (92)',
|
||||
'season': 'Partička',
|
||||
'series': 'Prima Partička',
|
||||
'episode_number': 92,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
|
||||
'info_dict': {
|
||||
'id': 'p1412199',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 3,
|
||||
'episode': 'Tenerife: V říši ohně',
|
||||
'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
|
||||
'duration': 3111.0,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
|
||||
'title': 'Tenerife: V říši ohně',
|
||||
'timestamp': 1711825800,
|
||||
'upload_date': '20240330',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
@@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor):
|
||||
video_id = self._search_regex((
|
||||
r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'let\s+videos\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
), webpage, 'real id', group='id', default=None)
|
||||
|
||||
if not video_id:
|
||||
@@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor):
|
||||
final_result = self._search_json_ld(webpage, video_id, default={})
|
||||
final_result.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': final_result.get('title') or title,
|
||||
'thumbnail': self._html_search_meta(
|
||||
['thumbnail', 'og:image', 'twitter:image'],
|
||||
webpage, 'thumbnail', default=None),
|
||||
|
||||
@@ -2,7 +2,6 @@ import functools
|
||||
import hashlib
|
||||
import json
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
@@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
|
||||
|
||||
|
||||
class SangiinInstructionIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
_VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
IE_DESC = False # this shouldn't be listed as a supported site
|
||||
|
||||
def _real_extract(self, url):
|
||||
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
|
||||
raise ExtractorError(
|
||||
'Copy the link from the button below the video description/player '
|
||||
'and use that link to download. If there is no button in the frame, '
|
||||
'get the URL of the frame showing the video.', expected=True)
|
||||
|
||||
|
||||
class SangiinIE(InfoExtractor):
|
||||
|
||||
@@ -364,20 +364,25 @@ class JioCinemaSeriesIE(JioCinemaBaseIE):
|
||||
'title': 'naagin',
|
||||
},
|
||||
'playlist_mincount': 120,
|
||||
}, {
|
||||
'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820',
|
||||
'info_dict': {
|
||||
'id': '3499820',
|
||||
'title': 'mtv-splitsvilla-x5',
|
||||
},
|
||||
'playlist_mincount': 310,
|
||||
}]
|
||||
|
||||
def _entries(self, series_id):
|
||||
seasons = self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id,
|
||||
'Downloading series metadata JSON', query={
|
||||
'sort': 'season:asc',
|
||||
'id': series_id,
|
||||
'responseType': 'common',
|
||||
})
|
||||
seasons = traverse_obj(self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id,
|
||||
'Downloading series metadata JSON', query={'responseType': 'common'}), (
|
||||
'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter',
|
||||
'trayTabs', lambda _, v: v['id']))
|
||||
|
||||
for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1):
|
||||
for season_num, season in enumerate(seasons, start=1):
|
||||
season_id = season['id']
|
||||
label = season.get('season') or season_num
|
||||
label = season.get('label') or season_num
|
||||
for page_num in itertools.count(1):
|
||||
episodes = traverse_obj(self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
|
||||
|
||||
@@ -158,7 +158,7 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
|
||||
|
||||
class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
|
||||
'info_dict': {
|
||||
@@ -173,6 +173,13 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||
'title': 'Mood Hindi',
|
||||
},
|
||||
'playlist_mincount': 801,
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
|
||||
'info_dict': {
|
||||
'id': 'Me5RridRfDk_',
|
||||
'title': 'Taaza Tunes',
|
||||
},
|
||||
'playlist_mincount': 301,
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
|
||||
https?://
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
|
||||
@@ -3,43 +3,52 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class KhanAcademyBaseIE(InfoExtractor):
|
||||
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||
|
||||
_PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4'
|
||||
|
||||
def _parse_video(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video['youtubeId'],
|
||||
'id': video.get('slug'),
|
||||
'title': video.get('title'),
|
||||
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'description': video.get('description'),
|
||||
'id': video['youtubeId'],
|
||||
'ie_key': 'Youtube',
|
||||
**traverse_obj(video, {
|
||||
'display_id': ('id', {str_or_none}),
|
||||
'title': ('translatedTitle', {str}),
|
||||
'thumbnail': ('thumbnailUrls', ..., 'url', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'description': ('description', {str}),
|
||||
}, get_all=False),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
content = self._download_json(
|
||||
'https://www.khanacademy.org/api/internal/graphql/FetchContentData',
|
||||
display_id, query={
|
||||
'https://www.khanacademy.org/api/internal/graphql/ContentForPath', display_id,
|
||||
query={
|
||||
'fastly_cacheable': 'persist_until_publish',
|
||||
'hash': '4134764944',
|
||||
'lang': 'en',
|
||||
'pcv': self._PUBLISHED_CONTENT_VERSION,
|
||||
'hash': '3712657851',
|
||||
'variables': json.dumps({
|
||||
'path': display_id,
|
||||
'queryParams': 'lang=en',
|
||||
'isModal': False,
|
||||
'followRedirects': True,
|
||||
'countryCode': 'US',
|
||||
'kaLocale': 'en',
|
||||
'clientPublishedContentVersion': self._PUBLISHED_CONTENT_VERSION,
|
||||
}),
|
||||
})['data']['contentJson']
|
||||
return self._parse_component_props(self._parse_json(content, display_id)['componentProps'])
|
||||
'lang': 'en',
|
||||
})['data']['contentRoute']['listedPathData']
|
||||
return self._parse_component_props(content, display_id)
|
||||
|
||||
|
||||
class KhanAcademyIE(KhanAcademyBaseIE):
|
||||
@@ -47,64 +56,98 @@ class KhanAcademyIE(KhanAcademyBaseIE):
|
||||
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
|
||||
_TEST = {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
|
||||
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
|
||||
'md5': '1d5c2e70fa6aa29c38eca419f12515ce',
|
||||
'info_dict': {
|
||||
'id': 'FlIG3TvQCBQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'The one-time pad',
|
||||
'description': 'The perfect cipher',
|
||||
'display_id': '716378217',
|
||||
'duration': 176,
|
||||
'uploader': 'Brit Cruise',
|
||||
'uploader_id': 'khanacademy',
|
||||
'uploader': 'Khan Academy',
|
||||
'uploader_id': '@khanacademy',
|
||||
'uploader_url': 'https://www.youtube.com/@khanacademy',
|
||||
'upload_date': '20120411',
|
||||
'timestamp': 1334170113,
|
||||
'license': 'cc-by-nc-sa',
|
||||
'live_status': 'not_live',
|
||||
'channel': 'Khan Academy',
|
||||
'channel_id': 'UC4a-Gbdw7vOaccHmFo40b9g',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC4a-Gbdw7vOaccHmFo40b9g',
|
||||
'channel_is_verified': True,
|
||||
'playable_in_embed': True,
|
||||
'categories': ['Education'],
|
||||
'creators': ['Brit Cruise'],
|
||||
'tags': [],
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int,
|
||||
'thumbnail': str,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'heatmap': list,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}
|
||||
|
||||
def _parse_component_props(self, component_props):
|
||||
video = component_props['tutorialPageData']['contentModel']
|
||||
info = self._parse_video(video)
|
||||
author_names = video.get('authorNames')
|
||||
info.update({
|
||||
'uploader': ', '.join(author_names) if author_names else None,
|
||||
'timestamp': parse_iso8601(video.get('dateAdded')),
|
||||
'license': video.get('kaUserLicense'),
|
||||
})
|
||||
return info
|
||||
def _parse_component_props(self, component_props, display_id):
|
||||
video = component_props['content']
|
||||
return {
|
||||
**self._parse_video(video),
|
||||
**traverse_obj(video, {
|
||||
'creators': ('authorNames', ..., {str}),
|
||||
'timestamp': ('dateAdded', {parse_iso8601}),
|
||||
'license': ('kaUserLicense', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class KhanAcademyUnitIE(KhanAcademyBaseIE):
|
||||
IE_NAME = 'khanacademy:unit'
|
||||
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
|
||||
_TEST = {
|
||||
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('1,2', '')) + '/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
|
||||
'info_dict': {
|
||||
'id': 'cryptography',
|
||||
'id': 'x48c910b6',
|
||||
'title': 'Cryptography',
|
||||
'description': 'How have humans protected their secret messages through history? What has changed today?',
|
||||
'display_id': 'computing/computer-science/cryptography',
|
||||
'_old_archive_ids': ['khanacademyunit cryptography'],
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science',
|
||||
'info_dict': {
|
||||
'id': 'x301707a0',
|
||||
'title': 'Computer science theory',
|
||||
'description': 'md5:4b472a4646e6cf6ec4ccb52c4062f8ba',
|
||||
'display_id': 'computing/computer-science',
|
||||
'_old_archive_ids': ['khanacademyunit computer-science'],
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}]
|
||||
|
||||
def _parse_component_props(self, component_props):
|
||||
curation = component_props['curation']
|
||||
def _parse_component_props(self, component_props, display_id):
|
||||
course = component_props['course']
|
||||
selected_unit = traverse_obj(course, (
|
||||
'unitChildren', lambda _, v: v['relativeUrl'] == f'/{display_id}', any)) or course
|
||||
|
||||
entries = []
|
||||
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
|
||||
for tutorial_number, tutorial in enumerate(tutorials, 1):
|
||||
chapter_info = {
|
||||
'chapter': tutorial.get('title'),
|
||||
'chapter_number': tutorial_number,
|
||||
'chapter_id': tutorial.get('id'),
|
||||
}
|
||||
for content_item in (tutorial.get('contentItems') or []):
|
||||
if content_item.get('kind') == 'Video':
|
||||
info = self._parse_video(content_item)
|
||||
info.update(chapter_info)
|
||||
entries.append(info)
|
||||
def build_entry(entry):
|
||||
return self.url_result(urljoin(
|
||||
'https://www.khanacademy.org', entry['canonicalUrl']),
|
||||
KhanAcademyIE, title=entry.get('translatedTitle'))
|
||||
|
||||
entries = traverse_obj(selected_unit, (
|
||||
(('unitChildren', ...), None), 'allOrderedChildren', ..., 'curatedChildren',
|
||||
lambda _, v: v['contentKind'] == 'Video' and v['canonicalUrl'], {build_entry}))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, curation.get('unit'), curation.get('title'),
|
||||
curation.get('description'))
|
||||
entries,
|
||||
display_id=display_id,
|
||||
**traverse_obj(selected_unit, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('translatedTitle', {str}),
|
||||
'description': ('translatedDescription', {str}),
|
||||
'_old_archive_ids': ('slug', {str}, {lambda x: [make_archive_id(self, x)] if x else None}),
|
||||
}))
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
@@ -25,104 +30,212 @@ class KickBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
|
||||
return self._download_json(
|
||||
f'https://kick.com/api/v1/{path}', display_id, note=note,
|
||||
f'https://kick.com/api/{path}', display_id, note=note,
|
||||
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
|
||||
|
||||
|
||||
class KickIE(KickBaseIE):
|
||||
IE_NAME = 'kick:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/yuppy',
|
||||
'url': 'https://kick.com/buddha',
|
||||
'info_dict': {
|
||||
'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21',
|
||||
'id': '92722911-nopixel-40',
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'description': str,
|
||||
'channel': 'yuppy',
|
||||
'channel_id': '33538',
|
||||
'uploader': 'Yuppy',
|
||||
'uploader_id': '33793',
|
||||
'upload_date': str,
|
||||
'live_status': 'is_live',
|
||||
'timestamp': int,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'categories': list,
|
||||
'upload_date': str,
|
||||
'channel': 'buddha',
|
||||
'channel_id': '32807',
|
||||
'uploader': 'Buddha',
|
||||
'uploader_id': '33057',
|
||||
'live_status': 'is_live',
|
||||
'concurrent_view_count': int,
|
||||
'release_timestamp': int,
|
||||
'age_limit': 18,
|
||||
'release_date': str,
|
||||
},
|
||||
'skip': 'livestream',
|
||||
'params': {'skip_download': 'livestream'},
|
||||
# 'skip': 'livestream',
|
||||
}, {
|
||||
'url': 'https://kick.com/kmack710',
|
||||
'url': 'https://kick.com/xqc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel = self._match_id(url)
|
||||
response = self._call_api(f'channels/{channel}', channel)
|
||||
response = self._call_api(f'v2/channels/{channel}', channel)
|
||||
if not traverse_obj(response, 'livestream', expected_type=dict):
|
||||
raise UserNotLive(video_id=channel)
|
||||
|
||||
return {
|
||||
'id': str(traverse_obj(
|
||||
response, ('livestream', ('slug', 'id')), get_all=False, default=channel)),
|
||||
'formats': self._extract_m3u8_formats(
|
||||
response['playback_url'], channel, 'mp4', live=True),
|
||||
'title': traverse_obj(
|
||||
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
|
||||
'description': traverse_obj(response, ('user', 'bio')),
|
||||
'channel': channel,
|
||||
'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
|
||||
'uploader': traverse_obj(response, 'name', ('user', 'username')),
|
||||
'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
|
||||
'is_live': True,
|
||||
'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))),
|
||||
'thumbnail': traverse_obj(
|
||||
response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none),
|
||||
'categories': traverse_obj(response, ('recent_categories', ..., 'name')),
|
||||
'formats': self._extract_m3u8_formats(response['playback_url'], channel, 'mp4', live=True),
|
||||
**traverse_obj(response, {
|
||||
'id': ('livestream', 'slug', {str}),
|
||||
'title': ('livestream', 'session_title', {str}),
|
||||
'description': ('user', 'bio', {str}),
|
||||
'channel_id': (('id', ('livestream', 'channel_id')), {int}, {str_or_none}, any),
|
||||
'uploader': (('name', ('user', 'username')), {str}, any),
|
||||
'uploader_id': (('user_id', ('user', 'id')), {int}, {str_or_none}, any),
|
||||
'timestamp': ('livestream', 'created_at', {unified_timestamp}),
|
||||
'release_timestamp': ('livestream', 'start_time', {unified_timestamp}),
|
||||
'thumbnail': ('livestream', 'thumbnail', 'url', {url_or_none}),
|
||||
'categories': ('recent_categories', ..., 'name', {str}),
|
||||
'concurrent_view_count': ('livestream', 'viewer_count', {int_or_none}),
|
||||
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class KickVODIE(KickBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
IE_NAME = 'kick:vod'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3',
|
||||
'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'md5': '3870f94153e40e7121a6e46c068b70cb',
|
||||
'info_dict': {
|
||||
'id': '58bac65b-e641-4476-a7ba-3707a35e60e3',
|
||||
'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'ext': 'mp4',
|
||||
'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠',
|
||||
'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d',
|
||||
'channel': 'jaredfps',
|
||||
'channel_id': '26608',
|
||||
'uploader': 'JaredFPS',
|
||||
'uploader_id': '26799',
|
||||
'upload_date': '20240402',
|
||||
'timestamp': 1712097108,
|
||||
'duration': 33859.0,
|
||||
'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
|
||||
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
|
||||
'channel': 'xqc',
|
||||
'channel_id': '668',
|
||||
'uploader': 'xQc',
|
||||
'uploader_id': '676',
|
||||
'upload_date': '20240909',
|
||||
'timestamp': 1725919141,
|
||||
'duration': 10155.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'categories': ['Call of Duty: Warzone'],
|
||||
'view_count': int,
|
||||
'categories': ['Just Chatting'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'expected_warnings': [r'impersonation'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
response = self._call_api(f'video/{video_id}', video_id)
|
||||
response = self._call_api(f'v1/video/{video_id}', video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'),
|
||||
'title': traverse_obj(
|
||||
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
|
||||
'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')),
|
||||
'channel': traverse_obj(response, ('livestream', 'channel', 'slug')),
|
||||
'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))),
|
||||
'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')),
|
||||
'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))),
|
||||
'timestamp': unified_timestamp(response.get('created_at')),
|
||||
'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000),
|
||||
'thumbnail': traverse_obj(
|
||||
response, ('livestream', 'thumbnail'), expected_type=url_or_none),
|
||||
'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')),
|
||||
**traverse_obj(response, {
|
||||
'title': ('livestream', ('session_title', 'slug'), {str}, any),
|
||||
'description': ('livestream', 'channel', 'user', 'bio', {str}),
|
||||
'channel': ('livestream', 'channel', 'slug', {str}),
|
||||
'channel_id': ('livestream', 'channel', 'id', {int}, {str_or_none}),
|
||||
'uploader': ('livestream', 'channel', 'user', 'username', {str}),
|
||||
'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}),
|
||||
'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
|
||||
'categories': ('livestream', 'categories', ..., 'name', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class KickClipIE(KickBaseIE):
|
||||
IE_NAME = 'kick:clips'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?P<id>clip_[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
|
||||
'info_dict': {
|
||||
'id': 'clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maddy detains Abd D:',
|
||||
'channel': 'mxddy',
|
||||
'channel_id': '133789',
|
||||
'uploader': 'AbdCreates',
|
||||
'uploader_id': '3309077',
|
||||
'thumbnail': r're:^https?://.*\.jpeg',
|
||||
'duration': 35,
|
||||
'timestamp': 1682481453,
|
||||
'upload_date': '20230426',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': ['VALORANT'],
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://kick.com/destiny?clip=clip_01H9SKET879NE7N9RJRRDS98J3',
|
||||
'info_dict': {
|
||||
'id': 'clip_01H9SKET879NE7N9RJRRDS98J3',
|
||||
'title': 'W jews',
|
||||
'ext': 'mp4',
|
||||
'channel': 'destiny',
|
||||
'channel_id': '1772249',
|
||||
'uploader': 'punished_furry',
|
||||
'uploader_id': '2027722',
|
||||
'duration': 49.0,
|
||||
'upload_date': '20230908',
|
||||
'timestamp': 1694150180,
|
||||
'thumbnail': 'https://clips.kick.com/clips/j3/clip_01H9SKET879NE7N9RJRRDS98J3/thumbnail.png',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': ['Just Chatting'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5',
|
||||
'info_dict': {
|
||||
'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5',
|
||||
'ext': 'mp4',
|
||||
'title': 'KLJASLDJKLJKASDLJKDAS',
|
||||
'channel': 'spreen',
|
||||
'channel_id': '5312671',
|
||||
'uploader': 'AnormalBarraBaja',
|
||||
'uploader_id': '26518262',
|
||||
'duration': 43.0,
|
||||
'upload_date': '20240927',
|
||||
'timestamp': 1727399987,
|
||||
'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': ['Minecraft'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
clip_id = self._match_id(url)
|
||||
clip = self._call_api(f'v2/clips/{clip_id}/play', clip_id)['clip']
|
||||
clip_url = clip['clip_url']
|
||||
|
||||
if determine_ext(clip_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(clip_url, clip_id, 'mp4')
|
||||
else:
|
||||
formats = [{'url': clip_url}]
|
||||
|
||||
return {
|
||||
'id': clip_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(clip, {
|
||||
'title': ('title', {str}),
|
||||
'channel': ('channel', 'slug', {str}),
|
||||
'channel_id': ('channel', 'id', {int}, {str_or_none}),
|
||||
'uploader': ('creator', 'username', {str}),
|
||||
'uploader_id': ('creator', 'id', {int}, {str_or_none}),
|
||||
'thumbnail': ('thumbnail_url', {url_or_none}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'categories': ('category', 'name', {str}, all),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'age_limit': ('is_mature', {bool}, {lambda x: 18 if x else 0}),
|
||||
}),
|
||||
}
|
||||
|
||||
126
yt_dlp/extractor/kika.py
Normal file
126
yt_dlp/extractor/kika.py
Normal file
@@ -0,0 +1,126 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class KikaIE(InfoExtractor):
|
||||
IE_DESC = 'KiKA.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P<id>[a-z-]+\d+)'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
|
||||
'md5': 'fbfc8da483719ef06f396e5e5b938c69',
|
||||
'info_dict': {
|
||||
'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240831',
|
||||
'timestamp': 1725126600,
|
||||
'season_number': 2024,
|
||||
'modified_date': '20240831',
|
||||
'episode': 'Episode 476',
|
||||
'episode_number': 476,
|
||||
'season': 'Season 2024',
|
||||
'duration': 634,
|
||||
'title': 'logo! vom Samstag, 31. August 2024',
|
||||
'modified_timestamp': 1725129983,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.kika.de/kaltstart/videos/video92498',
|
||||
'md5': '710ece827e5055094afeb474beacb7aa',
|
||||
'info_dict': {
|
||||
'id': 'video92498',
|
||||
'ext': 'mp4',
|
||||
'title': '7. Wo ist Leo?',
|
||||
'description': 'md5:fb48396a5b75068bcac1df74f1524920',
|
||||
'duration': 436,
|
||||
'timestamp': 1702926876,
|
||||
'upload_date': '20231218',
|
||||
'episode_number': 7,
|
||||
'modified_date': '20240319',
|
||||
'modified_timestamp': 1710880610,
|
||||
'episode': 'Episode 7',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
|
||||
'md5': 'ffd1b700d7de0a6616a1d08544c77294',
|
||||
'info_dict': {
|
||||
'id': 'video90088',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20221102',
|
||||
'timestamp': 1667390580,
|
||||
'duration': 197,
|
||||
'modified_timestamp': 1711093771,
|
||||
'episode_number': 8,
|
||||
'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
|
||||
'modified_date': '20240322',
|
||||
'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 8',
|
||||
'season': 'Season 1',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
doc = self._download_json(f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id)
|
||||
video_assets = self._download_json(doc['assets']['url'], video_id)
|
||||
|
||||
subtitles = {}
|
||||
if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):
|
||||
subtitles['de'] = [{
|
||||
'url': ttml_resource,
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):
|
||||
subtitles.setdefault('de', []).append({
|
||||
'url': webvtt_resource,
|
||||
'ext': 'vtt',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': list(self._extract_formats(video_assets, video_id)),
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(doc, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('date', {parse_iso8601}),
|
||||
'modified_timestamp': ('modificationDate', {parse_iso8601}),
|
||||
'duration': ((
|
||||
('durationInSeconds', {int_or_none}),
|
||||
('duration', {parse_duration})), any),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
|
||||
for media in traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url']))):
|
||||
stream_url = media['url']
|
||||
ext = determine_ext(stream_url)
|
||||
if ext == 'm3u8':
|
||||
yield from self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
else:
|
||||
yield {
|
||||
'url': stream_url,
|
||||
'format_id': ext,
|
||||
**traverse_obj(media, {
|
||||
'width': ('frameWidth', {int_or_none}),
|
||||
'height': ('frameHeight', {int_or_none}),
|
||||
# NB: filesize is 0 if unknown, bitrate is -1 if unknown
|
||||
'filesize': ('fileSize', {int_or_none}, {lambda x: x or None}),
|
||||
'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
}),
|
||||
}
|
||||
114
yt_dlp/extractor/laracasts.py
Normal file
114
yt_dlp/extractor/laracasts.py
Normal file
@@ -0,0 +1,114 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LaracastsBaseIE(InfoExtractor):
|
||||
def _get_prop_data(self, url, display_id):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return traverse_obj(
|
||||
get_element_html_by_id('app', webpage),
|
||||
({extract_attributes}, 'data-page', {json.loads}, 'props'))
|
||||
|
||||
def _parse_episode(self, episode):
|
||||
if not traverse_obj(episode, 'vimeoId'):
|
||||
self.raise_login_required('This video is only available for subscribers.')
|
||||
return self.url_result(
|
||||
VimeoIE._smuggle_referrer(
|
||||
f'https://player.vimeo.com/video/{episode["vimeoId"]}', 'https://laracasts.com/'),
|
||||
VimeoIE, url_transparent=True,
|
||||
**traverse_obj(episode, {
|
||||
'id': ('id', {int}, {str_or_none}),
|
||||
'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
|
||||
'title': ('title', {clean_html}),
|
||||
'season_number': ('chapter', {int_or_none}),
|
||||
'episode_number': ('position', {int_or_none}),
|
||||
'description': ('body', {clean_html}),
|
||||
'thumbnail': ('largeThumbnail', {url_or_none}),
|
||||
'duration': ('length', {int_or_none}),
|
||||
'date': ('dateSegments', 'published', {unified_strdate}),
|
||||
}))
|
||||
|
||||
|
||||
class LaracastsIE(LaracastsBaseIE):
|
||||
IE_NAME = 'laracasts'
|
||||
_VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+/episodes/\d+)/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1',
|
||||
'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc',
|
||||
'info_dict': {
|
||||
'id': '922040563',
|
||||
'title': 'Hello, Laravel',
|
||||
'ext': 'mp4',
|
||||
'duration': 519,
|
||||
'date': '20240312',
|
||||
'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png',
|
||||
'description': 'md5:ddd658bb241975871d236555657e1dd1',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'uploader': 'Laracasts',
|
||||
'uploader_id': 'user20182673',
|
||||
'uploader_url': 'https://vimeo.com/user20182673',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML'], # TODO: Remove when vimeo extractor is fixed
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._parse_episode(self._get_prop_data(url, display_id)['lesson'])
|
||||
|
||||
|
||||
class LaracastsPlaylistIE(LaracastsBaseIE):
|
||||
IE_NAME = 'laracasts:series'
|
||||
_VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+)/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11',
|
||||
'info_dict': {
|
||||
'title': '30 Days to Learn Laravel',
|
||||
'id': '210',
|
||||
'thumbnail': 'https://laracasts.s3.amazonaws.com/series/thumbnails/social-cards/30-days-to-learn-laravel-11.png?v=2',
|
||||
'duration': 30600.0,
|
||||
'modified_date': '20240511',
|
||||
'description': 'md5:27c260a1668a450984e8f901579912dd',
|
||||
'categories': ['Frameworks'],
|
||||
'tags': ['Laravel'],
|
||||
'display_id': '30-days-to-learn-laravel-11',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
series = self._get_prop_data(url, display_id)['series']
|
||||
|
||||
metadata = {
|
||||
'display_id': display_id,
|
||||
**traverse_obj(series, {
|
||||
'title': ('title', {str}),
|
||||
'id': ('id', {int}, {str_or_none}),
|
||||
'description': ('body', {clean_html}),
|
||||
'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
|
||||
'duration': ('runTime', {parse_duration}),
|
||||
'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}),
|
||||
'tags': ('topics', ..., 'name', {str}),
|
||||
'modified_date': ('lastUpdated', {unified_strdate}),
|
||||
}),
|
||||
}
|
||||
|
||||
return self.playlist_result(traverse_obj(
|
||||
series, ('chapters', ..., 'episodes', lambda _, v: v['vimeoId'], {self._parse_episode})), **metadata)
|
||||
@@ -136,6 +136,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
|
||||
class LBRYIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry'
|
||||
IE_DESC = 'odysee.com'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
|
||||
(?:\$/(?:download|embed)/)?
|
||||
(?P<id>
|
||||
@@ -364,6 +365,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
|
||||
class LBRYChannelIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:channel'
|
||||
IE_DESC = 'odysee.com channels'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||
@@ -391,6 +393,7 @@ class LBRYChannelIE(LBRYBaseIE):
|
||||
|
||||
class LBRYPlaylistIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:playlist'
|
||||
IE_DESC = 'odysee.com playlists'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
|
||||
|
||||
78
yt_dlp/extractor/learningonscreen.py
Normal file
78
yt_dlp/extractor/learningonscreen.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LearningOnScreenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013',
|
||||
'info_dict': {
|
||||
'id': '005D81B2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Planet Earth',
|
||||
'duration': 3600.0,
|
||||
'timestamp': 1164567600.0,
|
||||
'upload_date': '20061126',
|
||||
'thumbnail': 'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
|
||||
self.raise_login_required(
|
||||
'Use --cookies for authentication. See '
|
||||
' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
|
||||
'for how to manually pass cookies', method=None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
details = traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'programme-details')}, {
|
||||
'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}),
|
||||
'timestamp': (
|
||||
{functools.partial(get_element_by_class, 'broadcast-date')},
|
||||
{functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
|
||||
'duration': (
|
||||
{functools.partial(get_element_by_class, 'prog-running-time')},
|
||||
{clean_html}, {parse_duration}),
|
||||
}))
|
||||
|
||||
title = details.pop('title', None) or traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'add-to-existing-playlist')},
|
||||
{extract_attributes}, 'data-record-title', {clean_html}))
|
||||
|
||||
entries = self._parse_html5_media_entries(
|
||||
'https://stream.learningonscreen.ac.uk', webpage, video_id, m3u8_id='hls', mpd_id='dash',
|
||||
_headers={'Origin': 'https://learningonscreen.ac.uk', 'Referer': 'https://learningonscreen.ac.uk/'})
|
||||
if not entries:
|
||||
raise ExtractorError('No video found')
|
||||
|
||||
if len(entries) > 1:
|
||||
duration = details.pop('duration', None)
|
||||
for idx, entry in enumerate(entries, start=1):
|
||||
entry.update(details)
|
||||
entry['id'] = join_nonempty(video_id, idx)
|
||||
entry['title'] = join_nonempty(title, idx)
|
||||
return self.playlist_result(entries, video_id, title, duration=duration)
|
||||
|
||||
return {
|
||||
**entries[0],
|
||||
**details,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
}
|
||||
@@ -1,86 +1,11 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
|
||||
'info_dict': {
|
||||
'id': '10809',
|
||||
'ext': 'mp4',
|
||||
'title': "Put'ka: Trys Klausimai",
|
||||
'upload_date': '20161216',
|
||||
'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
|
||||
'age_limit': 18,
|
||||
'duration': 117,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1481904000,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # HLS download
|
||||
},
|
||||
}, {
|
||||
'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
|
||||
'info_dict': {
|
||||
'id': '10467',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nėrdas: Kompiuterio Valymas',
|
||||
'upload_date': '20150113',
|
||||
'description': 'md5:7352d113a242a808676ff17e69db6a69',
|
||||
'age_limit': 18,
|
||||
'duration': 346,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1421164800,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # HLS download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_AGE_LIMITS = {
|
||||
'N-7': 7,
|
||||
'N-14': 14,
|
||||
'S': 18,
|
||||
}
|
||||
_M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
video_info = self._download_json(
|
||||
'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, video_id or '0'),
|
||||
display_id)['videoConfig']['videoInfo']
|
||||
|
||||
video_id = str(video_info['id'])
|
||||
title = video_info['title']
|
||||
prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
|
||||
video_id, 'mp4', 'm3u8_native')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
|
||||
'duration': int_or_none(video_info.get('duration')),
|
||||
'description': clean_html(video_info.get('htmlDescription')),
|
||||
'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
|
||||
'timestamp': parse_iso8601(video_info.get('airDate')),
|
||||
'view_count': int_or_none(video_info.get('viewsCount')),
|
||||
}
|
||||
|
||||
|
||||
class LnkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'
|
||||
|
||||
@@ -92,9 +92,9 @@ class LoomIE(InfoExtractor):
|
||||
},
|
||||
'params': {'videopassword': 'seniorinfants2'},
|
||||
}, {
|
||||
# embed, transcoded-url endpoint sends empty JSON response
|
||||
# embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
|
||||
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'md5': '8488817242a0db1cb2ad0ea522553cf6',
|
||||
'md5': 'b321d261656848c184a94e3b93eae28d',
|
||||
'info_dict': {
|
||||
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'ext': 'mp4',
|
||||
@@ -104,6 +104,7 @@ class LoomIE(InfoExtractor):
|
||||
'timestamp': 1657216459,
|
||||
'duration': 181,
|
||||
},
|
||||
'params': {'format': 'bestvideo'}, # Test video-only fixup
|
||||
'expected_warnings': ['Failed to parse JSON'],
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -293,7 +294,11 @@ class LoomIE(InfoExtractor):
|
||||
format_url = format_url.replace('-split.m3u8', '.m3u8')
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
|
||||
# Sometimes only split video/audio formats are available, need to fixup video-only formats
|
||||
is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec'))
|
||||
for fmt in m3u8_formats:
|
||||
if is_not_premerged and fmt.get('vcodec') != 'none':
|
||||
fmt['acodec'] = 'none'
|
||||
yield {
|
||||
**fmt,
|
||||
'url': update_url(fmt['url'], query=query),
|
||||
|
||||
@@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
|
||||
video_data = None
|
||||
|
||||
# fix meta_url if missing the host address
|
||||
if re.match(r'^\/\+\/', meta_url):
|
||||
if re.match(r'\/\+\/', meta_url):
|
||||
meta_url = urljoin('https://my.mail.ru', meta_url)
|
||||
|
||||
if meta_url:
|
||||
|
||||
@@ -1,51 +1,35 @@
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import xpath_text
|
||||
|
||||
|
||||
class MatchTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)'
|
||||
_VALID_URL = [
|
||||
r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
|
||||
r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://matchtv.ru/#live-player',
|
||||
'url': 'http://matchtv.ru/on-air/',
|
||||
'info_dict': {
|
||||
'id': 'matchtv-live',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://matchtv.ru/on-air/',
|
||||
'url': 'https://video.matchtv.ru/iframe/channel/106',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = 'matchtv-live'
|
||||
video_url = self._download_json(
|
||||
'http://player.matchtv.ntvplus.tv/player/smil', video_id,
|
||||
query={
|
||||
'ts': '',
|
||||
'quality': 'SD',
|
||||
'contentId': '561d2c0df7159b37178b4567',
|
||||
'sign': '',
|
||||
'includeHighlights': '0',
|
||||
'userId': '',
|
||||
'sessionId': random.randint(1, 1000000000),
|
||||
'contentType': 'channel',
|
||||
'timeShift': '0',
|
||||
'platform': 'portal',
|
||||
},
|
||||
headers={
|
||||
'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
|
||||
})['data']['videoUrl']
|
||||
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
|
||||
formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8'
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': 'Матч ТВ - Прямой эфир',
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
|
||||
}
|
||||
|
||||
@@ -13,8 +13,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
IE_DESC = 'MDR.DE'
|
||||
_VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
@@ -34,30 +34,6 @@ class MDRIE(InfoExtractor):
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
|
||||
'md5': '4930515e36b06c111213e80d1e4aad0e',
|
||||
'info_dict': {
|
||||
'id': '19636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baumhaus vom 30. Oktober 2015',
|
||||
'duration': 134,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
|
||||
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
|
||||
'info_dict': {
|
||||
'id': '8182',
|
||||
'ext': 'mp4',
|
||||
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
||||
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
||||
'timestamp': 1482541200,
|
||||
'upload_date': '20161224',
|
||||
'duration': 4628,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
}, {
|
||||
# audio with alternative playerURL pattern
|
||||
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
|
||||
@@ -68,28 +44,7 @@ class MDRIE(InfoExtractor):
|
||||
'duration': 3239,
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
}, {
|
||||
# empty bitrateVideo and bitrateAudio
|
||||
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
|
||||
'info_dict': {
|
||||
'id': '128372',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der kleine Wichtel kehrt zurück',
|
||||
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
|
||||
'duration': 4876,
|
||||
'timestamp': 1607823300,
|
||||
'upload_date': '20201213',
|
||||
'uploader': 'ZDF',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
|
||||
'only_matching': True,
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -16,6 +16,15 @@ class MediaKlikkIE(InfoExtractor):
|
||||
(?P<id>[^/#?_]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/',
|
||||
'info_dict': {
|
||||
'id': '668177',
|
||||
'title': 'Az ajtó',
|
||||
'display_id': 'az-ajto',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg',
|
||||
},
|
||||
}, {
|
||||
# (old) mediaklikk. date in html.
|
||||
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
|
||||
'info_dict': {
|
||||
@@ -37,6 +46,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230903',
|
||||
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) m4sport
|
||||
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
|
||||
@@ -59,6 +69,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230908',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# m4sport with *video/ url and no date
|
||||
'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
|
||||
@@ -69,6 +80,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) hirado
|
||||
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
|
||||
@@ -90,6 +102,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230911',
|
||||
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}, {
|
||||
# (old) petofilive
|
||||
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
|
||||
@@ -112,6 +125,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230909',
|
||||
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -133,7 +147,9 @@ class MediaKlikkIE(InfoExtractor):
|
||||
r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
|
||||
|
||||
player_data['video'] = player_data.pop('token')
|
||||
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
|
||||
player_page = self._download_webpage(
|
||||
'https://player.mediaklikk.hu/playernew/player.php', video_id,
|
||||
query=player_data, headers={'Referer': url})
|
||||
player_json = self._search_json(
|
||||
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
|
||||
playlist_url = traverse_obj(
|
||||
@@ -141,14 +157,14 @@ class MediaKlikkIE(InfoExtractor):
|
||||
if not playlist_url:
|
||||
raise ExtractorError('Unable to extract playlist url')
|
||||
|
||||
formats = self._extract_wowza_formats(
|
||||
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
|
||||
|
||||
@@ -212,13 +213,14 @@ class MediasiteIE(InfoExtractor):
|
||||
stream_type, 'type%u' % stream_type)
|
||||
|
||||
stream_formats = []
|
||||
for unum, video_url in enumerate(video_urls):
|
||||
video_url = url_or_none(video_url.get('Location'))
|
||||
for unum, video in enumerate(video_urls):
|
||||
video_url = url_or_none(video.get('Location'))
|
||||
if not video_url:
|
||||
continue
|
||||
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
|
||||
|
||||
media_type = video_url.get('MediaType')
|
||||
media_type = video.get('MediaType')
|
||||
ext = mimetype2ext(video.get('MimeType'))
|
||||
if media_type == 'SS':
|
||||
stream_formats.extend(self._extract_ism_formats(
|
||||
video_url, resource_id,
|
||||
@@ -229,15 +231,20 @@ class MediasiteIE(InfoExtractor):
|
||||
video_url, resource_id,
|
||||
mpd_id=f'{stream_id}-{snum}.{unum}',
|
||||
fatal=False))
|
||||
elif ext in ('m3u', 'm3u8'):
|
||||
stream_formats.extend(self._extract_m3u8_formats(
|
||||
video_url, resource_id,
|
||||
m3u8_id=f'{stream_id}-{snum}.{unum}',
|
||||
fatal=False))
|
||||
else:
|
||||
stream_formats.append({
|
||||
'format_id': f'{stream_id}-{snum}.{unum}',
|
||||
'url': video_url,
|
||||
'ext': mimetype2ext(video_url.get('MimeType')),
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
if stream.get('HasSlideContent', False):
|
||||
images = player_options['PlayerLayoutOptions']['Images']
|
||||
images = traverse_obj(player_options, ('PlayerLayoutOptions', 'Images', {dict}))
|
||||
if stream.get('HasSlideContent') and images:
|
||||
stream_formats.append(self.__extract_slides(
|
||||
stream_id=stream_id,
|
||||
snum=snum,
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
IE_NAME = 'MangoTV'
|
||||
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, traverse_obj, unified_timestamp
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MicrosoftEmbedIE(InfoExtractor):
|
||||
@@ -63,3 +72,250 @@ class MicrosoftEmbedIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
|
||||
class MicrosoftMediusBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _sub_to_dict(subtitle_list):
|
||||
subtitles = {}
|
||||
for sub in subtitle_list:
|
||||
subtitles.setdefault(sub.pop('tag', 'und'), []).append(sub)
|
||||
return subtitles
|
||||
|
||||
def _extract_ism(self, ism_url, video_id):
|
||||
formats = self._extract_ism_formats(ism_url, video_id)
|
||||
for fmt in formats:
|
||||
if fmt['language'] != 'eng' and 'English' not in fmt['format_id']:
|
||||
fmt['language_preference'] = -10
|
||||
return formats
|
||||
|
||||
|
||||
class MicrosoftMediusIE(MicrosoftMediusBaseIE):
|
||||
_VALID_URL = r'https?://medius\.microsoft\.com/Embed/(?:Video\?id=|video-nc/|VideoDetails/)(?P<id>[\da-f-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://medius.microsoft.com/Embed/video-nc/9640d86c-f513-4889-959e-5dace86e7d2b',
|
||||
'info_dict': {
|
||||
'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
|
||||
'ext': 'ismv',
|
||||
'title': 'Rapidly code, test and ship from secure cloud developer environments',
|
||||
'description': 'md5:33c8e4facadc438613476eea24165f71',
|
||||
'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
|
||||
'subtitles': 'count:30',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medius.microsoft.com/Embed/video-nc/81215af5-c813-4dcd-aede-94f4e1a7daa3',
|
||||
'info_dict': {
|
||||
'id': '81215af5-c813-4dcd-aede-94f4e1a7daa3',
|
||||
'ext': 'ismv',
|
||||
'title': 'Microsoft Build opening',
|
||||
'description': 'md5:43455096141077a1f23144cab8cec1cb',
|
||||
'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
|
||||
'subtitles': 'count:31',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medius.microsoft.com/Embed/VideoDetails/78493569-9b3b-4a85-a409-ee76e789e25c',
|
||||
'info_dict': {
|
||||
'id': '78493569-9b3b-4a85-a409-ee76e789e25c',
|
||||
'ext': 'ismv',
|
||||
'title': ' Anomaly Detection & Root cause at Edge',
|
||||
'description': 'md5:f8f1ad93d7918649bfb97fa081b03b83',
|
||||
'thumbnail': r're:https://mediusdownload.event.microsoft.com/asset.*\.jpg.*',
|
||||
'subtitles': 'count:17',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://medius.microsoft.com/Embed/Video?id=0dc69bda-079b-4070-a7db-a8da1a06a9c7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://medius.microsoft.com/Embed/video-nc/fe823a91-959c-465b-96d4-8f4db624f72c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_subtitle(self, webpage, video_id):
|
||||
captions = traverse_obj(
|
||||
self._search_json(r'const\s+captionsConfiguration\s*=', webpage, 'captions', video_id, default=None),
|
||||
('languageList', lambda _, v: url_or_none(v['src']), {
|
||||
'url': 'src',
|
||||
'tag': ('srclang', {str}),
|
||||
'name': ('kind', {str}),
|
||||
})) or [{'url': url, 'tag': url_basename(url).split('.vtt')[0].split('_')[-1]}
|
||||
for url in re.findall(r'var\s+file\s+=\s+\{[^}]+\'(https://[^\']+\.vtt\?[^\']+)', webpage)]
|
||||
|
||||
return self._sub_to_dict(captions)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(f'https://medius.microsoft.com/Embed/video-nc/{video_id}', video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'formats': self._extract_ism(
|
||||
self._search_regex(r'StreamUrl\s*=\s*"([^"]+manifest)"', webpage, 'ism url'), video_id),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'subtitles': self._extract_subtitle(webpage, video_id),
|
||||
}
|
||||
|
||||
|
||||
class MicrosoftLearnPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?(?P<type>shows|events)/(?P<id>[\w-]+)/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners',
|
||||
'info_dict': {
|
||||
'id': 'bash-for-beginners',
|
||||
'title': 'Bash for Beginners',
|
||||
'description': 'md5:16a91c07222117d1e00912f0dbc02c2c',
|
||||
},
|
||||
'playlist_count': 20,
|
||||
}, {
|
||||
'url': 'https://learn.microsoft.com/en-us/events/build-2022',
|
||||
'info_dict': {
|
||||
'id': 'build-2022',
|
||||
'title': 'Microsoft Build 2022 - Events',
|
||||
'description': 'md5:c16b43848027df837b22c6fbac7648d3',
|
||||
},
|
||||
'playlist_count': 201,
|
||||
}]
|
||||
|
||||
def _entries(self, url_base, video_id):
|
||||
skip = 0
|
||||
while True:
|
||||
playlist_info = self._download_json(url_base, video_id, f'Downloading entries {skip}', query={
|
||||
'locale': 'en-us',
|
||||
'$skip': skip,
|
||||
})
|
||||
url_paths = traverse_obj(playlist_info, ('results', ..., 'url', {str}))
|
||||
for url_path in url_paths:
|
||||
yield self.url_result(f'https://learn.microsoft.com/en-us{url_path}')
|
||||
skip += len(url_paths)
|
||||
if skip >= playlist_info.get('count', 0) or not url_paths:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
metainfo = {
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
sub_type = 'episodes' if playlist_type == 'shows' else 'sessions'
|
||||
|
||||
url_base = f'https://learn.microsoft.com/api/contentbrowser/search/{playlist_type}/{playlist_id}/{sub_type}'
|
||||
return self.playlist_result(self._entries(url_base, playlist_id), playlist_id, **metainfo)
|
||||
|
||||
|
||||
class MicrosoftLearnEpisodeIE(MicrosoftMediusBaseIE):
|
||||
_VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?shows/[\w-]+/(?P<id>[^?#/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners/what-is-the-difference-between-a-terminal-and-a-shell-2-of-20-bash-for-beginners/',
|
||||
'info_dict': {
|
||||
'id': 'd44e1a03-a0e5-45c2-9496-5c9fa08dc94c',
|
||||
'ext': 'ismv',
|
||||
'title': 'What is the Difference Between a Terminal and a Shell? (Part 2 of 20)',
|
||||
'description': 'md5:7bbbfb593d21c2cf2babc3715ade6b88',
|
||||
'timestamp': 1676339547,
|
||||
'upload_date': '20230214',
|
||||
'thumbnail': r're:https://learn\.microsoft\.com/video/media/.*\.png',
|
||||
'subtitles': 'count:14',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
entry_id = self._html_search_meta('entryId', webpage, 'entryId', fatal=True)
|
||||
video_info = self._download_json(
|
||||
f'https://learn.microsoft.com/api/video/public/v1/entries/{entry_id}', video_id)
|
||||
return {
|
||||
'id': entry_id,
|
||||
'formats': self._extract_ism(video_info['publicVideo']['adaptiveVideoUrl'], video_id),
|
||||
'subtitles': self._sub_to_dict(traverse_obj(video_info, (
|
||||
'publicVideo', 'captions', lambda _, v: url_or_none(v['url']), {
|
||||
'tag': ('language', {str}),
|
||||
'url': 'url',
|
||||
}))),
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
**traverse_obj(video_info, {
|
||||
'timestamp': ('createTime', {parse_iso8601}),
|
||||
'thumbnails': ('publicVideo', 'thumbnailOtherSizes', ..., {'url': {url_or_none}}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class MicrosoftLearnSessionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?events/[\w-]+/(?P<id>[^?#/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://learn.microsoft.com/en-us/events/build-2022/ts01-rapidly-code-test-ship-from-secure-cloud-developer-environments',
|
||||
'info_dict': {
|
||||
'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
|
||||
'ext': 'ismv',
|
||||
'title': 'Rapidly code, test and ship from secure cloud developer environments - Events',
|
||||
'description': 'md5:f26c1a85d41c1cffd27a0279254a25c3',
|
||||
'timestamp': 1653408600,
|
||||
'upload_date': '20220524',
|
||||
'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
metainfo = {
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'timestamp': parse_iso8601(self._html_search_meta('startDate', webpage, 'startDate')),
|
||||
}
|
||||
|
||||
return self.url_result(
|
||||
self._html_search_meta('externalVideoUrl', webpage, 'videoUrl', fatal=True),
|
||||
url_transparent=True, ie=MicrosoftMediusIE, **metainfo)
|
||||
|
||||
|
||||
class MicrosoftBuildIE(InfoExtractor):
|
||||
_VALID_URL = [
|
||||
r'https?://build\.microsoft\.com/[\w-]+/sessions/(?P<id>[\da-f-]+)',
|
||||
r'https?://build\.microsoft\.com/[\w-]+/(?P<id>sessions)/?(?:[?#]|$)',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://build.microsoft.com/en-US/sessions/b49feb31-afcd-4217-a538-d3ca1d171198?source=sessions',
|
||||
'info_dict': {
|
||||
'id': 'aee55fb5-fcf9-4b38-b764-a3527cb57554',
|
||||
'ext': 'ismv',
|
||||
'title': 'Microsoft Build opening keynote',
|
||||
'description': 'md5:d38338f336ef4b6ef9ad2a7466a76655',
|
||||
'timestamp': 1716307200,
|
||||
'upload_date': '20240521',
|
||||
'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://build.microsoft.com/en-US/sessions',
|
||||
'info_dict': {
|
||||
'id': 'sessions',
|
||||
},
|
||||
'playlist_mincount': 418,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
video_info['onDemand'], ie=MicrosoftMediusIE, url_transparent=True, **traverse_obj(video_info, {
|
||||
'id': ('sessionId', {str}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('startDateTime', {parse_iso8601}),
|
||||
}))
|
||||
for video_info in self._download_json(
|
||||
'https://api-v2.build.microsoft.com/api/session/all/en-US', video_id, 'Downloading video info')
|
||||
]
|
||||
if video_id == 'sessions':
|
||||
return self.playlist_result(entries, video_id)
|
||||
else:
|
||||
return traverse_obj(entries, (lambda _, v: v['id'] == video_id), get_all=False)
|
||||
|
||||
@@ -1,188 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
|
||||
def _extract_base_url(self, course_id, display_id):
|
||||
return self._download_json(
|
||||
f'https://api-mlxprod.microsoft.com/services/products/anonymous/{course_id}',
|
||||
display_id, 'Downloading course base URL')
|
||||
|
||||
def _extract_chapter_and_title(self, title):
|
||||
if not title:
|
||||
return None, None
|
||||
m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
|
||||
return (int(m.group('chapter')), m.group('title')) if m else (None, title)
|
||||
|
||||
|
||||
class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
|
||||
IE_NAME = 'mva'
|
||||
IE_DESC = 'Microsoft Virtual Academy videos'
|
||||
_VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
|
||||
'md5': '7826c44fc31678b12ad8db11f6b5abb9',
|
||||
'info_dict': {
|
||||
'id': 'gfVXISmEB_6804984382',
|
||||
'ext': 'mp4',
|
||||
'title': 'Course Introduction',
|
||||
'formats': 'mincount:3',
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
'ext': 'ttml',
|
||||
}],
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'mva:11788:gfVXISmEB_6804984382',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = self._match_valid_url(url)
|
||||
course_id = mobj.group('course_id')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)
|
||||
|
||||
settings = self._download_xml(
|
||||
f'{base_url}/content/content_{video_id}/videosettings.xml?v=1',
|
||||
video_id, 'Downloading video settings XML')
|
||||
|
||||
_, title = self._extract_chapter_and_title(xpath_text(
|
||||
settings, './/Title', 'title', fatal=True))
|
||||
|
||||
formats = []
|
||||
|
||||
for sources in settings.findall('.//MediaSources'):
|
||||
sources_type = sources.get('videoType')
|
||||
for source in sources.findall('./MediaSource'):
|
||||
video_url = source.text
|
||||
if not video_url or not video_url.startswith('http'):
|
||||
continue
|
||||
if sources_type == 'smoothstreaming':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
video_url, video_id, 'mss', fatal=False))
|
||||
continue
|
||||
video_mode = source.get('videoMode')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
|
||||
codec = source.get('codec')
|
||||
acodec, vcodec = [None] * 2
|
||||
if codec:
|
||||
codecs = codec.split(',')
|
||||
if len(codecs) == 2:
|
||||
acodec, vcodec = codecs
|
||||
elif len(codecs) == 1:
|
||||
vcodec = codecs[0]
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': video_mode,
|
||||
'height': height,
|
||||
'acodec': acodec,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for source in settings.findall('.//MarkerResourceSource'):
|
||||
subtitle_url = source.text
|
||||
if not subtitle_url:
|
||||
continue
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': f'{base_url}/{subtitle_url}',
|
||||
'ext': source.get('type'),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
|
||||
IE_NAME = 'mva:course'
|
||||
IE_DESC = 'Microsoft Virtual Academy courses'
|
||||
_VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
|
||||
'info_dict': {
|
||||
'id': '11788',
|
||||
'title': 'Microsoft Azure Fundamentals: Virtual Machines',
|
||||
},
|
||||
'playlist_count': 36,
|
||||
}, {
|
||||
# with emphasized chapters
|
||||
'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
|
||||
'info_dict': {
|
||||
'id': '16335',
|
||||
'title': 'Developing Windows 10 Games with Construct 2',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'mva:course:11788',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if MicrosoftVirtualAcademyIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
course_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
base_url = self._extract_base_url(course_id, display_id)
|
||||
|
||||
manifest = self._download_json(
|
||||
f'{base_url}/imsmanifestlite.json',
|
||||
display_id, 'Downloading course manifest JSON')['manifest']
|
||||
|
||||
organization = manifest['organizations']['organization'][0]
|
||||
|
||||
entries = []
|
||||
for chapter in organization['item']:
|
||||
chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
|
||||
chapter_id = chapter.get('@identifier')
|
||||
for item in chapter.get('item', []):
|
||||
item_id = item.get('@identifier')
|
||||
if not item_id:
|
||||
continue
|
||||
metadata = item.get('resource', {}).get('metadata') or {}
|
||||
if metadata.get('learningresourcetype') != 'Video':
|
||||
continue
|
||||
_, title = self._extract_chapter_and_title(item.get('title'))
|
||||
duration = parse_duration(metadata.get('duration'))
|
||||
description = metadata.get('description')
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(
|
||||
f'mva:{course_id}:{item_id}', {'base_url': base_url}),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'chapter': chapter_title,
|
||||
'chapter_number': chapter_number,
|
||||
'chapter_id': chapter_id,
|
||||
})
|
||||
|
||||
title = organization.get('title') or manifest.get('metadata', {}).get('title')
|
||||
|
||||
return self.playlist_result(entries, course_id, title)
|
||||
@@ -65,7 +65,7 @@ class TechTVMITIE(InfoExtractor):
|
||||
|
||||
class OCWMITIE(InfoExtractor):
|
||||
IE_NAME = 'ocw.mit.edu'
|
||||
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_BASE_URL = 'http://ocw.mit.edu/'
|
||||
|
||||
_TESTS = [
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
from .telecinco import TelecincoIE
|
||||
from .telecinco import TelecincoBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
|
||||
class MiTeleIE(TelecincoBaseIE):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
|
||||
'info_dict': {
|
||||
@@ -27,6 +26,7 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1471209401,
|
||||
'upload_date': '20160814',
|
||||
},
|
||||
'skip': 'HTTP Error 404 Not Found',
|
||||
}, {
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
@@ -49,6 +49,26 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404 Not Found',
|
||||
}, {
|
||||
'url': 'https://www.mitele.es/programas-tv/horizonte/temporada-5/programa-171-40_013480051/player/',
|
||||
'info_dict': {
|
||||
'id': '7adbe22e-cd41-4787-afa4-36f3da7c2c6f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Horizonte Temporada 5 Programa 171',
|
||||
'description': 'md5:97f1fb712c5ac27e5693a8b3c5c0c6e3',
|
||||
'episode': 'Las Zonas de Bajas Emisiones, a debate',
|
||||
'episode_number': 171,
|
||||
'season': 'Season 5',
|
||||
'season_number': 5,
|
||||
'series': 'Horizonte',
|
||||
'duration': 7012,
|
||||
'upload_date': '20240927',
|
||||
'timestamp': 1727416450,
|
||||
'thumbnail': 'https://album.mediaset.es/eimg/2024/09/27/horizonte-171_9f02.jpg',
|
||||
'age_limit': 12,
|
||||
},
|
||||
'params': {'geo_bypass_country': 'ES'},
|
||||
}, {
|
||||
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -1,17 +1,23 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MLBBaseIE(InfoExtractor):
|
||||
@@ -275,76 +281,225 @@ class MLBVideoIE(MLBBaseIE):
|
||||
class MLBTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
|
||||
_NETRC_MACHINE = 'mlb'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
|
||||
'info_dict': {
|
||||
'id': '661581',
|
||||
'ext': 'mp4',
|
||||
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
|
||||
'release_date': '20220702',
|
||||
'release_timestamp': 1656792300,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# makeup game: has multiple dates, need to avoid games with 'rescheduleDate'
|
||||
'url': 'https://www.mlb.com/tv/g747039/vd22541c4-5a29-45f7-822b-635ec041cf5e',
|
||||
'info_dict': {
|
||||
'id': '747039',
|
||||
'ext': 'mp4',
|
||||
'title': '2024-07-29 - Toronto Blue Jays @ Baltimore Orioles',
|
||||
'release_date': '20240729',
|
||||
'release_timestamp': 1722280200,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
_GRAPHQL_INIT_QUERY = '''\
|
||||
mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) {
|
||||
initSession(device: $device, clientType: $clientType, experience: $experience) {
|
||||
deviceId
|
||||
sessionId
|
||||
entitlements {
|
||||
code
|
||||
}
|
||||
location {
|
||||
countryCode
|
||||
regionName
|
||||
zipCode
|
||||
latitude
|
||||
longitude
|
||||
}
|
||||
clientExperience
|
||||
features
|
||||
}
|
||||
}'''
|
||||
_GRAPHQL_PLAYBACK_QUERY = '''\
|
||||
mutation initPlaybackSession(
|
||||
$adCapabilities: [AdExperienceType]
|
||||
$mediaId: String!
|
||||
$deviceId: String!
|
||||
$sessionId: String!
|
||||
$quality: PlaybackQuality
|
||||
) {
|
||||
initPlaybackSession(
|
||||
adCapabilities: $adCapabilities
|
||||
mediaId: $mediaId
|
||||
deviceId: $deviceId
|
||||
sessionId: $sessionId
|
||||
quality: $quality
|
||||
) {
|
||||
playbackSessionId
|
||||
playback {
|
||||
url
|
||||
token
|
||||
expiration
|
||||
cdn
|
||||
}
|
||||
}
|
||||
}'''
|
||||
_APP_VERSION = '7.8.2'
|
||||
_device_id = None
|
||||
_session_id = None
|
||||
_access_token = None
|
||||
_token_expiry = 0
|
||||
|
||||
@property
|
||||
def _api_headers(self):
|
||||
if (self._token_expiry - 120) <= time.time():
|
||||
self.write_debug('Access token has expired; re-logging in')
|
||||
self._perform_login(*self._get_login_info())
|
||||
return {'Authorization': f'Bearer {self._access_token}'}
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._access_token:
|
||||
self.raise_login_required(
|
||||
'All videos are only available to registered users', method='password')
|
||||
|
||||
def _set_device_id(self, username):
|
||||
if not self._device_id:
|
||||
self._device_id = self.cache.load(
|
||||
self._NETRC_MACHINE, 'device_ids', default={}).get(username)
|
||||
if self._device_id:
|
||||
return
|
||||
self._device_id = str(uuid.uuid4())
|
||||
self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
|
||||
access_token = self._download_json(
|
||||
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
||||
headers={
|
||||
'User-Agent': 'okhttp/3.12.1',
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=data.encode())['access_token']
|
||||
try:
|
||||
self._access_token = self._download_json(
|
||||
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
||||
'Logging in', 'Unable to log in', headers={
|
||||
'User-Agent': 'okhttp/3.12.1',
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=urlencode_postdata({
|
||||
'grant_type': 'password',
|
||||
'username': username,
|
||||
'password': password,
|
||||
'scope': 'openid offline_access',
|
||||
'client_id': '0oa3e1nutA1HLzAKG356',
|
||||
}))['access_token']
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 400:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
raise
|
||||
|
||||
entitlement = self._download_webpage(
|
||||
f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None,
|
||||
headers={
|
||||
'User-Agent': 'okhttp/3.12.1',
|
||||
'Authorization': f'Bearer {access_token}',
|
||||
})
|
||||
self._token_expiry = traverse_obj(self._access_token, ({jwt_decode_hs256}, 'exp', {int})) or 0
|
||||
self._set_device_id(username)
|
||||
|
||||
data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
|
||||
self._access_token = self._download_json(
|
||||
'https://us.edge.bamgrid.com/token', None,
|
||||
self._session_id = self._call_api({
|
||||
'operationName': 'initSession',
|
||||
'query': self._GRAPHQL_INIT_QUERY,
|
||||
'variables': {
|
||||
'device': {
|
||||
'appVersion': self._APP_VERSION,
|
||||
'deviceFamily': 'desktop',
|
||||
'knownDeviceId': self._device_id,
|
||||
'languagePreference': 'ENGLISH',
|
||||
'manufacturer': '',
|
||||
'model': '',
|
||||
'os': '',
|
||||
'osVersion': '',
|
||||
},
|
||||
'clientType': 'WEB',
|
||||
},
|
||||
}, None, 'session ID')['data']['initSession']['sessionId']
|
||||
|
||||
def _call_api(self, data, video_id, description='GraphQL JSON', fatal=True):
|
||||
return self._download_json(
|
||||
'https://media-gateway.mlb.com/graphql', video_id,
|
||||
f'Downloading {description}', f'Unable to download {description}', fatal=fatal,
|
||||
headers={
|
||||
**self._api_headers,
|
||||
'Accept': 'application/json',
|
||||
'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=data.encode())['access_token']
|
||||
'Content-Type': 'application/json',
|
||||
'x-client-name': 'WEB',
|
||||
'x-client-version': self._APP_VERSION,
|
||||
}, data=json.dumps(data, separators=(',', ':')).encode())
|
||||
|
||||
def _extract_formats_and_subtitles(self, broadcast, video_id):
|
||||
feed = traverse_obj(broadcast, ('homeAway', {str.title}))
|
||||
medium = traverse_obj(broadcast, ('type', {str}))
|
||||
language = traverse_obj(broadcast, ('language', {str.lower}))
|
||||
format_id = join_nonempty(feed, medium, language)
|
||||
|
||||
response = self._call_api({
|
||||
'operationName': 'initPlaybackSession',
|
||||
'query': self._GRAPHQL_PLAYBACK_QUERY,
|
||||
'variables': {
|
||||
'adCapabilities': ['GOOGLE_STANDALONE_AD_PODS'],
|
||||
'deviceId': self._device_id,
|
||||
'mediaId': broadcast['mediaId'],
|
||||
'quality': 'PLACEHOLDER',
|
||||
'sessionId': self._session_id,
|
||||
},
|
||||
}, video_id, f'{format_id} broadcast JSON', fatal=False)
|
||||
|
||||
playback = traverse_obj(response, ('data', 'initPlaybackSession', 'playback', {dict}))
|
||||
m3u8_url = traverse_obj(playback, ('url', {url_or_none}))
|
||||
token = traverse_obj(playback, ('token', {str}))
|
||||
|
||||
if not (m3u8_url and token):
|
||||
errors = '; '.join(traverse_obj(response, ('errors', ..., 'message', {str})))
|
||||
if 'not entitled' in errors:
|
||||
raise ExtractorError(errors, expected=True)
|
||||
elif errors: # Only warn when 'blacked out' since radio formats are available
|
||||
self.report_warning(f'API returned errors for {format_id}: {errors}')
|
||||
else:
|
||||
self.report_warning(f'No formats available for {format_id} broadcast; skipping')
|
||||
return [], {}
|
||||
|
||||
cdn_headers = {'x-cdn-token': token}
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4',
|
||||
m3u8_id=format_id, fatal=False, headers=cdn_headers)
|
||||
for fmt in fmts:
|
||||
fmt['http_headers'] = cdn_headers
|
||||
fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' '))
|
||||
fmt.setdefault('language', language)
|
||||
if fmt.get('vcodec') == 'none' and fmt['language'] == 'en':
|
||||
fmt['source_preference'] = 10
|
||||
|
||||
return fmts, subs
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
airings = self._download_json(
|
||||
f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
|
||||
video_id)['data']['Airings']
|
||||
data = self._download_json(
|
||||
'https://statsapi.mlb.com/api/v1/schedule', video_id, query={
|
||||
'gamePk': video_id,
|
||||
'hydrate': 'broadcasts(all),statusFlags',
|
||||
})
|
||||
metadata = traverse_obj(data, (
|
||||
'dates', ..., 'games',
|
||||
lambda _, v: str(v['gamePk']) == video_id and not v.get('rescheduleDate'), any))
|
||||
|
||||
broadcasts = traverse_obj(metadata, (
|
||||
'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF'))
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for airing in airings:
|
||||
m3u8_url = self._download_json(
|
||||
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
|
||||
headers={
|
||||
'Authorization': self._access_token,
|
||||
'Accept': 'application/vnd.media-service+json; version=2',
|
||||
})['stream']['complete']
|
||||
f, s = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id=join_nonempty(airing.get('feedType'), airing.get('feedLanguage')))
|
||||
formats.extend(f)
|
||||
self._merge_subtitles(s, target=subtitles)
|
||||
for broadcast in broadcasts:
|
||||
fmts, subs = self._extract_formats_and_subtitles(broadcast, video_id)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
|
||||
'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE',
|
||||
'title': join_nonempty(
|
||||
traverse_obj(metadata, ('officialDate', {str})),
|
||||
traverse_obj(metadata, ('teams', ('away', 'home'), 'team', 'name', {str}, all, {' @ '.join})),
|
||||
delim=' - '),
|
||||
'is_live': traverse_obj(broadcasts, (..., 'mediaState', 'mediaStateCode', {str}, any)) == 'MEDIA_ON',
|
||||
'release_timestamp': traverse_obj(metadata, ('gameDate', {parse_iso8601})),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
|
||||
}
|
||||
|
||||
|
||||
|
||||
121
yt_dlp/extractor/mojevideo.py
Normal file
121
yt_dlp/extractor/mojevideo.py
Normal file
@@ -0,0 +1,121 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json, remove_end, update_url_query
|
||||
|
||||
|
||||
class MojevideoIE(InfoExtractor):
|
||||
IE_DESC = 'mojevideo.sk'
|
||||
_VALID_URL = r'https?://(?:www\.)?mojevideo\.sk/video/(?P<id>\w+)/(?P<display_id>[\w()]+?)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html',
|
||||
'md5': '384a4628bd2bbd261c5206cf77c38c17',
|
||||
'info_dict': {
|
||||
'id': '3d17c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chlapci dobetónovali sme, máme hotovo!',
|
||||
'display_id': 'chlapci_dobetonovali_sme_mame_hotovo',
|
||||
'description': 'md5:a0822126044050d304a9ef58c92ddb34',
|
||||
'thumbnail': 'https://fs5.mojevideo.sk/imgfb/250236.jpg',
|
||||
'duration': 21.0,
|
||||
'upload_date': '20230919',
|
||||
'timestamp': 1695129706,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# 720p
|
||||
'url': 'https://www.mojevideo.sk/video/14677/den_blbec.html',
|
||||
'md5': '517c3e111c53a67d10b429c1f344ba2f',
|
||||
'info_dict': {
|
||||
'id': '14677',
|
||||
'ext': 'mp4',
|
||||
'title': 'Deň blbec?',
|
||||
'display_id': 'den_blbec',
|
||||
'description': 'I maličkosť vám môže zmeniť celý deň. Nikdy nezahadzujte žuvačky na zem!',
|
||||
'thumbnail': 'https://fs5.mojevideo.sk/imgfb/83575.jpg',
|
||||
'duration': 100.0,
|
||||
'upload_date': '20120515',
|
||||
'timestamp': 1337076481,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# 1080p
|
||||
'url': 'https://www.mojevideo.sk/video/2feb2/band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd).html',
|
||||
'md5': '64599a23d3ac31cf2fe069e4353d8162',
|
||||
'info_dict': {
|
||||
'id': '2feb2',
|
||||
'ext': 'mp4',
|
||||
'title': 'BAND-MAID - onset (Instrumental) Live - Zepp Tokyo (Full HD)',
|
||||
'display_id': 'band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd)',
|
||||
'description': 'Výborná inštrumentálna skladba od skupiny BAND-MAID.',
|
||||
'thumbnail': 'https://fs5.mojevideo.sk/imgfb/196274.jpg',
|
||||
'duration': 240.0,
|
||||
'upload_date': '20190708',
|
||||
'timestamp': 1562576592,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# 720p
|
||||
'url': 'https://www.mojevideo.sk/video/358c8/dva_nissany_skyline_strielaju_v_londyne.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# 720p
|
||||
'url': 'https://www.mojevideo.sk/video/2455d/gopro_hero4_session_nova_sportova_vodotesna_kamera.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# 1080p
|
||||
'url': 'https://www.mojevideo.sk/video/352ee/amd_rx_6800_xt_vs_nvidia_rtx_3080_(test_v_9_hrach).html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# 1080p
|
||||
'url': 'https://www.mojevideo.sk/video/2cbeb/trailer_z_avengers_infinity_war.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = self._match_valid_url(url).groups()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id_dec = self._search_regex(
|
||||
r'\bvId\s*=\s*(\d+)', webpage, 'video id', fatal=False) or str(int(video_id, 16))
|
||||
video_exp = self._search_regex(r'\bvEx\s*=\s*["\'](\d+)', webpage, 'video expiry')
|
||||
video_hashes = self._search_json(
|
||||
r'\bvHash\s*=', webpage, 'video hashes', video_id,
|
||||
contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)
|
||||
|
||||
formats = []
|
||||
for video_hash, (suffix, quality, format_note) in zip(video_hashes, [
|
||||
('', 1, 'normálna kvalita'),
|
||||
('_lq', 0, 'nízka kvalita'),
|
||||
('_hd', 2, 'HD-720p'),
|
||||
('_fhd', 3, 'FULL HD-1080p'),
|
||||
('_2k', 4, '2K-1440p'),
|
||||
]):
|
||||
formats.append({
|
||||
'format_id': f'mp4-{quality}',
|
||||
'quality': quality,
|
||||
'format_note': format_note,
|
||||
'url': update_url_query(
|
||||
f'https://cache01.mojevideo.sk/securevideos69/{video_id_dec}{suffix}.mp4', {
|
||||
'md5': video_hash,
|
||||
'expires': video_exp,
|
||||
}),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'title': (self._og_search_title(webpage, default=None)
|
||||
or remove_end(self._html_extract_title(webpage, 'title'), ' - Mojevideo')),
|
||||
'description': self._og_search_description(webpage),
|
||||
**self._search_json_ld(webpage, video_id, default={}),
|
||||
}
|
||||
@@ -5,39 +5,103 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
try_get,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
parse_count,
|
||||
remove_end,
|
||||
update_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class MurrtubeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
murrtube:|
|
||||
https?://murrtube\.net/videos/(?P<slug>[a-z0-9\-]+)\-
|
||||
https?://murrtube\.net/(?:v/|videos/(?P<slug>[a-z0-9-]+?)-)
|
||||
)
|
||||
(?P<id>[a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12})
|
||||
(?P<id>[A-Z0-9]{4}|[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})
|
||||
'''
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
|
||||
'md5': '169f494812d9a90914b42978e73aa690',
|
||||
'md5': '70380878a77e8565d4aea7f68b8bbb35',
|
||||
'info_dict': {
|
||||
'id': '148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
|
||||
'id': 'ca885d8456b95de529b6723b158032e11115d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Inferno X Skyler',
|
||||
'description': 'Humping a very good slutty sheppy (roomate)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 284,
|
||||
'uploader': 'Inferno Wolf',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://storage.murrtube.net/murrtube-production/ekbs3zcfvuynnqfx72nn2tkokvsd',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['hump', 'breed', 'Fursuit', 'murrsuit', 'bareback'],
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://murrtube.net/v/0J2Q',
|
||||
'md5': '31262f6ac56f0ca75e5a54a0f3fefcb6',
|
||||
'info_dict': {
|
||||
'id': '8442998c52134968d9caa36e473e1a6bac6ca',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Hayel',
|
||||
'title': 'Who\'s in charge now?',
|
||||
'description': 'md5:795791e97e5b0f1805ea84573f02a997',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://storage.murrtube.net/murrtube-production/fb1ojjwiucufp34ya6hxu5vfqi5s',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_count(self, name, html):
|
||||
return parse_count(self._search_regex(
|
||||
rf'([\d,]+)\s+<span[^>]*>{name}</span>', html, name, default=None))
|
||||
|
||||
def _real_initialize(self):
|
||||
homepage = self._download_webpage(
|
||||
'https://murrtube.net', None, note='Getting session token')
|
||||
self._request_webpage(
|
||||
'https://murrtube.net/accept_age_check', None, 'Setting age cookie',
|
||||
data=urlencode_postdata(self._hidden_inputs(homepage)))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
if video_id.startswith('murrtube:'):
|
||||
raise ExtractorError('Support for murrtube: prefix URLs is broken')
|
||||
video_page = self._download_webpage(url, video_id)
|
||||
video_attrs = extract_attributes(get_element_html_by_id('video', video_page))
|
||||
playlist = update_url(video_attrs['data-url'], query=None)
|
||||
video_id = self._search_regex(r'/([\da-f]+)/index.m3u8', playlist, 'video id')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': remove_end(self._og_search_title(video_page), ' - Murrtube'),
|
||||
'age_limit': 18,
|
||||
'formats': self._extract_m3u8_formats(playlist, video_id, 'mp4'),
|
||||
'description': self._og_search_description(video_page),
|
||||
'thumbnail': update_url(self._og_search_thumbnail(video_page, default=''), query=None) or None,
|
||||
'uploader': clean_html(get_element_by_class('pl-1 is-size-6 has-text-lighter', video_page)),
|
||||
'view_count': self._extract_count('Views', video_page),
|
||||
'like_count': self._extract_count('Likes', video_page),
|
||||
'comment_count': self._extract_count('Comments', video_page),
|
||||
}
|
||||
|
||||
|
||||
class MurrtubeUserIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_DESC = 'Murrtube user profile'
|
||||
_VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
|
||||
_TESTS = [{
|
||||
'url': 'https://murrtube.net/stormy',
|
||||
'info_dict': {
|
||||
'id': 'stormy',
|
||||
},
|
||||
'playlist_mincount': 27,
|
||||
}]
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
def _download_gql(self, video_id, op, note=None, fatal=True):
|
||||
result = self._download_json(
|
||||
@@ -46,73 +110,6 @@ class MurrtubeIE(InfoExtractor):
|
||||
headers={'Content-Type': 'application/json'})
|
||||
return result['data']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_gql(video_id, {
|
||||
'operationName': 'Medium',
|
||||
'variables': {
|
||||
'id': video_id,
|
||||
},
|
||||
'query': '''\
|
||||
query Medium($id: ID!) {
|
||||
medium(id: $id) {
|
||||
title
|
||||
description
|
||||
key
|
||||
duration
|
||||
commentsCount
|
||||
likesCount
|
||||
viewsCount
|
||||
thumbnailKey
|
||||
tagList
|
||||
user {
|
||||
name
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
}'''})
|
||||
meta = data['medium']
|
||||
|
||||
storage_url = 'https://storage.murrtube.net/murrtube/'
|
||||
format_url = storage_url + meta.get('key', '')
|
||||
thumbnail = storage_url + meta.get('thumbnailKey', '')
|
||||
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native', fatal=False)
|
||||
else:
|
||||
formats = [{'url': format_url}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': meta.get('title'),
|
||||
'description': meta.get('description'),
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(meta.get('duration')),
|
||||
'uploader': try_get(meta, lambda x: x['user']['name']),
|
||||
'view_count': meta.get('viewsCount'),
|
||||
'like_count': meta.get('likesCount'),
|
||||
'comment_count': meta.get('commentsCount'),
|
||||
'tags': meta.get('tagList'),
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
|
||||
class MurrtubeUserIE(MurrtubeIE): # XXX: Do not subclass from concrete IE
|
||||
_WORKING = False
|
||||
IE_DESC = 'Murrtube user profile'
|
||||
_VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
|
||||
_TEST = {
|
||||
'url': 'https://murrtube.net/stormy',
|
||||
'info_dict': {
|
||||
'id': 'stormy',
|
||||
},
|
||||
'playlist_mincount': 27,
|
||||
}
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
def _fetch_page(self, username, user_id, page):
|
||||
data = self._download_gql(username, {
|
||||
'operationName': 'Media',
|
||||
|
||||
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
mimetype2ext,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
@@ -498,10 +499,8 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
m3u8_id=format_id, fatal=False))
|
||||
continue
|
||||
tbr = int_or_none(va.get('bitrate'), 1000)
|
||||
if tbr:
|
||||
format_id += f'-{tbr}'
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'format_id': join_nonempty(format_id, tbr),
|
||||
'url': public_url,
|
||||
'width': int_or_none(va.get('width')),
|
||||
'height': int_or_none(va.get('height')),
|
||||
|
||||
@@ -22,12 +22,22 @@ from ..utils import (
|
||||
|
||||
|
||||
class NetEaseMusicBaseIE(InfoExtractor):
|
||||
_FORMATS = ['bMusic', 'mMusic', 'hMusic']
|
||||
# XXX: _extract_formats logic depends on the order of the levels in each tier
|
||||
_LEVELS = (
|
||||
'standard', # free tier; 标准; 128kbps mp3 or aac
|
||||
'higher', # free tier; 192kbps mp3 or aac
|
||||
'exhigh', # free tier; 极高 (HQ); 320kbps mp3 or aac
|
||||
'lossless', # VIP tier; 无损 (SQ); 48kHz/16bit flac
|
||||
'hires', # VIP tier; 高解析度无损 (Hi-Res); 192kHz/24bit flac
|
||||
'jyeffect', # VIP tier; 高清臻音 (Spatial Audio); 96kHz/24bit flac
|
||||
'jymaster', # SVIP tier; 超清母带 (Master); 192kHz/24bit flac
|
||||
'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac
|
||||
)
|
||||
_API_BASE = 'http://music.163.com/api/'
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@staticmethod
|
||||
def kilo_or_none(value):
|
||||
def _kilo_or_none(value):
|
||||
return int_or_none(value, scale=1000)
|
||||
|
||||
def _create_eapi_cipher(self, api_path, query_body, cookies):
|
||||
@@ -66,45 +76,43 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
**headers,
|
||||
}, **kwargs)
|
||||
|
||||
def _call_player_api(self, song_id, bitrate):
|
||||
def _call_player_api(self, song_id, level):
|
||||
return self._download_eapi_json(
|
||||
'/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate},
|
||||
note=f'Downloading song URL info: bitrate {bitrate}')
|
||||
'/song/enhance/player/url/v1', song_id,
|
||||
{'ids': f'[{song_id}]', 'level': level, 'encodeType': 'flac'},
|
||||
note=f'Downloading song URL info: level {level}')
|
||||
|
||||
def extract_formats(self, info):
|
||||
err = 0
|
||||
def _extract_formats(self, info):
|
||||
formats = []
|
||||
song_id = info['id']
|
||||
for song_format in self._FORMATS:
|
||||
details = info.get(song_format)
|
||||
if not details:
|
||||
for level in self._LEVELS:
|
||||
song = traverse_obj(
|
||||
self._call_player_api(song_id, level), ('data', lambda _, v: url_or_none(v['url']), any))
|
||||
if not song:
|
||||
break # Media is not available due to removal or geo-restriction
|
||||
actual_level = song.get('level')
|
||||
if actual_level and actual_level != level:
|
||||
if level in ('lossless', 'jymaster'):
|
||||
break # We've already extracted the highest level of the user's account tier
|
||||
continue
|
||||
bitrate = int_or_none(details.get('bitrate')) or 999000
|
||||
for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))):
|
||||
song_url = song['url']
|
||||
if self._is_valid_url(song_url, info['id'], 'song'):
|
||||
formats.append({
|
||||
'url': song_url,
|
||||
'format_id': song_format,
|
||||
'asr': traverse_obj(details, ('sr', {int_or_none})),
|
||||
**traverse_obj(song, {
|
||||
'ext': ('type', {str}),
|
||||
'abr': ('br', {self.kilo_or_none}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
elif err == 0:
|
||||
err = traverse_obj(song, ('code', {int})) or 0
|
||||
|
||||
formats.append({
|
||||
'url': song['url'],
|
||||
'format_id': level,
|
||||
'vcodec': 'none',
|
||||
**traverse_obj(song, {
|
||||
'ext': ('type', {str}),
|
||||
'abr': ('br', {self._kilo_or_none}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
if not actual_level:
|
||||
break # Only 1 level is available if API does not return a value (netease:program)
|
||||
if not formats:
|
||||
if err != 0 and (err < 200 or err >= 400):
|
||||
raise ExtractorError(f'No media links found (site code {err})', expected=True)
|
||||
else:
|
||||
self.raise_geo_restricted(
|
||||
'No media links found: probably due to geo restriction.', countries=['CN'])
|
||||
self.raise_geo_restricted(
|
||||
'No media links found; possibly due to geo restriction', countries=['CN'])
|
||||
return formats
|
||||
|
||||
def query_api(self, endpoint, video_id, note):
|
||||
def _query_api(self, endpoint, video_id, note):
|
||||
result = self._download_json(
|
||||
f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE})
|
||||
code = traverse_obj(result, ('code', {int}))
|
||||
@@ -128,32 +136,29 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:song'
|
||||
IE_DESC = '网易云音乐'
|
||||
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://music.163.com/#/song?id=548648087',
|
||||
'url': 'https://music.163.com/#/song?id=550136151',
|
||||
'info_dict': {
|
||||
'id': '548648087',
|
||||
'id': '550136151',
|
||||
'ext': 'mp3',
|
||||
'title': '戒烟 (Live)',
|
||||
'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊',
|
||||
'title': 'It\'s Ok (Live)',
|
||||
'creators': 'count:10',
|
||||
'timestamp': 1522944000,
|
||||
'upload_date': '20180405',
|
||||
'description': 'md5:3650af9ee22c87e8637cb2dde22a765c',
|
||||
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
|
||||
'duration': 256,
|
||||
'description': 'md5:9fd07059c2ccee3950dc8363429a3135',
|
||||
'duration': 197,
|
||||
'thumbnail': r're:^http.*\.jpg',
|
||||
'album': '偶像练习生 表演曲目合集',
|
||||
'average_rating': int,
|
||||
'album_artist': '偶像练习生',
|
||||
'album_artists': ['偶像练习生'],
|
||||
},
|
||||
}, {
|
||||
'note': 'No lyrics.',
|
||||
'url': 'http://music.163.com/song?id=17241424',
|
||||
'info_dict': {
|
||||
'id': '17241424',
|
||||
'ext': 'mp3',
|
||||
'title': 'Opus 28',
|
||||
'creator': 'Dustin O\'Halloran',
|
||||
'upload_date': '20080211',
|
||||
'timestamp': 1202745600,
|
||||
'duration': 263,
|
||||
@@ -161,15 +166,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'album': 'Piano Solos Vol. 2',
|
||||
'album_artist': 'Dustin O\'Halloran',
|
||||
'average_rating': int,
|
||||
'description': '[00:05.00]纯音乐,请欣赏\n',
|
||||
'album_artists': ['Dustin O\'Halloran'],
|
||||
'creators': ['Dustin O\'Halloran'],
|
||||
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
|
||||
'md5': '95826c73ea50b1c288b22180ec9e754d',
|
||||
'md5': 'b896be78d8d34bd7bb665b26710913ff',
|
||||
'info_dict': {
|
||||
'id': '95670',
|
||||
'ext': 'mp3',
|
||||
'title': '国际歌',
|
||||
'creator': '马备',
|
||||
'upload_date': '19911130',
|
||||
'timestamp': 691516800,
|
||||
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
|
||||
@@ -180,6 +188,8 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'average_rating': int,
|
||||
'album': '红色摇滚',
|
||||
'album_artist': '侯牧人',
|
||||
'creators': ['马备'],
|
||||
'album_artists': ['侯牧人'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://music.163.com/#/song?id=32102397',
|
||||
@@ -188,7 +198,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'id': '32102397',
|
||||
'ext': 'mp3',
|
||||
'title': 'Bad Blood',
|
||||
'creator': 'Taylor Swift / Kendrick Lamar',
|
||||
'creators': ['Taylor Swift', 'Kendrick Lamar'],
|
||||
'upload_date': '20150516',
|
||||
'timestamp': 1431792000,
|
||||
'description': 'md5:21535156efb73d6d1c355f95616e285a',
|
||||
@@ -207,7 +217,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'id': '22735043',
|
||||
'ext': 'mp3',
|
||||
'title': '소원을 말해봐 (Genie)',
|
||||
'creator': '少女时代',
|
||||
'creators': ['少女时代'],
|
||||
'upload_date': '20100127',
|
||||
'timestamp': 1264608000,
|
||||
'description': 'md5:03d1ffebec3139aa4bafe302369269c5',
|
||||
@@ -251,12 +261,12 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
def _real_extract(self, url):
|
||||
song_id = self._match_id(url)
|
||||
|
||||
info = self.query_api(
|
||||
info = self._query_api(
|
||||
f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0]
|
||||
|
||||
formats = self.extract_formats(info)
|
||||
formats = self._extract_formats(info)
|
||||
|
||||
lyrics = self._process_lyrics(self.query_api(
|
||||
lyrics = self._process_lyrics(self._query_api(
|
||||
f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data'))
|
||||
lyric_data = {
|
||||
'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False),
|
||||
@@ -267,14 +277,14 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'id': song_id,
|
||||
'formats': formats,
|
||||
'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
|
||||
'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None,
|
||||
'album_artist': ' / '.join(traverse_obj(info, ('album', 'artists', ..., 'name'))) or None,
|
||||
'creators': traverse_obj(info, ('artists', ..., 'name')) or None,
|
||||
'album_artists': traverse_obj(info, ('album', 'artists', ..., 'name')) or None,
|
||||
**lyric_data,
|
||||
**traverse_obj(info, {
|
||||
'title': ('name', {str}),
|
||||
'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
|
||||
'timestamp': ('album', 'publishTime', {self._kilo_or_none}),
|
||||
'thumbnail': ('album', 'picUrl', {url_or_none}),
|
||||
'duration': ('duration', {self.kilo_or_none}),
|
||||
'duration': ('duration', {self._kilo_or_none}),
|
||||
'album': ('album', 'name', {str}),
|
||||
'average_rating': ('score', {int_or_none}),
|
||||
}),
|
||||
@@ -284,7 +294,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:album'
|
||||
IE_DESC = '网易云音乐 - 专辑'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?album\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://music.163.com/#/album?id=133153666',
|
||||
'info_dict': {
|
||||
@@ -294,7 +304,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
||||
'description': '桃几2021年翻唱合集',
|
||||
'thumbnail': r're:^http.*\.jpg',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'http://music.163.com/#/album?id=220780',
|
||||
'info_dict': {
|
||||
@@ -328,7 +338,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:singer'
|
||||
IE_DESC = '网易云音乐 - 歌手'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?artist\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'note': 'Singer has aliases.',
|
||||
'url': 'http://music.163.com/#/artist?id=10559',
|
||||
@@ -358,7 +368,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||
def _real_extract(self, url):
|
||||
singer_id = self._match_id(url)
|
||||
|
||||
info = self.query_api(
|
||||
info = self._query_api(
|
||||
f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data')
|
||||
|
||||
name = join_nonempty(
|
||||
@@ -372,7 +382,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:playlist'
|
||||
IE_DESC = '网易云音乐 - 歌单'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?(?:playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://music.163.com/#/playlist?id=79177352',
|
||||
'info_dict': {
|
||||
@@ -405,11 +415,15 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
'url': 'http://music.163.com/#/discover/toplist?id=3733003',
|
||||
'info_dict': {
|
||||
'id': '3733003',
|
||||
'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
|
||||
'title': 're:韩国Melon排行榜周榜(?: [0-9]{4}-[0-9]{2}-[0-9]{2})?',
|
||||
'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
|
||||
'upload_date': '20200109',
|
||||
'uploader_id': '2937386',
|
||||
'tags': ['韩语', '榜单'],
|
||||
'uploader': 'Melon榜单',
|
||||
'timestamp': 1578569373,
|
||||
},
|
||||
'playlist_count': 50,
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -426,7 +440,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
'tags': ('tags', ..., {str}),
|
||||
'uploader': ('creator', 'nickname', {str}),
|
||||
'uploader_id': ('creator', 'userId', {str_or_none}),
|
||||
'timestamp': ('updateTime', {self.kilo_or_none}),
|
||||
'timestamp': ('updateTime', {self._kilo_or_none}),
|
||||
}))
|
||||
if traverse_obj(info, ('playlist', 'specialType')) == 10:
|
||||
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
|
||||
@@ -437,7 +451,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:mv'
|
||||
IE_DESC = '网易云音乐 - MV'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?mv\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://music.163.com/#/mv?id=10958064',
|
||||
'info_dict': {
|
||||
@@ -445,7 +459,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': '交换余生',
|
||||
'description': 'md5:e845872cff28820642a2b02eda428fea',
|
||||
'creator': '林俊杰',
|
||||
'creators': ['林俊杰'],
|
||||
'upload_date': '20200916',
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
'duration': 364,
|
||||
@@ -460,7 +474,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': '이럴거면 그러지말지',
|
||||
'description': '白雅言自作曲唱甜蜜爱情',
|
||||
'creator': '白娥娟',
|
||||
'creators': ['白娥娟'],
|
||||
'upload_date': '20150520',
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
'duration': 216,
|
||||
@@ -468,12 +482,28 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'This MV has multiple creators.',
|
||||
'url': 'https://music.163.com/#/mv?id=22593543',
|
||||
'info_dict': {
|
||||
'id': '22593543',
|
||||
'ext': 'mp4',
|
||||
'title': '老北京杀器',
|
||||
'creators': ['秃子2z', '辉子', 'Saber梁维嘉'],
|
||||
'duration': 206,
|
||||
'upload_date': '20240618',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mv_id = self._match_id(url)
|
||||
|
||||
info = self.query_api(
|
||||
info = self._query_api(
|
||||
f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data']
|
||||
|
||||
formats = [
|
||||
@@ -484,13 +514,13 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
return {
|
||||
'id': mv_id,
|
||||
'formats': formats,
|
||||
'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')],
|
||||
**traverse_obj(info, {
|
||||
'title': ('name', {str}),
|
||||
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
|
||||
'creator': ('artistName', {str}),
|
||||
'upload_date': ('publishTime', {unified_strdate}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
'duration': ('duration', {self.kilo_or_none}),
|
||||
'duration': ('duration', {self._kilo_or_none}),
|
||||
'view_count': ('playCount', {int_or_none}),
|
||||
'like_count': ('likeCount', {int_or_none}),
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
@@ -501,7 +531,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:program'
|
||||
IE_DESC = '网易云音乐 - 电台节目'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?program\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://music.163.com/#/program?id=10109055',
|
||||
'info_dict': {
|
||||
@@ -509,7 +539,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'ext': 'mp3',
|
||||
'title': '不丹足球背后的故事',
|
||||
'description': '喜马拉雅人的足球梦 ...',
|
||||
'creator': '大话西藏',
|
||||
'creators': ['大话西藏'],
|
||||
'timestamp': 1434179287,
|
||||
'upload_date': '20150613',
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
@@ -522,7 +552,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'id': '10141022',
|
||||
'title': '滚滚电台的有声节目',
|
||||
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
||||
'creator': '滚滚电台ORZ',
|
||||
'creators': ['滚滚电台ORZ'],
|
||||
'timestamp': 1434450733,
|
||||
'upload_date': '20150616',
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
@@ -536,7 +566,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'ext': 'mp3',
|
||||
'title': '滚滚电台的有声节目',
|
||||
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
||||
'creator': '滚滚电台ORZ',
|
||||
'creators': ['滚滚电台ORZ'],
|
||||
'timestamp': 1434450733,
|
||||
'upload_date': '20150616',
|
||||
'thumbnail': r're:http.*\.jpg',
|
||||
@@ -550,7 +580,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
def _real_extract(self, url):
|
||||
program_id = self._match_id(url)
|
||||
|
||||
info = self.query_api(
|
||||
info = self._query_api(
|
||||
f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program']
|
||||
|
||||
metainfo = traverse_obj(info, {
|
||||
@@ -558,17 +588,17 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'description': ('description', {str}),
|
||||
'creator': ('dj', 'brand', {str}),
|
||||
'thumbnail': ('coverUrl', {url_or_none}),
|
||||
'timestamp': ('createTime', {self.kilo_or_none}),
|
||||
'timestamp': ('createTime', {self._kilo_or_none}),
|
||||
})
|
||||
|
||||
if not self._yes_playlist(
|
||||
info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
|
||||
formats = self.extract_formats(info['mainSong'])
|
||||
formats = self._extract_formats(info['mainSong'])
|
||||
|
||||
return {
|
||||
'id': str(info['mainSong']['id']),
|
||||
'formats': formats,
|
||||
'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})),
|
||||
'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})),
|
||||
**metainfo,
|
||||
}
|
||||
|
||||
@@ -579,7 +609,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:djradio'
|
||||
IE_DESC = '网易云音乐 - 电台'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://music\.163\.com/(?:#/)?djradio\?id=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://music.163.com/#/djradio?id=42',
|
||||
'info_dict': {
|
||||
@@ -597,7 +627,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
|
||||
metainfo = {}
|
||||
entries = []
|
||||
for offset in itertools.count(start=0, step=self._PAGE_SIZE):
|
||||
info = self.query_api(
|
||||
info = self._query_api(
|
||||
f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}',
|
||||
dj_id, note=f'Downloading dj programs - {offset}')
|
||||
|
||||
|
||||
@@ -371,7 +371,7 @@ class NexxIE(InfoExtractor):
|
||||
# not all videos work via arc, e.g. nexx:741:1269984
|
||||
if not video:
|
||||
# Reverse engineered from JS code (see getDeviceID function)
|
||||
device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(1e4, 99999)}{random.randint(1, 9)}'
|
||||
device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(10000, 99999)}{random.randint(1, 9)}'
|
||||
|
||||
result = self._call_api(domain_id, 'session/init', video_id, data={
|
||||
'nxp_devh': device_id,
|
||||
|
||||
@@ -4,6 +4,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
filter_dict,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
@@ -590,21 +591,22 @@ class NhkRadiruIE(InfoExtractor):
|
||||
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
|
||||
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
|
||||
'skip': 'Episode expired on 2024-02-24',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239',
|
||||
'skip': 'Episode expired on 2024-06-09',
|
||||
'info_dict': {
|
||||
'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
|
||||
'id': '0449_01_3926210',
|
||||
'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集',
|
||||
'id': '0449_01_4003239',
|
||||
'ext': 'm4a',
|
||||
'uploader': 'NHK FM 東京',
|
||||
'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc',
|
||||
'series': 'ジャズ・トゥナイト',
|
||||
'uploader': 'NHK-FM',
|
||||
'channel': 'NHK-FM',
|
||||
'channel': 'NHK FM 東京',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
|
||||
'release_date': '20240217',
|
||||
'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
|
||||
'timestamp': 1708185600,
|
||||
'release_timestamp': 1708178400,
|
||||
'upload_date': '20240217',
|
||||
'upload_date': '20240601',
|
||||
'series_id': '0449_01',
|
||||
'release_date': '20240601',
|
||||
'timestamp': 1717257600,
|
||||
'release_timestamp': 1717250400,
|
||||
},
|
||||
}, {
|
||||
# playlist, airs every weekday so it should _hopefully_ be okay forever
|
||||
@@ -613,71 +615,145 @@ class NhkRadiruIE(InfoExtractor):
|
||||
'id': '0458_01',
|
||||
'title': 'ベストオブクラシック',
|
||||
'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
|
||||
'channel': 'NHK-FM',
|
||||
'uploader': 'NHK-FM',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
|
||||
'series_id': '0458_01',
|
||||
'uploader': 'NHK FM',
|
||||
'channel': 'NHK FM',
|
||||
'series': 'ベストオブクラシック',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
# one with letters in the id
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470',
|
||||
'note': 'Expires on 2024-03-31',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688',
|
||||
'note': 'Expires on 2025-03-31',
|
||||
'info_dict': {
|
||||
'id': 'F300_06_3738470',
|
||||
'id': 'F683_01_3910688',
|
||||
'ext': 'm4a',
|
||||
'title': '有島武郎「一房のぶどう」',
|
||||
'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n(2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より)',
|
||||
'channel': 'NHKラジオ第1、NHK-FM',
|
||||
'uploader': 'NHKラジオ第1、NHK-FM',
|
||||
'timestamp': 1635757200,
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg',
|
||||
'release_date': '20161207',
|
||||
'series': 'らじる文庫 by ラジオ深夜便 ',
|
||||
'release_timestamp': 1481126700,
|
||||
'upload_date': '20211101',
|
||||
'title': '夏目漱石「文鳥」第1回',
|
||||
'series': '【らじる文庫】夏目漱石「文鳥」(全4回)',
|
||||
'series_id': 'F683_01',
|
||||
'description': '朗読:浅井理アナウンサー',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg',
|
||||
'upload_date': '20240106',
|
||||
'release_date': '20240106',
|
||||
'uploader': 'NHK R1',
|
||||
'release_timestamp': 1704511800,
|
||||
'channel': 'NHK R1',
|
||||
'timestamp': 1704512700,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
|
||||
'expected_warnings': ['Unable to download JSON metadata',
|
||||
'Failed to get extended metadata. API returned Error 1: Invalid parameters'],
|
||||
}, {
|
||||
# news
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
|
||||
'skip': 'Expires on 2023-04-17',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173',
|
||||
'info_dict': {
|
||||
'id': 'F261_01_3855109',
|
||||
'id': 'F261_01_4012173',
|
||||
'ext': 'm4a',
|
||||
'channel': 'NHKラジオ第1',
|
||||
'uploader': 'NHKラジオ第1',
|
||||
'timestamp': 1681635900,
|
||||
'release_date': '20230416',
|
||||
'series': 'NHKラジオニュース',
|
||||
'title': '午後6時のNHKニュース',
|
||||
'title': '午前0時のNHKニュース',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
|
||||
'upload_date': '20230416',
|
||||
'release_timestamp': 1681635600,
|
||||
'release_timestamp': 1718290800,
|
||||
'release_date': '20240613',
|
||||
'timestamp': 1718291400,
|
||||
'upload_date': '20240613',
|
||||
},
|
||||
}, {
|
||||
# fallback when extended metadata fails
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298',
|
||||
'skip': 'Expires on 2024-06-07',
|
||||
'info_dict': {
|
||||
'id': '2834_01_4009298',
|
||||
'title': 'まち☆キラ!開成町特集',
|
||||
'ext': 'm4a',
|
||||
'release_date': '20240531',
|
||||
'upload_date': '20240531',
|
||||
'series': 'はま☆キラ!',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg',
|
||||
'channel': 'NHK R1,FM',
|
||||
'description': '',
|
||||
'timestamp': 1717123800,
|
||||
'uploader': 'NHK R1,FM',
|
||||
'release_timestamp': 1717120800,
|
||||
'series_id': '2834_01',
|
||||
},
|
||||
'expected_warnings': ['Failed to get extended metadata. API returned empty list.'],
|
||||
}]
|
||||
|
||||
_API_URL_TMPL = None
|
||||
|
||||
def _extract_extended_description(self, episode_id, episode):
|
||||
service, _, area = traverse_obj(episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')}))
|
||||
aa_vinfo3 = traverse_obj(episode, ('aa_vinfo3', {str}))
|
||||
def _extract_extended_metadata(self, episode_id, aa_vinfo):
|
||||
service, _, area = traverse_obj(aa_vinfo, (2, {str}, {lambda x: (x or '').partition(',')}))
|
||||
detail_url = try_call(
|
||||
lambda: self._API_URL_TMPL.format(service=service, area=area, dateid=aa_vinfo3))
|
||||
lambda: self._API_URL_TMPL.format(area=area, service=service, dateid=aa_vinfo[3]))
|
||||
if not detail_url:
|
||||
return
|
||||
return {}
|
||||
|
||||
full_meta = traverse_obj(
|
||||
self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False),
|
||||
('list', service, 0, {dict})) or {}
|
||||
return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)
|
||||
response = self._download_json(
|
||||
detail_url, episode_id, 'Downloading extended metadata',
|
||||
'Failed to download extended metadata', fatal=False, expected_status=400)
|
||||
if not response:
|
||||
return {}
|
||||
|
||||
def _extract_episode_info(self, headline, programme_id, series_meta):
|
||||
if error := traverse_obj(response, ('error', {dict})):
|
||||
self.report_warning(
|
||||
'Failed to get extended metadata. API returned '
|
||||
f'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}')
|
||||
return {}
|
||||
|
||||
full_meta = traverse_obj(response, ('list', service, 0, {dict}))
|
||||
if not full_meta:
|
||||
self.report_warning('Failed to get extended metadata. API returned empty list.')
|
||||
return {}
|
||||
|
||||
station = ' '.join(traverse_obj(full_meta, (('service', 'area'), 'name', {str}))) or None
|
||||
thumbnails = [{
|
||||
'id': str(id_),
|
||||
'preference': 1 if id_.startswith('thumbnail') else -2 if id_.startswith('logo') else -1,
|
||||
**traverse_obj(thumb, {
|
||||
'url': 'url',
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
} for id_, thumb in traverse_obj(full_meta, ('images', {dict.items}, lambda _, v: v[1]['url']))]
|
||||
|
||||
return filter_dict({
|
||||
'channel': station,
|
||||
'uploader': station,
|
||||
'description': join_nonempty(
|
||||
'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta),
|
||||
'thumbnails': thumbnails,
|
||||
**traverse_obj(full_meta, {
|
||||
'title': ('title', {str}),
|
||||
'timestamp': ('end_time', {unified_timestamp}),
|
||||
'release_timestamp': ('start_time', {unified_timestamp}),
|
||||
}),
|
||||
})
|
||||
|
||||
def _extract_episode_info(self, episode, programme_id, series_meta):
|
||||
episode_id = f'{programme_id}_{episode["id"]}'
|
||||
aa_vinfo = traverse_obj(episode, ('aa_contents_id', {lambda x: x.split(';')}))
|
||||
extended_metadata = self._extract_extended_metadata(episode_id, aa_vinfo)
|
||||
fallback_start_time, _, fallback_end_time = traverse_obj(
|
||||
aa_vinfo, (4, {str}, {lambda x: (x or '').partition('_')}))
|
||||
|
||||
return {
|
||||
**series_meta,
|
||||
'id': episode_id,
|
||||
'formats': self._extract_m3u8_formats(episode.get('stream_url'), episode_id, fatal=False),
|
||||
'container': 'm4a_dash', # force fixup, AAC-only HLS
|
||||
'was_live': True,
|
||||
'title': episode.get('program_title'),
|
||||
'description': episode.get('program_sub_title'), # fallback
|
||||
'timestamp': unified_timestamp(fallback_end_time),
|
||||
'release_timestamp': unified_timestamp(fallback_start_time),
|
||||
**extended_metadata,
|
||||
}
|
||||
|
||||
def _extract_news_info(self, headline, programme_id, series_meta):
|
||||
episode_id = f'{programme_id}_{headline["headline_id"]}'
|
||||
episode = traverse_obj(headline, ('file_list', 0, {dict}))
|
||||
description = self._extract_extended_description(episode_id, episode)
|
||||
if not description:
|
||||
self.report_warning('Failed to get extended description, falling back to summary')
|
||||
description = traverse_obj(episode, ('file_title_sub', {str}))
|
||||
|
||||
return {
|
||||
**series_meta,
|
||||
@@ -687,9 +763,9 @@ class NhkRadiruIE(InfoExtractor):
|
||||
'was_live': True,
|
||||
'series': series_meta.get('title'),
|
||||
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
|
||||
'description': description,
|
||||
**traverse_obj(episode, {
|
||||
'title': 'file_title',
|
||||
'title': ('file_title', {str}),
|
||||
'description': ('file_title_sub', {str}),
|
||||
'timestamp': ('open_time', {unified_timestamp}),
|
||||
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
|
||||
}),
|
||||
@@ -706,32 +782,58 @@ class NhkRadiruIE(InfoExtractor):
|
||||
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
|
||||
programme_id = f'{site_id}_{corner_id}'
|
||||
|
||||
if site_id == 'F261':
|
||||
json_url = 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json'
|
||||
else:
|
||||
json_url = f'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json'
|
||||
if site_id == 'F261': # XXX: News programmes use old API (for now?)
|
||||
meta = self._download_json(
|
||||
'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id)['main']
|
||||
series_meta = traverse_obj(meta, {
|
||||
'title': ('program_name', {str}),
|
||||
'channel': ('media_name', {str}),
|
||||
'uploader': ('media_name', {str}),
|
||||
'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
|
||||
}, get_all=False)
|
||||
|
||||
meta = self._download_json(json_url, programme_id)['main']
|
||||
if headline_id:
|
||||
headline = traverse_obj(
|
||||
meta, ('detail_list', lambda _, v: v['headline_id'] == headline_id, any))
|
||||
if not headline:
|
||||
raise ExtractorError('Content not found; it has most likely expired', expected=True)
|
||||
return self._extract_news_info(headline, programme_id, series_meta)
|
||||
|
||||
series_meta = traverse_obj(meta, {
|
||||
'title': 'program_name',
|
||||
'channel': 'media_name',
|
||||
'uploader': 'media_name',
|
||||
'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
|
||||
}, get_all=False)
|
||||
def news_entries():
|
||||
for headline in traverse_obj(meta, ('detail_list', ..., {dict})):
|
||||
yield self._extract_news_info(headline, programme_id, series_meta)
|
||||
|
||||
return self.playlist_result(
|
||||
news_entries(), programme_id, description=meta.get('site_detail'), **series_meta)
|
||||
|
||||
meta = self._download_json(
|
||||
'https://www.nhk.or.jp/radio-api/app/v1/web/ondemand/series', programme_id, query={
|
||||
'site_id': site_id,
|
||||
'corner_site_id': corner_id,
|
||||
})
|
||||
|
||||
fallback_station = join_nonempty('NHK', traverse_obj(meta, ('radio_broadcast', {str})), delim=' ')
|
||||
series_meta = {
|
||||
'series': join_nonempty('title', 'corner_name', delim=' ', from_dict=meta),
|
||||
'series_id': programme_id,
|
||||
'thumbnail': traverse_obj(meta, ('thumbnail_url', {url_or_none})),
|
||||
'channel': fallback_station,
|
||||
'uploader': fallback_station,
|
||||
}
|
||||
|
||||
if headline_id:
|
||||
return self._extract_episode_info(
|
||||
traverse_obj(meta, (
|
||||
'detail_list', lambda _, v: v['headline_id'] == headline_id), get_all=False),
|
||||
programme_id, series_meta)
|
||||
episode = traverse_obj(meta, ('episodes', lambda _, v: v['id'] == int(headline_id), any))
|
||||
if not episode:
|
||||
raise ExtractorError('Content not found; it has most likely expired', expected=True)
|
||||
return self._extract_episode_info(episode, programme_id, series_meta)
|
||||
|
||||
def entries():
|
||||
for headline in traverse_obj(meta, ('detail_list', ..., {dict})):
|
||||
yield self._extract_episode_info(headline, programme_id, series_meta)
|
||||
for episode in traverse_obj(meta, ('episodes', ..., {dict})):
|
||||
yield self._extract_episode_info(episode, programme_id, series_meta)
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), programme_id, playlist_description=meta.get('site_detail'), **series_meta)
|
||||
entries(), programme_id, title=series_meta.get('series'),
|
||||
description=meta.get('series_description'), **series_meta)
|
||||
|
||||
|
||||
class NhkRadioNewsPageIE(InfoExtractor):
|
||||
|
||||
@@ -2,6 +2,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
@@ -41,7 +42,7 @@ class NHLBaseIE(InfoExtractor):
|
||||
else:
|
||||
height = int_or_none(playback.get('height'))
|
||||
formats.append({
|
||||
'format_id': playback.get('name', 'http' + (f'-{height}p' if height else '')),
|
||||
'format_id': playback.get('name') or join_nonempty('http', height and f'{height}p'),
|
||||
'url': playback_url,
|
||||
'width': int_or_none(playback.get('width')),
|
||||
'height': height,
|
||||
|
||||
@@ -40,7 +40,6 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||
'info_dict': {
|
||||
'id': 'sm22312215',
|
||||
'ext': 'mp4',
|
||||
@@ -56,8 +55,8 @@ class NiconicoIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'genres': ['未設定'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# File downloaded with and without credentials are different, so omit
|
||||
# the md5 field
|
||||
@@ -77,8 +76,8 @@ class NiconicoIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'genres': ['音楽・サウンド'],
|
||||
'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# 'video exists but is marked as "deleted"
|
||||
# md5 is unstable
|
||||
@@ -112,7 +111,6 @@ class NiconicoIE(InfoExtractor):
|
||||
}, {
|
||||
# video not available via `getflv`; "old" HTML5 video
|
||||
'url': 'http://www.nicovideo.jp/watch/sm1151009',
|
||||
'md5': 'f95a3d259172667b293530cc2e41ebda',
|
||||
'info_dict': {
|
||||
'id': 'sm1151009',
|
||||
'ext': 'mp4',
|
||||
@@ -128,11 +126,10 @@ class NiconicoIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'genres': ['ゲーム'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# "New" HTML5 video
|
||||
# md5 is unstable
|
||||
'url': 'http://www.nicovideo.jp/watch/sm31464864',
|
||||
'info_dict': {
|
||||
'id': 'sm31464864',
|
||||
@@ -149,12 +146,11 @@ class NiconicoIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'genres': ['アニメ'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Video without owner
|
||||
'url': 'http://www.nicovideo.jp/watch/sm18238488',
|
||||
'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
|
||||
'info_dict': {
|
||||
'id': 'sm18238488',
|
||||
'ext': 'mp4',
|
||||
@@ -168,8 +164,8 @@ class NiconicoIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'genres': ['エンターテイメント'],
|
||||
'tags': [],
|
||||
'expected_protocol': str,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||
'only_matching': True,
|
||||
@@ -424,7 +420,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'x-request-with': 'https://www.nicovideo.jp',
|
||||
})['data']['contentUrl']
|
||||
# Getting all audio formats results in duplicate video formats which we filter out later
|
||||
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)
|
||||
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')
|
||||
|
||||
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
|
||||
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
|
||||
@@ -436,7 +432,6 @@ class NiconicoIE(InfoExtractor):
|
||||
'asr': ('samplingRate', {int_or_none}),
|
||||
}), get_all=False),
|
||||
'acodec': 'aac',
|
||||
'ext': 'm4a',
|
||||
}
|
||||
|
||||
# Sort before removing dupes to keep the format dicts with the lowest tbr
|
||||
@@ -458,9 +453,11 @@ class NiconicoIE(InfoExtractor):
|
||||
if video_id.startswith('so'):
|
||||
video_id = self._match_id(handle.url)
|
||||
|
||||
api_data = self._parse_json(self._html_search_regex(
|
||||
'data-api-data="([^"]+)"', webpage,
|
||||
'API data', default='{}'), video_id)
|
||||
api_data = traverse_obj(
|
||||
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
|
||||
('data', 'response', {dict}))
|
||||
if not api_data:
|
||||
raise ExtractorError('Server response data not found')
|
||||
except ExtractorError as e:
|
||||
try:
|
||||
api_data = self._download_json(
|
||||
@@ -872,7 +869,7 @@ class NicovideoTagURLIE(NicovideoSearchBaseIE):
|
||||
|
||||
|
||||
class NiconicoUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)(?:/video)?/?(?:$|[#?])'
|
||||
_TEST = {
|
||||
'url': 'https://www.nicovideo.jp/user/419948',
|
||||
'info_dict': {
|
||||
@@ -880,7 +877,7 @@ class NiconicoUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 101,
|
||||
}
|
||||
_API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
|
||||
_API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
_API_HEADERS = {
|
||||
@@ -900,12 +897,13 @@ class NiconicoUserIE(InfoExtractor):
|
||||
total_count = int_or_none(json_parsed['data'].get('totalCount'))
|
||||
for entry in json_parsed['data']['items']:
|
||||
count += 1
|
||||
yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id']))
|
||||
yield self.url_result(
|
||||
f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE)
|
||||
page_num += 1
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
|
||||
return self.playlist_result(self._entries(list_id), list_id)
|
||||
|
||||
|
||||
class NiconicoLiveIE(InfoExtractor):
|
||||
|
||||
@@ -43,14 +43,8 @@ class NoodleMagazineIE(InfoExtractor):
|
||||
def build_url(url_or_path):
|
||||
return urljoin('https://adult.noodlemagazine.com', url_or_path)
|
||||
|
||||
headers = {'Referer': url}
|
||||
player_path = self._html_search_regex(
|
||||
r'<iframe[^>]+\bid="iplayer"[^>]+\bsrc="([^"]+)"', webpage, 'player path')
|
||||
player_iframe = self._download_webpage(
|
||||
build_url(player_path), video_id, 'Downloading iframe page', headers=headers)
|
||||
playlist_url = self._search_regex(
|
||||
r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url')
|
||||
playlist_info = self._download_json(build_url(playlist_url), video_id, headers=headers)
|
||||
playlist_info = self._search_json(
|
||||
r'window\.playlist\s*=', webpage, video_id, 'playlist info')
|
||||
|
||||
formats = []
|
||||
for source in traverse_obj(playlist_info, ('sources', lambda _, v: v['file'])):
|
||||
|
||||
@@ -43,15 +43,17 @@ class NuumBaseIE(InfoExtractor):
|
||||
is_live = media.get('media_status') == 'RUNNING'
|
||||
|
||||
formats, subtitles = None, None
|
||||
headers = {'Referer': 'https://nuum.ru/'}
|
||||
if extract_formats:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
media_url, video_id, 'mp4', live=is_live)
|
||||
media_url, video_id, 'mp4', live=is_live, headers=headers)
|
||||
|
||||
return filter_dict({
|
||||
'id': video_id,
|
||||
'is_live': is_live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'http_headers': headers,
|
||||
**traverse_obj(container, {
|
||||
'title': ('media_container_name', {str}),
|
||||
'description': ('media_container_description', {str}),
|
||||
@@ -78,7 +80,7 @@ class NuumMediaIE(NuumBaseIE):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
|
||||
'md5': 'f1d9118a30403e32b702a204eb03aca3',
|
||||
'md5': 'ce28837a5bbffe6952d7bfd3d39811b0',
|
||||
'info_dict': {
|
||||
'id': '1567547',
|
||||
'ext': 'mp4',
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NZOnScreenIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
|
||||
'info_dict': {
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
|
||||
class NZZIE(InfoExtractor):
|
||||
@@ -22,19 +19,14 @@ class NZZIE(InfoExtractor):
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
|
||||
def _entries(self, webpage, page_id):
|
||||
for script in re.findall(r'(?s)<script[^>]* data-hid="jw-video-jw[^>]+>(.+?)</script>', webpage):
|
||||
settings = self._search_json(r'var\s+settings\s*=[^{]*', script, 'settings', page_id, fatal=False)
|
||||
if entry := self._parse_jwplayer_data(settings, page_id):
|
||||
yield entry
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
entries = []
|
||||
for player_element in re.findall(
|
||||
r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
|
||||
player_params = extract_attributes(player_element)
|
||||
if player_params.get('data-type') not in ('kaltura_singleArticle',):
|
||||
self.report_warning('Unsupported player type')
|
||||
continue
|
||||
entry_id = player_params['data-id']
|
||||
entries.append(self.url_result(
|
||||
'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
|
||||
|
||||
return self.playlist_result(entries, page_id)
|
||||
return self.playlist_result(self._entries(webpage, page_id), page_id)
|
||||
|
||||
@@ -1,9 +1,19 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, try_get
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
try_get,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class OlympicsReplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P<id>[^/#&?]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?olympics\.com/[a-z]{2}/(?:paris-2024/)?(?:replay|videos?|original-series/episode)/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays',
|
||||
'info_dict': {
|
||||
@@ -11,26 +21,105 @@ class OlympicsReplayIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020',
|
||||
'upload_date': '20210801',
|
||||
'timestamp': 1627783200,
|
||||
'timestamp': 1627797600,
|
||||
'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3',
|
||||
'uploader': 'International Olympic Committee',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/nua4o7zwyaznoaejpbk2',
|
||||
'duration': 7017.0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp',
|
||||
'only_matching': True,
|
||||
'url': 'https://olympics.com/en/original-series/episode/b-boys-and-b-girls-take-the-spotlight-breaking-life-road-to-paris-2024',
|
||||
'info_dict': {
|
||||
'id': '32633650-c5ee-4280-8b94-fb6defb6a9b5',
|
||||
'ext': 'mp4',
|
||||
'title': 'B-girl Nicka - Breaking Life, Road to Paris 2024 | Episode 1',
|
||||
'upload_date': '20240517',
|
||||
'timestamp': 1715948200,
|
||||
'description': 'md5:f63d728a41270ec628f6ac33ce471bb1',
|
||||
'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/a3j96l7j6so3vyfijby1',
|
||||
'duration': 1321.0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://olympics.com/en/paris-2024/videos/men-s-preliminaries-gbr-esp-ned-rsa-hockey-olympic-games-paris-2024',
|
||||
'info_dict': {
|
||||
'id': '3d96db23-8eee-4b7c-8ef5-488a0361026c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Men\'s Preliminaries GBR-ESP & NED-RSA | Hockey | Olympic Games Paris 2024',
|
||||
'upload_date': '20240727',
|
||||
'timestamp': 1722066600,
|
||||
},
|
||||
'skip': 'Geo-restricted to RU, BR, BT, NP, TM, BD, TL',
|
||||
}, {
|
||||
'url': 'https://olympics.com/en/paris-2024/videos/dnp-suni-lee-i-have-goals-and-i-have-expectations-for-myself-but-i-also-am-trying-to-give-myself-grace',
|
||||
'info_dict': {
|
||||
'id': 'a42f37ab-8a74-41d0-a7d9-af27b7b02a90',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:c7cfbc9918636a98e66400a812e4d407',
|
||||
'upload_date': '20240729',
|
||||
'timestamp': 1722288600,
|
||||
},
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _extract_from_nextjs_data(self, webpage, video_id):
|
||||
data = traverse_obj(self._search_nextjs_data(webpage, video_id, default={}), (
|
||||
'props', 'pageProps', 'page', 'items',
|
||||
lambda _, v: v['name'] == 'videoPlaylist', 'data', 'currentVideo', {dict}, any))
|
||||
if not data:
|
||||
return None
|
||||
|
||||
geo_countries = traverse_obj(data, ('countries', ..., {str}))
|
||||
if traverse_obj(data, ('geoRestrictedVideo', {bool})):
|
||||
self.raise_geo_restricted(countries=geo_countries)
|
||||
|
||||
is_live = traverse_obj(data, ('streamingStatus', {str})) == 'LIVE'
|
||||
m3u8_url = traverse_obj(data, ('videoUrl', {url_or_none})) or data['streamUrl']
|
||||
tokenized_url = self._tokenize_url(m3u8_url, data['jwtToken'], is_live, video_id)
|
||||
|
||||
try:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
tokenized_url, video_id, 'mp4', m3u8_id='hls')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and 'georestricted' in e.cause.msg:
|
||||
self.raise_geo_restricted(countries=geo_countries)
|
||||
raise
|
||||
|
||||
return {
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
**traverse_obj(data, {
|
||||
'id': ('videoID', {str}),
|
||||
'title': ('title', {str}),
|
||||
'timestamp': ('contentDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _tokenize_url(self, url, token, is_live, video_id):
|
||||
return self._download_json(
|
||||
'https://metering.olympics.com/tokengenerator', video_id,
|
||||
'Downloading tokenized m3u8 url', query={
|
||||
**parse_qs(url),
|
||||
'url': update_url(url, query=None),
|
||||
'service-id': 'live' if is_live else 'vod',
|
||||
'user-auth': token,
|
||||
})['data']['url']
|
||||
|
||||
def _legacy_tokenize_url(self, url, video_id):
|
||||
return self._download_json(
|
||||
'https://olympics.com/tokenGenerator', video_id,
|
||||
'Downloading legacy tokenized m3u8 url', query={'url': url})
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if info := self._extract_from_nextjs_data(webpage, video_id):
|
||||
return info
|
||||
|
||||
title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage)
|
||||
uuid = self._html_search_meta('episode_uid', webpage)
|
||||
video_uuid = self._html_search_meta('episode_uid', webpage)
|
||||
m3u8_url = self._html_search_meta('video_url', webpage)
|
||||
json_ld = self._search_json_ld(webpage, uuid)
|
||||
json_ld = self._search_json_ld(webpage, video_uuid)
|
||||
thumbnails_list = json_ld.get('image')
|
||||
if not thumbnails_list:
|
||||
thumbnails_list = self._html_search_regex(
|
||||
@@ -48,12 +137,12 @@ class OlympicsReplayIE(InfoExtractor):
|
||||
'width': width,
|
||||
'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)),
|
||||
})
|
||||
m3u8_url = self._download_json(
|
||||
f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url')
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls')
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
self._legacy_tokenize_url(m3u8_url, video_uuid), video_uuid, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': uuid,
|
||||
'id': video_uuid,
|
||||
'title': title,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user