mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-24 00:49:06 +00:00
Merge branch 'master' into cineverse-extra_metadata
This commit is contained in:
@@ -208,6 +208,10 @@ from .bandcamp import (
|
||||
BandcampUserIE,
|
||||
BandcampWeeklyIE,
|
||||
)
|
||||
from .bandlab import (
|
||||
BandlabIE,
|
||||
BandlabPlaylistIE,
|
||||
)
|
||||
from .bannedvideo import BannedVideoIE
|
||||
from .bbc import (
|
||||
BBCIE,
|
||||
@@ -217,6 +221,7 @@ from .bbc import (
|
||||
BBCCoUkIPlayerGroupIE,
|
||||
BBCCoUkPlaylistIE,
|
||||
)
|
||||
from .beacon import BeaconTvIE
|
||||
from .beatbump import (
|
||||
BeatBumpPlaylistIE,
|
||||
BeatBumpVideoIE,
|
||||
@@ -251,6 +256,7 @@ from .bilibili import (
|
||||
BilibiliCheeseIE,
|
||||
BilibiliCheeseSeasonIE,
|
||||
BilibiliCollectionListIE,
|
||||
BiliBiliDynamicIE,
|
||||
BilibiliFavoritesListIE,
|
||||
BiliBiliIE,
|
||||
BiliBiliPlayerIE,
|
||||
@@ -277,6 +283,7 @@ from .bleacherreport import (
|
||||
from .blerp import BlerpIE
|
||||
from .blogger import BloggerIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bluesky import BlueskyIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .boosty import BoostyIE
|
||||
@@ -362,7 +369,10 @@ from .ccc import (
|
||||
)
|
||||
from .ccma import CCMAIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .cda import (
|
||||
CDAIE,
|
||||
CDAFolderIE,
|
||||
)
|
||||
from .cellebrite import CellebriteIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .cgtn import CGTNIE
|
||||
@@ -397,8 +407,6 @@ from .cmt import CMTIE
|
||||
from .cnbc import CNBCVideoIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNArticleIE,
|
||||
CNNBlogsIE,
|
||||
CNNIndonesiaIE,
|
||||
)
|
||||
from .comedycentral import (
|
||||
@@ -433,12 +441,6 @@ from .crowdbunker import (
|
||||
CrowdBunkerIE,
|
||||
)
|
||||
from .crtvg import CrtvgIE
|
||||
from .crunchyroll import (
|
||||
CrunchyrollArtistIE,
|
||||
CrunchyrollBetaIE,
|
||||
CrunchyrollBetaShowIE,
|
||||
CrunchyrollMusicIE,
|
||||
)
|
||||
from .cspan import (
|
||||
CSpanCongressIE,
|
||||
CSpanIE,
|
||||
@@ -452,7 +454,10 @@ from .curiositystream import (
|
||||
CuriosityStreamIE,
|
||||
CuriosityStreamSeriesIE,
|
||||
)
|
||||
from .cwtv import CWTVIE
|
||||
from .cwtv import (
|
||||
CWTVIE,
|
||||
CWTVMovieIE,
|
||||
)
|
||||
from .cybrary import (
|
||||
CybraryCourseIE,
|
||||
CybraryIE,
|
||||
@@ -548,6 +553,7 @@ from .dropout import (
|
||||
DropoutIE,
|
||||
DropoutSeasonIE,
|
||||
)
|
||||
from .drtalks import DrTalksIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import (
|
||||
DRTVIE,
|
||||
@@ -577,6 +583,10 @@ from .egghead import (
|
||||
EggheadCourseIE,
|
||||
EggheadLessonIE,
|
||||
)
|
||||
from .eggs import (
|
||||
EggsArtistIE,
|
||||
EggsIE,
|
||||
)
|
||||
from .eighttracks import EightTracksIE
|
||||
from .eitb import EitbIE
|
||||
from .elementorembed import ElementorEmbedIE
|
||||
@@ -692,11 +702,6 @@ from .frontendmasters import (
|
||||
FrontendMastersLessonIE,
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funimation import (
|
||||
FunimationIE,
|
||||
FunimationPageIE,
|
||||
FunimationShowIE,
|
||||
)
|
||||
from .funk import FunkIE
|
||||
from .funker530 import Funker530IE
|
||||
from .fuyintv import FuyinTVIE
|
||||
@@ -705,6 +710,7 @@ from .gab import (
|
||||
GabTVIE,
|
||||
)
|
||||
from .gaia import GaiaIE
|
||||
from .gamedevtv import GameDevTVDashboardIE
|
||||
from .gamejolt import (
|
||||
GameJoltCommunityIE,
|
||||
GameJoltGameIE,
|
||||
@@ -729,6 +735,7 @@ from .genius import (
|
||||
GeniusIE,
|
||||
GeniusLyricsIE,
|
||||
)
|
||||
from .germanupa import GermanupaIE
|
||||
from .getcourseru import (
|
||||
GetCourseRuIE,
|
||||
GetCourseRuPlayerIE,
|
||||
@@ -822,7 +829,10 @@ from .hungama import (
|
||||
HungamaIE,
|
||||
HungamaSongIE,
|
||||
)
|
||||
from .huya import HuyaLiveIE
|
||||
from .huya import (
|
||||
HuyaLiveIE,
|
||||
HuyaVideoIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .hypergryph import MonsterSirenHypergryphMusicIE
|
||||
from .hytale import HytaleIE
|
||||
@@ -934,6 +944,10 @@ from .kaltura import KalturaIE
|
||||
from .kankanews import KankaNewsIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .kelbyone import KelbyOneIE
|
||||
from .kenh14 import (
|
||||
Kenh14PlaylistIE,
|
||||
Kenh14VideoIE,
|
||||
)
|
||||
from .khanacademy import (
|
||||
KhanAcademyIE,
|
||||
KhanAcademyUnitIE,
|
||||
@@ -945,6 +959,7 @@ from .kick import (
|
||||
)
|
||||
from .kicker import KickerIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kika import KikaIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .kommunetv import KommunetvIE
|
||||
@@ -1036,10 +1051,7 @@ from .livestream import (
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .livestreamfails import LivestreamfailsIE
|
||||
from .lnkgo import (
|
||||
LnkGoIE,
|
||||
LnkIE,
|
||||
)
|
||||
from .lnk import LnkIE
|
||||
from .loom import (
|
||||
LoomFolderIE,
|
||||
LoomIE,
|
||||
@@ -1125,12 +1137,6 @@ from .microsoftembed import (
|
||||
MicrosoftMediusIE,
|
||||
)
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .mildom import (
|
||||
MildomClipIE,
|
||||
MildomIE,
|
||||
MildomUserVodIE,
|
||||
MildomVodIE,
|
||||
)
|
||||
from .minds import (
|
||||
MindsChannelIE,
|
||||
MindsGroupIE,
|
||||
@@ -1150,6 +1156,7 @@ from .mitele import MiTeleIE
|
||||
from .mixch import (
|
||||
MixchArchiveIE,
|
||||
MixchIE,
|
||||
MixchMovieIE,
|
||||
)
|
||||
from .mixcloud import (
|
||||
MixcloudIE,
|
||||
@@ -1164,6 +1171,7 @@ from .mlb import (
|
||||
)
|
||||
from .mlssoccer import MLSSoccerIE
|
||||
from .mocha import MochaVideoIE
|
||||
from .mojevideo import MojevideoIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .motherless import (
|
||||
@@ -1268,6 +1276,10 @@ from .nebula import (
|
||||
)
|
||||
from .nekohacker import NekoHackerIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .nest import (
|
||||
NestClipIE,
|
||||
NestIE,
|
||||
)
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicAlbumIE,
|
||||
NetEaseMusicDjRadioIE,
|
||||
@@ -1510,8 +1522,8 @@ from .pgatour import PGATourIE
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pialive import PiaLiveIE
|
||||
from .piapro import PiaproIE
|
||||
from .piaulizaportal import PIAULIZAPortalIE
|
||||
from .picarto import (
|
||||
PicartoIE,
|
||||
PicartoVodIE,
|
||||
@@ -1522,6 +1534,10 @@ from .pinterest import (
|
||||
PinterestCollectionIE,
|
||||
PinterestIE,
|
||||
)
|
||||
from .piramidetv import (
|
||||
PiramideTVChannelIE,
|
||||
PiramideTVIE,
|
||||
)
|
||||
from .pixivsketch import (
|
||||
PixivSketchIE,
|
||||
PixivSketchUserIE,
|
||||
@@ -1541,16 +1557,13 @@ from .pluralsight import (
|
||||
PluralsightIE,
|
||||
)
|
||||
from .plutotv import PlutoTVIE
|
||||
from .plvideo import PlVideoIE
|
||||
from .podbayfm import (
|
||||
PodbayFMChannelIE,
|
||||
PodbayFMIE,
|
||||
)
|
||||
from .podchaser import PodchaserIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pokemon import (
|
||||
PokemonIE,
|
||||
PokemonWatchIE,
|
||||
)
|
||||
from .pokergo import (
|
||||
PokerGoCollectionIE,
|
||||
PokerGoIE,
|
||||
@@ -1641,6 +1654,7 @@ from .radiokapital import (
|
||||
RadioKapitalIE,
|
||||
RadioKapitalShowIE,
|
||||
)
|
||||
from .radioradicale import RadioRadicaleIE
|
||||
from .radiozet import RadioZetPodcastIE
|
||||
from .radlive import (
|
||||
RadLiveChannelIE,
|
||||
@@ -1810,6 +1824,7 @@ from .screen9 import Screen9IE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastify import ScreencastifyIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenrec import ScreenRecIE
|
||||
from .scrippsnetworks import (
|
||||
ScrippsNetworksIE,
|
||||
ScrippsNetworksWatchIE,
|
||||
@@ -1820,6 +1835,7 @@ from .scte import (
|
||||
SCTECourseIE,
|
||||
)
|
||||
from .sejmpl import SejmIE
|
||||
from .sen import SenIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import (
|
||||
SenateGovIE,
|
||||
@@ -1875,6 +1891,7 @@ from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotrim import SmotrimIE
|
||||
from .snapchat import SnapchatSpotlightIE
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import (
|
||||
SohuIE,
|
||||
@@ -1929,9 +1946,7 @@ from .spotify import (
|
||||
)
|
||||
from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerPageIE,
|
||||
SpreakerShowIE,
|
||||
SpreakerShowPageIE,
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
@@ -1973,6 +1988,10 @@ from .streetvoice import StreetVoiceIE
|
||||
from .stretchinternet import StretchInternetIE
|
||||
from .stripchat import StripchatIE
|
||||
from .stv import STVPlayerIE
|
||||
from .subsplash import (
|
||||
SubsplashIE,
|
||||
SubsplashPlaylistIE,
|
||||
)
|
||||
from .substack import SubstackIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .sverigesradio import (
|
||||
@@ -2242,6 +2261,10 @@ from .ufctv import (
|
||||
)
|
||||
from .ukcolumn import UkColumnIE
|
||||
from .uktvplay import UKTVPlayIE
|
||||
from .uliza import (
|
||||
UlizaPlayerIE,
|
||||
UlizaPortalIE,
|
||||
)
|
||||
from .umg import UMGDeIE
|
||||
from .unistra import UnistraIE
|
||||
from .unity import UnityIE
|
||||
@@ -2270,10 +2293,6 @@ from .utreon import UtreonIE
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veo import VeoIE
|
||||
from .veoh import (
|
||||
VeohIE,
|
||||
VeohUserIE,
|
||||
)
|
||||
from .vesti import VestiIE
|
||||
from .vevo import (
|
||||
VevoIE,
|
||||
@@ -2311,6 +2330,7 @@ from .videomore import (
|
||||
VideomoreVideoIE,
|
||||
)
|
||||
from .videopress import VideoPressIE
|
||||
from .vidflex import VidflexIE
|
||||
from .vidio import (
|
||||
VidioIE,
|
||||
VidioLiveIE,
|
||||
@@ -2345,10 +2365,6 @@ from .vimm import (
|
||||
VimmIE,
|
||||
VimmRecordingIE,
|
||||
)
|
||||
from .vine import (
|
||||
VineIE,
|
||||
VineUserIE,
|
||||
)
|
||||
from .viously import ViouslyIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
|
||||
@@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
'skip': 'This program is not currently available in ABC iview',
|
||||
}, {
|
||||
'url': 'https://iview.abc.net.au/show/inbestigators',
|
||||
'info_dict': {
|
||||
'id': '175343-1',
|
||||
'title': 'Series 1',
|
||||
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
|
||||
'series': 'The Inbestigators',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
webpage_data = self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
|
||||
webpage, 'initial state')
|
||||
video_data = self._parse_json(
|
||||
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
video_data = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
|
||||
transform_source=lambda x: x.encode().decode('unicode_escape'),
|
||||
end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
|
||||
|
||||
@@ -6,7 +6,6 @@ import hmac
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
@@ -18,10 +17,8 @@ from ..networking.exceptions import TransportError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
bytes_to_intlist,
|
||||
decode_base_n,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
@@ -72,15 +69,15 @@ class AbemaLicenseRH(RequestHandler):
|
||||
})
|
||||
|
||||
res = decode_base_n(license_response['k'], table=self._STRTABLE)
|
||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||
encvideokey = list(res.to_bytes(16, 'big'))
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self._HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
enckey = list(h.digest())
|
||||
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
return bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
@@ -424,14 +421,15 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
|
||||
|
||||
class AbemaTVTitleIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
|
||||
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/#]+)/?(?:\?(?:[^#]+&)?s=(?P<season>[^&#]+))?'
|
||||
_PAGE_SIZE = 25
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/title/90-1597',
|
||||
'url': 'https://abema.tv/video/title/90-1887',
|
||||
'info_dict': {
|
||||
'id': '90-1597',
|
||||
'id': '90-1887',
|
||||
'title': 'シャッフルアイランド',
|
||||
'description': 'md5:61b2425308f41a5282a926edda66f178',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
@@ -439,41 +437,54 @@ class AbemaTVTitleIE(AbemaTVBaseIE):
|
||||
'info_dict': {
|
||||
'id': '193-132',
|
||||
'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
|
||||
'description': 'md5:9b59493d1f3a792bafbc7319258e7af8',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/title/25-102',
|
||||
'url': 'https://abema.tv/video/title/25-1nzan-whrxe',
|
||||
'info_dict': {
|
||||
'id': '25-102',
|
||||
'title': 'ソードアート・オンライン アリシゼーション',
|
||||
'id': '25-1nzan-whrxe',
|
||||
'title': 'ソードアート・オンライン',
|
||||
'description': 'md5:c094904052322e6978495532bdbf06e6',
|
||||
},
|
||||
'playlist_mincount': 24,
|
||||
'playlist_mincount': 25,
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/title/26-2mzbynr-cph?s=26-2mzbynr-cph_s40',
|
||||
'info_dict': {
|
||||
'title': '〈物語〉シリーズ',
|
||||
'id': '26-2mzbynr-cph',
|
||||
'description': 'md5:e67873de1c88f360af1f0a4b84847a52',
|
||||
},
|
||||
'playlist_count': 59,
|
||||
}]
|
||||
|
||||
def _fetch_page(self, playlist_id, series_version, page):
|
||||
def _fetch_page(self, playlist_id, series_version, season_id, page):
|
||||
query = {
|
||||
'seriesVersion': series_version,
|
||||
'offset': str(page * self._PAGE_SIZE),
|
||||
'order': 'seq',
|
||||
'limit': str(self._PAGE_SIZE),
|
||||
}
|
||||
if season_id:
|
||||
query['seasonId'] = season_id
|
||||
programs = self._call_api(
|
||||
f'v1/video/series/{playlist_id}/programs', playlist_id,
|
||||
note=f'Downloading page {page + 1}',
|
||||
query={
|
||||
'seriesVersion': series_version,
|
||||
'offset': str(page * self._PAGE_SIZE),
|
||||
'order': 'seq',
|
||||
'limit': str(self._PAGE_SIZE),
|
||||
})
|
||||
query=query)
|
||||
yield from (
|
||||
self.url_result(f'https://abema.tv/video/episode/{x}')
|
||||
for x in traverse_obj(programs, ('programs', ..., 'id')))
|
||||
|
||||
def _entries(self, playlist_id, series_version):
|
||||
def _entries(self, playlist_id, series_version, season_id):
|
||||
return OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, playlist_id, series_version),
|
||||
functools.partial(self._fetch_page, playlist_id, series_version, season_id),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
playlist_id, season_id = self._match_valid_url(url).group('id', 'season')
|
||||
series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
|
||||
self._entries(playlist_id, series_info['version'], season_id), playlist_id=playlist_id,
|
||||
playlist_title=series_info.get('title'),
|
||||
playlist_description=series_info.get('content'))
|
||||
|
||||
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
_TEST = {
|
||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||
|
||||
@@ -43,14 +43,14 @@ class ACastIE(ACastBaseIE):
|
||||
_VALID_URL = r'''(?x:
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:embed|www)\.)?acast\.com/|
|
||||
(?:(?:embed|www|shows)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<channel>[^/]+)/(?P<id>[^/#?"]+)
|
||||
(?P<channel>[^/?#]+)/(?:episodes/)?(?P<id>[^/#?"]+)
|
||||
)'''
|
||||
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'url': 'https://shows.acast.com/sparpodcast/episodes/2.raggarmordet-rosterurdetforflutna',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
@@ -59,7 +59,7 @@ class ACastIE(ACastBaseIE):
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'duration': 2766,
|
||||
'creator': 'Third Ear Studio',
|
||||
'creators': ['Third Ear Studio'],
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
|
||||
@@ -74,6 +74,9 @@ class ACastIE(ACastBaseIE):
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'only_matching': True,
|
||||
@@ -110,7 +113,7 @@ class ACastChannelIE(ACastBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?acast\.com/|
|
||||
(?:(?:www|shows)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
@@ -120,12 +123,15 @@ class ACastChannelIE(ACastBaseIE):
|
||||
'info_dict': {
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
|
||||
'description': 'md5:feca253de9947634605080cd9eeea2bf',
|
||||
},
|
||||
'playlist_mincount': 200,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://shows.acast.com/sparpodcast',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -11,11 +11,9 @@ from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
@@ -198,16 +196,16 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
self._K = ''.join(random.choices('0123456789abcdef', k=16))
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
message = list(json.dumps({
|
||||
'k': self._K,
|
||||
't': token,
|
||||
}))
|
||||
}).encode())
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry with
|
||||
# a different random padding
|
||||
links_data = None
|
||||
for _ in range(3):
|
||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||
padded_message = bytes(pkcs1pad(message, 128))
|
||||
n, e = self._RSA_KEY
|
||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||
authorization = base64.b64encode(encrypted_message).decode()
|
||||
@@ -234,7 +232,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
error = self._parse_json(e.cause.response.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
raise ExtractorError(message)
|
||||
else:
|
||||
|
||||
@@ -1355,13 +1355,14 @@ MSO_INFO = {
|
||||
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update(kwargs.get('headers', {}))
|
||||
headers.update(kwargs.get('headers') or {})
|
||||
kwargs['headers'] = headers
|
||||
return super()._download_webpage_handle(
|
||||
*args, **kwargs)
|
||||
@@ -1454,7 +1455,11 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
}, headers={
|
||||
# yt-dlp's default user-agent is usually too old for Comcast_SSO
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
|
||||
'User-Agent': self._MODERN_USER_AGENT,
|
||||
} if mso_id == 'Comcast_SSO' else None)
|
||||
elif not self._cookies_passed:
|
||||
raise_mvpd_required()
|
||||
|
||||
|
||||
@@ -33,21 +33,21 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
'https://login.afreecatv.com/app/LoginAction.php', None,
|
||||
'https://login.sooplive.co.kr/app/LoginAction.php', None,
|
||||
'Logging in', data=urlencode_postdata(login_form))
|
||||
|
||||
_ERRORS = {
|
||||
-4: 'Your account has been suspended due to a violation of our terms and policies.',
|
||||
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
|
||||
-6: 'https://login.afreecatv.com/membership/changeMember.php',
|
||||
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
|
||||
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
|
||||
-12: 'https://member.afreecatv.com/app/user_security.php',
|
||||
-5: 'https://member.sooplive.co.kr/app/user_delete_progress.php',
|
||||
-6: 'https://login.sooplive.co.kr/membership/changeMember.php',
|
||||
-8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.sooplive.co.kr/app/pop_login_block.php',
|
||||
-11: 'https://login.sooplive.co.kr/afreeca/second_login.php',
|
||||
-12: 'https://member.sooplive.co.kr/app/user_security.php',
|
||||
0: 'The username does not exist or you have entered the wrong password.',
|
||||
-1: 'The username does not exist or you have entered the wrong password.',
|
||||
-3: 'You have entered your username/password incorrectly.',
|
||||
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
|
||||
-7: 'You cannot use your Global Soop account to access Korean Soop.',
|
||||
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
|
||||
-32008: 'You have failed to log in. Please contact our Help Center.',
|
||||
}
|
||||
@@ -61,76 +61,48 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
|
||||
return self._download_json(Request(
|
||||
f'https://api.m.afreecatv.com/{endpoint}',
|
||||
f'https://api.m.sooplive.co.kr/{endpoint}',
|
||||
data=data, headers=headers, query=query,
|
||||
extensions={'legacy_ssl': True}), display_id,
|
||||
'Downloading API JSON', 'Unable to download API JSON')
|
||||
|
||||
@staticmethod
|
||||
def _fixup_thumb(thumb_url):
|
||||
if not url_or_none(thumb_url):
|
||||
return None
|
||||
# Core would determine_ext as 'php' from the url, so we need to provide the real ext
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11537
|
||||
return [{'url': thumb_url, 'ext': 'jpg'}]
|
||||
|
||||
|
||||
class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv'
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)/?(?:$|[?#&])
|
||||
'''
|
||||
IE_NAME = 'soop'
|
||||
IE_DESC = 'sooplive.co.kr'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
'url': 'https://vod.sooplive.co.kr/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '36164052',
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'title': '데일리 에이프릴 요정들의 시상식!',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160503',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||
'info_dict': {
|
||||
'id': '36153164',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '36153164_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '36153164_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}],
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||
'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
@@ -142,12 +114,12 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/97267690',
|
||||
'url': 'https://vod.sooplive.co.kr/player/97267690',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
@@ -157,36 +129,17 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The VOD does not exist',
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vod.afreecatv.com/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/70395877',
|
||||
'url': 'https://vod.sooplive.co.kr/player/70395877',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# subscribers only
|
||||
'url': 'https://vod.afreecatv.com/player/104647403',
|
||||
'url': 'https://vod.sooplive.co.kr/player/104647403',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private
|
||||
'url': 'https://vod.afreecatv.com/player/81669846',
|
||||
'url': 'https://vod.sooplive.co.kr/player/81669846',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -209,8 +162,8 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('bj_id', {str}),
|
||||
'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
|
||||
'thumbnails': ('thumb', {self._fixup_thumb}),
|
||||
})
|
||||
|
||||
entries = []
|
||||
@@ -233,7 +186,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
|
||||
'formats': formats,
|
||||
**traverse_obj(file_element, {
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('file_start', {unified_timestamp}),
|
||||
}),
|
||||
})
|
||||
@@ -262,11 +215,11 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
|
||||
|
||||
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:catchstory'
|
||||
IE_DESC = 'afreecatv.com catch story'
|
||||
_VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
|
||||
IE_NAME = 'soop:catchstory'
|
||||
IE_DESC = 'sooplive.co.kr catch story'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory'
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.afreecatv.com/player/103247/catchstory',
|
||||
'url': 'https://vod.sooplive.co.kr/player/103247/catchstory',
|
||||
'info_dict': {
|
||||
'id': '103247',
|
||||
},
|
||||
@@ -281,29 +234,28 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
|
||||
return self.playlist_result(self._entries(data), video_id)
|
||||
|
||||
@staticmethod
|
||||
def _entries(data):
|
||||
def _entries(self, data):
|
||||
# 'files' is always a list with 1 element
|
||||
yield from traverse_obj(data, (
|
||||
'data', lambda _, v: v['story_type'] == 'catch',
|
||||
'catch_list', lambda _, v: v['files'][0]['file'], {
|
||||
'id': ('files', 0, 'file_info_key', {str}),
|
||||
'url': ('files', 0, 'file', {url_or_none}),
|
||||
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('writer_id', {str}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
'thumbnails': ('thumb', {self._fixup_thumb}),
|
||||
'timestamp': ('write_timestamp', {int_or_none}),
|
||||
}))
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:live'
|
||||
IE_DESC = 'afreecatv.com livestreams'
|
||||
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
|
||||
IE_NAME = 'soop:live'
|
||||
IE_DESC = 'sooplive.co.kr livestreams'
|
||||
_VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'info_dict': {
|
||||
'id': '237852185',
|
||||
'ext': 'mp4',
|
||||
@@ -315,30 +267,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
},
|
||||
'skip': 'Livestream has ended',
|
||||
}, {
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.afreecatv.com/pyh3646',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
|
||||
_LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php'
|
||||
_WORKING_CDNS = [
|
||||
'gcp_cdn', # live-global-cdn-v02.afreecatv.com
|
||||
'gs_cdn_pc_app', # pc-app.stream.afreecatv.com
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com
|
||||
'gs_cdn_pc_web', # pc-web.stream.afreecatv.com
|
||||
'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr
|
||||
'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr
|
||||
'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr
|
||||
]
|
||||
_BAD_CDNS = [
|
||||
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
|
||||
'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve)
|
||||
'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400)
|
||||
'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve)
|
||||
'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400)
|
||||
]
|
||||
|
||||
def _extract_formats(self, channel_info, broadcast_no, aid):
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr'
|
||||
|
||||
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
|
||||
default_cdn_ids = orderedSet([
|
||||
@@ -358,7 +310,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
try:
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
|
||||
headers={'Referer': 'https://play.afreecatv.com/'})
|
||||
headers={'Referer': 'https://play.sooplive.co.kr/'})
|
||||
except ExtractorError as e:
|
||||
if attempt == len(cdn_ids):
|
||||
raise
|
||||
@@ -374,7 +326,13 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
broadcaster_id = channel_info.get('BJID') or broadcaster_id
|
||||
broadcast_no = channel_info.get('BNO') or broadcast_no
|
||||
if not broadcast_no:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
result = channel_info.get('RESULT')
|
||||
if result == 0:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
elif result == -6:
|
||||
self.raise_login_required(
|
||||
'This channel is streaming for subscribers only', method='password')
|
||||
raise ExtractorError('Unable to extract broadcast number')
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if channel_info.get('BPWD') == 'Y' and password is None:
|
||||
@@ -403,7 +361,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
formats = self._extract_formats(channel_info, broadcast_no, aid)
|
||||
|
||||
station_info = traverse_obj(self._download_json(
|
||||
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
|
||||
'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no,
|
||||
'Downloading channel metadata', 'Unable to download channel metadata',
|
||||
query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
|
||||
|
||||
@@ -419,11 +377,11 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class AfreecaTVUserIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv:user'
|
||||
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
|
||||
class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:user'
|
||||
_VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -431,7 +389,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 218,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
|
||||
'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'parang1995',
|
||||
@@ -439,7 +397,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 997,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -447,7 +405,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 221,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -459,12 +417,12 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
|
||||
def _fetch_page(self, user_id, user_type, page):
|
||||
page += 1
|
||||
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
|
||||
info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id,
|
||||
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
|
||||
note=f'Downloading {user_type} video page {page}')
|
||||
for item in info['data']:
|
||||
yield self.url_result(
|
||||
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
|
||||
|
||||
@@ -71,7 +71,7 @@ class AllstarBaseIE(InfoExtractor):
|
||||
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
|
||||
'duration': ('clipLength', {int_or_none}),
|
||||
'filesize': ('clipSizeBytes', {int_or_none}),
|
||||
'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('createdDate', {int_or_none(scale=1000)}),
|
||||
'uploader': ('username', {str}),
|
||||
'uploader_id': ('user', '_id', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
|
||||
@@ -8,10 +8,8 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_encrypt
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
smuggle_url,
|
||||
strip_jsonp,
|
||||
@@ -33,24 +31,6 @@ class AnvatoIE(InfoExtractor):
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
||||
'info_dict': {
|
||||
'id': '899441',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'NFL',
|
||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
||||
'duration': 157,
|
||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
||||
},
|
||||
}, {
|
||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||
@@ -241,31 +221,6 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||
}
|
||||
|
||||
def _generate_nfl_token(self, anvack, mcp_id):
|
||||
reroute = self._download_json(
|
||||
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
|
||||
headers={'X-Domain-Id': 100}, note='Fetching token info')
|
||||
token_type = reroute.get('token_type') or 'Bearer'
|
||||
auth_token = f'{token_type} {reroute["access_token"]}'
|
||||
response = self._download_json(
|
||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
||||
'query': '''{
|
||||
viewer {
|
||||
mediaToken(anvack: "%s", id: %s) {
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id), # noqa: UP031
|
||||
}).encode(), headers={
|
||||
'Authorization': auth_token,
|
||||
'Content-Type': 'application/json',
|
||||
}, note='Fetching NFL API token')
|
||||
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
|
||||
|
||||
_TOKEN_GENERATORS = {
|
||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
|
||||
}
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
return int_or_none(traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
|
||||
@@ -277,8 +232,8 @@ class AnvatoIE(InfoExtractor):
|
||||
server_time = self._server_time(access_key, video_id)
|
||||
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
|
||||
|
||||
auth_secret = intlist_to_bytes(aes_encrypt(
|
||||
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
|
||||
auth_secret = bytes(aes_encrypt(
|
||||
list(input_data[:64].encode()), list(self._AUTH_KEY)))
|
||||
query = {
|
||||
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
|
||||
'rtyp': 'fp',
|
||||
@@ -290,8 +245,6 @@ class AnvatoIE(InfoExtractor):
|
||||
}
|
||||
if extracted_token is not None:
|
||||
api['anvstk2'] = extracted_token
|
||||
elif self._TOKEN_GENERATORS.get(access_key) is not None:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
|
||||
elif self._ANVACK_TABLE.get(access_key) is not None:
|
||||
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
||||
else:
|
||||
|
||||
@@ -1,27 +1,42 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
|
||||
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
|
||||
'info_dict': {
|
||||
'id': '1000665010654',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
|
||||
'upload_date': '20240812',
|
||||
'timestamp': 1723449600,
|
||||
'duration': 3596,
|
||||
'series': 'Ferreck Dawn - To The Break of Dawn',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'episode': '207 - Whitney Webb Returns',
|
||||
'episode_number': 207,
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593932400,
|
||||
'duration': 6454,
|
||||
'duration': 5369,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
@@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
server_data = self._search_json(
|
||||
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
|
||||
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
|
||||
model_data = traverse_obj(server_data, (
|
||||
'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
|
||||
'model', {dict}, any))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': episode.get('name'),
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
'description': description.get('standard') or description.get('short'),
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
**self._json_ld(
|
||||
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
|
||||
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
|
||||
**traverse_obj(model_data, {
|
||||
'title': ('title', {str}),
|
||||
'url': ('streamUrl', {clean_podcast_url}),
|
||||
'timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
||||
@@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
# The reviewbody is None for one of the reviews; just need to extract data without crashing
|
||||
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||
'info_dict': {
|
||||
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||
'ext': 'mp3',
|
||||
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||
'creators': ['Grateful Dead'],
|
||||
'duration': 338.31,
|
||||
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
|
||||
'display_id': 'gd95-04-02d1t04.shn',
|
||||
'location': 'Pyramid Arena',
|
||||
'uploader': 'jon@archive.org',
|
||||
'album': '1995-04-02 - Pyramid Arena',
|
||||
'upload_date': '20040519',
|
||||
'track_number': 4,
|
||||
'release_date': '19950402',
|
||||
'timestamp': 1084927901,
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -335,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
info['comments'].append({
|
||||
'id': review.get('review_id'),
|
||||
'author': review.get('reviewer'),
|
||||
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'),
|
||||
'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
|
||||
'timestamp': unified_timestamp(review.get('createdate')),
|
||||
'parent': 'root'})
|
||||
|
||||
|
||||
@@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARDMediathek'
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
@@ -299,7 +299,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '94834686',
|
||||
'ext': 'mp4',
|
||||
'duration': 2700,
|
||||
'duration': 2670,
|
||||
'episode': '7 Tage ... unter harten Jungs',
|
||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||
'upload_date': '20231005',
|
||||
@@ -307,10 +307,28 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'series': '7 Tage ...',
|
||||
'channel': 'HR',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
|
||||
'title': '7 Tage ... unter harten Jungs',
|
||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'info_dict': {
|
||||
'id': '13847165',
|
||||
'chapters': 'count:8',
|
||||
'ext': 'mp4',
|
||||
'channel': 'WDR',
|
||||
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'series': 'Lokalzeit aus Düsseldorf',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
|
||||
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'upload_date': '20241031',
|
||||
'timestamp': 1730399400,
|
||||
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
|
||||
'duration': 1759,
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'only_matching': True,
|
||||
@@ -455,6 +473,12 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
'age_limit': age_limit,
|
||||
**traverse_obj(media_data, {
|
||||
'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
|
||||
'start_time': ('chapterTime', {int_or_none}),
|
||||
'title': ('chapterTitle', {str}),
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(media_data, ('meta', {
|
||||
'title': 'title',
|
||||
'description': 'synopsis',
|
||||
@@ -470,7 +494,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDMediathekCollectionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/?#]+/)?
|
||||
(?P<playlist>sendung|serie|sammlung)/
|
||||
|
||||
@@ -101,9 +101,10 @@ class AsobiStageIE(InfoExtractor):
|
||||
self._HEADERS['Authorization'] = f'Bearer {token}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
|
||||
webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
|
||||
video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
|
||||
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
event_data = traverse_obj(
|
||||
self._search_nextjs_data(webpage, video_id, default={}),
|
||||
('props', 'pageProps', 'eventCMSData', {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -6,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
@@ -17,6 +19,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
@@ -40,6 +43,8 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader_url': 'https://youtube-dl.bandcamp.com',
|
||||
'uploader_id': 'youtube-dl',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
||||
'artists': ['youtube-dl "\'/\\ä↭'],
|
||||
'album_artists': ['youtube-dl "\'/\\ä↭'],
|
||||
},
|
||||
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
}, {
|
||||
@@ -266,6 +271,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311756226,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
|
||||
'album_artists': ['Blazo'],
|
||||
'uploader_url': 'https://blazo.bandcamp.com',
|
||||
'release_date': '20110727',
|
||||
'release_timestamp': 1311724800.0,
|
||||
'track': 'Intro',
|
||||
'uploader_id': 'blazo',
|
||||
'track_number': 1,
|
||||
'album': 'Jazz Format Mixtape vol.1',
|
||||
'artists': ['Blazo'],
|
||||
'duration': 19.335,
|
||||
'track_id': '1353101989',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -277,6 +294,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311757238,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
'track': 'Kero One - Keep It Alive (Blazo remix)',
|
||||
'release_date': '20110727',
|
||||
'track_id': '38097443',
|
||||
'track_number': 2,
|
||||
'duration': 181.467,
|
||||
'uploader_url': 'https://blazo.bandcamp.com',
|
||||
'album': 'Jazz Format Mixtape vol.1',
|
||||
'uploader_id': 'blazo',
|
||||
'album_artists': ['Blazo'],
|
||||
'artists': ['Blazo'],
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
|
||||
'release_timestamp': 1311724800.0,
|
||||
},
|
||||
},
|
||||
],
|
||||
@@ -284,6 +313,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'title': 'Jazz Format Mixtape vol.1',
|
||||
'id': 'jazz-format-mixtape-vol-1',
|
||||
'uploader_id': 'blazo',
|
||||
'description': 'md5:38052a93217f3ffdc033cd5dbbce2989',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2,
|
||||
@@ -358,10 +388,10 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://bandcamp.com/?show=224',
|
||||
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
|
||||
'md5': '61acc9a002bed93986b91168aa3ab433',
|
||||
'info_dict': {
|
||||
'id': '224',
|
||||
'ext': 'opus',
|
||||
'ext': 'mp3',
|
||||
'title': 'BC Weekly April 4th 2017 - Magic Moments',
|
||||
'description': 'md5:5d48150916e8e02d030623a48512c874',
|
||||
'duration': 5829.77,
|
||||
@@ -371,7 +401,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'episode_id': '224',
|
||||
},
|
||||
'params': {
|
||||
'format': 'opus-lo',
|
||||
'format': 'mp3-128',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||
@@ -459,7 +489,7 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://coldworldofficial.bandcamp.com/music',
|
||||
'playlist_mincount': 10,
|
||||
'playlist_mincount': 7,
|
||||
'info_dict': {
|
||||
'id': 'coldworldofficial',
|
||||
'title': 'Discography of coldworldofficial',
|
||||
@@ -473,12 +503,19 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _yield_items(self, webpage):
|
||||
yield from (
|
||||
re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
yield from traverse_obj(webpage, (
|
||||
{find_element(id='music-grid', html=True)}, {extract_attributes},
|
||||
'data-client-items', {json.loads}, ..., 'page_url', {str}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader = self._match_id(url)
|
||||
webpage = self._download_webpage(url, uploader)
|
||||
|
||||
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
return self.playlist_from_matches(
|
||||
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
|
||||
self._yield_items(webpage), uploader, f'Discography of {uploader}',
|
||||
getter=urljoin(url))
|
||||
|
||||
437
yt_dlp/extractor/bandlab.py
Normal file
437
yt_dlp/extractor/bandlab.py
Normal file
@@ -0,0 +1,437 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
truncate_string,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
|
||||
|
||||
class BandlabBaseIE(InfoExtractor):
|
||||
def _call_api(self, endpoint, asset_id, **kwargs):
|
||||
headers = kwargs.pop('headers', None) or {}
|
||||
return self._download_json(
|
||||
f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}',
|
||||
asset_id, headers={
|
||||
'accept': 'application/json',
|
||||
'referer': 'https://www.bandlab.com/',
|
||||
'x-client-id': 'BandLab-Web',
|
||||
'x-client-version': '10.1.124',
|
||||
**headers,
|
||||
}, **kwargs)
|
||||
|
||||
def _parse_revision(self, revision_data, url=None):
|
||||
return {
|
||||
'vcodec': 'none',
|
||||
'media_type': 'revision',
|
||||
'extractor_key': BandlabIE.ie_key(),
|
||||
'extractor': BandlabIE.IE_NAME,
|
||||
**traverse_obj(revision_data, {
|
||||
'webpage_url': (
|
||||
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
|
||||
'id': (('revisionId', 'id'), {str}, any),
|
||||
'title': ('song', 'name', {str}),
|
||||
'track': ('song', 'name', {str}),
|
||||
'url': ('mixdown', 'file', {url_or_none}),
|
||||
'thumbnail': ('song', 'picture', 'url', {url_or_none}),
|
||||
'description': ('description', {str}),
|
||||
'uploader': ('creator', 'name', {str}),
|
||||
'uploader_id': ('creator', 'username', {str}),
|
||||
'timestamp': ('createdOn', {parse_iso8601}),
|
||||
'duration': ('mixdown', 'duration', {float_or_none}),
|
||||
'view_count': ('counters', 'plays', {int_or_none}),
|
||||
'like_count': ('counters', 'likes', {int_or_none}),
|
||||
'comment_count': ('counters', 'comments', {int_or_none}),
|
||||
'genres': ('genres', ..., 'name', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _parse_track(self, track_data, url=None):
|
||||
return {
|
||||
'vcodec': 'none',
|
||||
'media_type': 'track',
|
||||
'extractor_key': BandlabIE.ie_key(),
|
||||
'extractor': BandlabIE.IE_NAME,
|
||||
**traverse_obj(track_data, {
|
||||
'webpage_url': (
|
||||
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
|
||||
'id': (('revisionId', 'id'), {str}, any),
|
||||
'url': ('track', 'sample', 'audioUrl', {url_or_none}),
|
||||
'title': ('track', 'name', {str}),
|
||||
'track': ('track', 'name', {str}),
|
||||
'description': ('caption', {str}),
|
||||
'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
|
||||
'view_count': ('counters', 'plays', {int_or_none}),
|
||||
'like_count': ('counters', 'likes', {int_or_none}),
|
||||
'comment_count': ('counters', 'comments', {int_or_none}),
|
||||
'duration': ('track', 'sample', 'duration', {float_or_none}),
|
||||
'uploader': ('creator', 'name', {str}),
|
||||
'uploader_id': ('creator', 'username', {str}),
|
||||
'timestamp': ('createdOn', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _parse_video(self, video_data, url=None):
|
||||
return {
|
||||
'media_type': 'video',
|
||||
'extractor_key': BandlabIE.ie_key(),
|
||||
'extractor': BandlabIE.IE_NAME,
|
||||
**traverse_obj(video_data, {
|
||||
'id': ('id', {str}),
|
||||
'webpage_url': (
|
||||
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
|
||||
'url': ('video', 'url', {url_or_none}),
|
||||
'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
|
||||
'description': ('caption', {str}),
|
||||
'thumbnail': ('video', 'picture', 'url', {url_or_none}),
|
||||
'view_count': ('video', 'counters', 'plays', {int_or_none}),
|
||||
'like_count': ('video', 'counters', 'likes', {int_or_none}),
|
||||
'comment_count': ('counters', 'comments', {int_or_none}),
|
||||
'duration': ('video', 'duration', {float_or_none}),
|
||||
'uploader': ('creator', 'name', {str}),
|
||||
'uploader_id': ('creator', 'username', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class BandlabIE(BandlabBaseIE):
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?bandlab.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
|
||||
r'https?://(?:www\.)?bandlab.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
|
||||
]
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
|
||||
'md5': '46f7b43367dd268bbcf0bbe466753b2c',
|
||||
'info_dict': {
|
||||
'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
|
||||
'ext': 'm4a',
|
||||
'uploader_id': 'ender_milze',
|
||||
'track': 'sweet black',
|
||||
'description': 'composed by juanjn3737',
|
||||
'timestamp': 1702171963,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 54.629999999999995,
|
||||
'title': 'sweet black',
|
||||
'upload_date': '20231210',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
|
||||
'genres': ['Lofi'],
|
||||
'uploader': 'ender milze',
|
||||
'comment_count': int,
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}, {
|
||||
# Same track as above but post URL
|
||||
'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
|
||||
'md5': '46f7b43367dd268bbcf0bbe466753b2c',
|
||||
'info_dict': {
|
||||
'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
|
||||
'ext': 'm4a',
|
||||
'uploader_id': 'ender_milze',
|
||||
'track': 'sweet black',
|
||||
'description': 'composed by juanjn3737',
|
||||
'timestamp': 1702171973,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 54.629999999999995,
|
||||
'title': 'sweet black',
|
||||
'upload_date': '20231210',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
|
||||
'genres': ['Lofi'],
|
||||
'uploader': 'ender milze',
|
||||
'comment_count': int,
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}, {
|
||||
# SharedKey Example
|
||||
'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
|
||||
'md5': '15174b57c44440e2a2008be9cae00250',
|
||||
'info_dict': {
|
||||
'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
|
||||
'ext': 'm4a',
|
||||
'comment_count': int,
|
||||
'genres': ['Other'],
|
||||
'uploader_id': 'user8353034818103753',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
|
||||
'timestamp': 1709625771,
|
||||
'track': 'PodcastMaerchen4b',
|
||||
'duration': 468.14,
|
||||
'view_count': int,
|
||||
'description': 'Podcast: Neues aus der Märchenwelt',
|
||||
'like_count': int,
|
||||
'upload_date': '20240305',
|
||||
'uploader': 'Erna Wageneder',
|
||||
'title': 'PodcastMaerchen4b',
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}, {
|
||||
# Different Revision selected
|
||||
'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
|
||||
'md5': '74e055ef9325d63f37088772fbfe4454',
|
||||
'info_dict': {
|
||||
'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
|
||||
'ext': 'm4a',
|
||||
'timestamp': 1588273294,
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
|
||||
'description': 'Final Revision.',
|
||||
'title': 'Replay ( Instrumental)',
|
||||
'uploader': 'David R Sparks',
|
||||
'uploader_id': 'davesnothome69',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'track': 'Replay ( Instrumental)',
|
||||
'genres': ['Rock'],
|
||||
'upload_date': '20200430',
|
||||
'like_count': int,
|
||||
'duration': 279.43,
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}, {
|
||||
# Video
|
||||
'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
|
||||
'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
|
||||
'info_dict': {
|
||||
'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
|
||||
'ext': 'mp4',
|
||||
'duration': 44.705,
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
|
||||
'comment_count': int,
|
||||
'title': 'backing vocals',
|
||||
'uploader_id': 'marliashya',
|
||||
'uploader': 'auraa',
|
||||
'like_count': int,
|
||||
'description': 'backing vocals',
|
||||
'media_type': 'video',
|
||||
},
|
||||
}, {
|
||||
# Embed Example
|
||||
'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
|
||||
'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
|
||||
'info_dict': {
|
||||
'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
|
||||
'ext': 'm4a',
|
||||
'comment_count': int,
|
||||
'genres': ['Electronic'],
|
||||
'uploader': 'Charlie Henson',
|
||||
'timestamp': 1587328674,
|
||||
'upload_date': '20200419',
|
||||
'view_count': int,
|
||||
'track': 'Positronic Meltdown',
|
||||
'duration': 318.55,
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
|
||||
'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
|
||||
'uploader_id': 'microfreaks',
|
||||
'title': 'Positronic Meltdown',
|
||||
'like_count': int,
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}, {
|
||||
# Track without revisions available
|
||||
'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
|
||||
'md5': 'f05d68a3769952c2d9257c473e14c15f',
|
||||
'info_dict': {
|
||||
'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
|
||||
'ext': 'm4a',
|
||||
'track': 'insame',
|
||||
'like_count': int,
|
||||
'duration': 84.03,
|
||||
'title': 'insame',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'uploader': 'Sorakime',
|
||||
'uploader_id': 'sorakime',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
|
||||
'timestamp': 1691162128,
|
||||
'upload_date': '20230804',
|
||||
'media_type': 'track',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://phantomluigi.github.io/',
|
||||
'info_dict': {
|
||||
'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
|
||||
'ext': 'm4a',
|
||||
'view_count': int,
|
||||
'upload_date': '20240913',
|
||||
'uploader_id': 'phantommusicofficial',
|
||||
'timestamp': 1726194897,
|
||||
'uploader': 'Phantom',
|
||||
'comment_count': int,
|
||||
'genres': ['Progresive Rock'],
|
||||
'description': 'md5:a38cd668f7a2843295ef284114f18429',
|
||||
'duration': 225.23,
|
||||
'like_count': int,
|
||||
'title': 'Vermilion Pt. 2 (Cover)',
|
||||
'track': 'Vermilion Pt. 2 (Cover)',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, url_type = self._match_valid_url(url).group('id', 'url_type')
|
||||
|
||||
qs = parse_qs(url)
|
||||
revision_id = traverse_obj(qs, (('revId', 'id'), 0, any))
|
||||
if url_type == 'revision':
|
||||
revision_id = display_id
|
||||
|
||||
revision_data = None
|
||||
if not revision_id:
|
||||
post_data = self._call_api(
|
||||
'posts', display_id, note='Downloading post data',
|
||||
query=traverse_obj(qs, {'sharedKey': ('sharedKey', 0)}))
|
||||
|
||||
revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any))
|
||||
revision_data = traverse_obj(post_data, ('revision', {dict}))
|
||||
|
||||
if not revision_data and not revision_id:
|
||||
post_type = post_data.get('type')
|
||||
if post_type == 'Video':
|
||||
return self._parse_video(post_data, url=url)
|
||||
if post_type == 'Track':
|
||||
return self._parse_track(post_data, url=url)
|
||||
raise ExtractorError(f'Could not extract data for post type {post_type!r}')
|
||||
|
||||
if not revision_data:
|
||||
revision_data = self._call_api(
|
||||
'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})
|
||||
|
||||
return self._parse_revision(revision_data, url=url)
|
||||
|
||||
|
||||
class BandlabPlaylistIE(BandlabBaseIE):
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?bandlab.com/(?:[\w]+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
|
||||
r'https?://(?:www\.)?bandlab.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
|
||||
]
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
|
||||
'info_dict': {
|
||||
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
|
||||
'release_date': '20221003',
|
||||
'title': 'Remnants',
|
||||
'album': 'Remnants',
|
||||
'like_count': int,
|
||||
'album_type': 'LP',
|
||||
'description': 'A collection of some feel good, rock hits.',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'id': '89b79ea6-de42-ed11-b495-00224845aac7',
|
||||
'uploader': 'David R Sparks',
|
||||
'uploader_id': 'davesnothome69',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
|
||||
'info_dict': {
|
||||
'id': '955102d4-1040-ef11-86c3-000d3a42581b',
|
||||
'timestamp': 1720762659,
|
||||
'view_count': int,
|
||||
'title': 'My Shit 🖤',
|
||||
'uploader_id': 'slytheband',
|
||||
'uploader': '𝓢𝓛𝓨',
|
||||
'upload_date': '20240712',
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
}, {
|
||||
# Embeds can contain both albums and collections with the same URL pattern. This is an album
|
||||
'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
|
||||
'info_dict': {
|
||||
'id': '12cc6f7f-951b-ee11-907c-00224844f303',
|
||||
'release_date': '20230706',
|
||||
'description': 'This is a collection of songs I created when I had an Amiga computer.',
|
||||
'view_count': int,
|
||||
'title': 'Mark Salud The Amiga Collection',
|
||||
'uploader_id': 'mssirmooth1962',
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
|
||||
'like_count': int,
|
||||
'uploader': 'Mark Salud',
|
||||
'album': 'Mark Salud The Amiga Collection',
|
||||
'album_type': 'LP',
|
||||
},
|
||||
'playlist_count': 24,
|
||||
}, {
|
||||
# Tracks without revision id
|
||||
'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
|
||||
'info_dict': {
|
||||
'like_count': int,
|
||||
'uploader_id': 'sorakime',
|
||||
'comment_count': int,
|
||||
'uploader': 'Sorakime',
|
||||
'view_count': int,
|
||||
'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
|
||||
'release_date': '20230812',
|
||||
'title': 'Art',
|
||||
'album': 'Art',
|
||||
'album_type': 'Album',
|
||||
'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
|
||||
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _entries(self, album_data):
|
||||
for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])):
|
||||
post_type = post['type']
|
||||
if post_type == 'Revision':
|
||||
yield self._parse_revision(post.get('revision'))
|
||||
elif post_type == 'Track':
|
||||
yield self._parse_track(post)
|
||||
elif post_type == 'Video':
|
||||
yield self._parse_video(post)
|
||||
else:
|
||||
self.report_warning(f'Skipping unknown post type: "{post_type}"')
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
|
||||
|
||||
endpoints = {
|
||||
'albums': ['albums'],
|
||||
'collections': ['collections'],
|
||||
'embed': ['collections', 'albums'],
|
||||
}.get(playlist_type)
|
||||
for endpoint in endpoints:
|
||||
playlist_data = self._call_api(
|
||||
endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
|
||||
fatal=False, expected_status=404)
|
||||
if not playlist_data.get('errorCode'):
|
||||
playlist_type = endpoint
|
||||
break
|
||||
if error_code := playlist_data.get('errorCode'):
|
||||
raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(playlist_data), playlist_id,
|
||||
**traverse_obj(playlist_data, {
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', {str}),
|
||||
'uploader': ('creator', 'name', {str}),
|
||||
'uploader_id': ('creator', 'username', {str}),
|
||||
'timestamp': ('createdOn', {parse_iso8601}),
|
||||
'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
|
||||
'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
|
||||
'like_count': ('counters', 'likes', {int_or_none}),
|
||||
'comment_count': ('counters', 'comments', {int_or_none}),
|
||||
'view_count': ('counters', 'plays', {int_or_none}),
|
||||
}),
|
||||
**(traverse_obj(playlist_data, {
|
||||
'album': ('name', {str}),
|
||||
'album_type': ('type', {str}),
|
||||
}) if playlist_type == 'albums' else {}))
|
||||
@@ -1284,9 +1284,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, filter, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1386,7 +1386,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'ext': ('format', {str}),
|
||||
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
'tbr': ('bitrate', {int_or_none(scale=1000)}),
|
||||
}))
|
||||
if formats:
|
||||
entry = {
|
||||
@@ -1398,7 +1398,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('firstPublished', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
done = True
|
||||
@@ -1428,7 +1428,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
if not entry.get('timestamp'):
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('timestamp'), 'model',
|
||||
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
'timestamp', {int_or_none(scale=1000)}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
68
yt_dlp/extractor/beacon.py
Normal file
68
yt_dlp/extractor/beacon.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BeaconTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beacon\.tv/content/(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://beacon.tv/content/welcome-to-beacon',
|
||||
'md5': 'b3f5932d437f288e662f10f3bfc5bd04',
|
||||
'info_dict': {
|
||||
'id': 'welcome-to-beacon',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240509',
|
||||
'description': 'md5:ea2bd32e71acf3f9fca6937412cc3563',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/I4CkkEvN/poster.jpg?width=720',
|
||||
'title': 'Your home for Critical Role!',
|
||||
'timestamp': 1715227200,
|
||||
'duration': 105.494,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beacon.tv/content/re-slayers-take-trailer',
|
||||
'md5': 'd879b091485dbed2245094c8152afd89',
|
||||
'info_dict': {
|
||||
'id': 're-slayers-take-trailer',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Re-Slayer’s Take | Official Trailer',
|
||||
'timestamp': 1715189040,
|
||||
'upload_date': '20240508',
|
||||
'duration': 53.249,
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/PW5ApIw3/poster.jpg?width=720',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
content_data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'pageProps', '__APOLLO_STATE__',
|
||||
lambda k, v: k.startswith('Content:') and v['slug'] == video_id, any))
|
||||
if not content_data:
|
||||
raise ExtractorError('Failed to extract content data')
|
||||
|
||||
jwplayer_data = traverse_obj(content_data, (
|
||||
(('contentVideo', 'video', 'videoData'),
|
||||
('contentPodcast', 'podcast', 'audioData')), {json.loads}, {dict}, any))
|
||||
if not jwplayer_data:
|
||||
if content_data.get('contentType') not in ('videoPodcast', 'video', 'podcast'):
|
||||
raise ExtractorError('Content is not a video/podcast', expected=True)
|
||||
if traverse_obj(content_data, ('contentTier', '__ref')) != 'MemberTier:65b258d178f89be87b4dc0a4':
|
||||
self.raise_login_required('This video/podcast is for members only')
|
||||
raise ExtractorError('Failed to extract content')
|
||||
|
||||
return {
|
||||
**self._parse_jwplayer_data(jwplayer_data, video_id),
|
||||
**traverse_obj(content_data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('publishedAt', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
@@ -1,18 +1,33 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes
|
||||
from ..utils import ExtractorError, extract_attributes
|
||||
|
||||
|
||||
class BFMTVBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
|
||||
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>.*?</div>)'
|
||||
_VIDEO_ELEMENT_REGEX = r'(<video-js[^>]+>)'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _brightcove_url_result(self, video_id, video_block):
|
||||
account_id = video_block.get('accountid') or '876450612001'
|
||||
player_id = video_block.get('playerid') or 'I2qBTln4u'
|
||||
def _extract_video(self, video_block):
|
||||
video_element = self._search_regex(
|
||||
self._VIDEO_ELEMENT_REGEX, video_block, 'video element', default=None)
|
||||
if video_element:
|
||||
video_element_attrs = extract_attributes(video_element)
|
||||
video_id = video_element_attrs.get('data-video-id')
|
||||
if not video_id:
|
||||
return
|
||||
account_id = video_element_attrs.get('data-account') or '876450610001'
|
||||
player_id = video_element_attrs.get('adjustplayer') or '19dszYXgm'
|
||||
else:
|
||||
video_block_attrs = extract_attributes(video_block)
|
||||
video_id = video_block_attrs.get('videoid')
|
||||
if not video_id:
|
||||
return
|
||||
account_id = video_block_attrs.get('accountid') or '876630703001'
|
||||
player_id = video_block_attrs.get('playerid') or 'KbPwEbuHx'
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||
'BrightcoveNew', video_id)
|
||||
@@ -40,23 +55,25 @@ class BFMTVIE(BFMTVBaseIE):
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
video_block = extract_attributes(self._search_regex(
|
||||
video = self._extract_video(self._search_regex(
|
||||
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||
return self._brightcove_url_result(video_block['videoid'], video_block)
|
||||
if not video:
|
||||
raise ExtractorError('Failed to extract video')
|
||||
return video
|
||||
|
||||
|
||||
class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
class BFMTVLiveIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:live'
|
||||
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bfmtv.com/en-direct/',
|
||||
'info_dict': {
|
||||
'id': '5615950982001',
|
||||
'id': '6346069778112',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'title': r're:^Le Live BFM TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'uploader_id': '876450610001',
|
||||
'upload_date': '20220926',
|
||||
'timestamp': 1664207191,
|
||||
'upload_date': '20240202',
|
||||
'timestamp': 1706887572,
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://.+/image\.jpg',
|
||||
'tags': [],
|
||||
@@ -69,6 +86,15 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
video = self._extract_video(self._search_regex(
|
||||
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||
if not video:
|
||||
raise ExtractorError('Failed to extract video')
|
||||
return video
|
||||
|
||||
|
||||
class BFMTVArticleIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:article'
|
||||
@@ -102,18 +128,16 @@ class BFMTVArticleIE(BFMTVBaseIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _entries(self, webpage):
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video = self._extract_video(video_block_el)
|
||||
if video:
|
||||
yield video
|
||||
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
|
||||
entries = []
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video_block = extract_attributes(video_block_el)
|
||||
video_id = video_block.get('videoid')
|
||||
if not video_id:
|
||||
continue
|
||||
entries.append(self._brightcove_url_result(video_id, video_block))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._entries(webpage), bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._html_search_meta(['og:description', 'description'], webpage))
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -50,7 +49,7 @@ class BibelTVBaseIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('schedulingStart', {parse_iso8601}),
|
||||
'season_number': 'seasonNumber',
|
||||
'episode_number': 'episodeNumber',
|
||||
|
||||
@@ -4,7 +4,9 @@ import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import math
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
@@ -18,7 +20,6 @@ from ..utils import (
|
||||
InAdvancePagedList,
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
@@ -46,6 +47,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
|
||||
_wbi_key_cache = {}
|
||||
@@ -62,7 +64,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
|
||||
if missing_formats:
|
||||
self.to_screen(
|
||||
f'Format(s) {missing_formats} are missing; you have to login or '
|
||||
f'Format(s) {missing_formats} are missing; you have to '
|
||||
f'become a premium member to download them. {self._login_hint()}')
|
||||
|
||||
def extract_formats(self, play_info):
|
||||
@@ -108,7 +110,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('length', {float_or_none(scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}))
|
||||
if fragments:
|
||||
@@ -123,7 +125,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'quality': ('quality', {int_or_none}),
|
||||
'format_id': ('quality', {str_or_none}),
|
||||
'format_note': ('quality', {lambda x: format_names.get(x)}),
|
||||
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('timelength', {float_or_none(scale=1000)}),
|
||||
}),
|
||||
**parse_resolution(format_names.get(play_info.get('quality'))),
|
||||
})
|
||||
@@ -164,14 +166,18 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
|
||||
return params
|
||||
|
||||
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
|
||||
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
|
||||
if qn:
|
||||
params['qn'] = qn
|
||||
def _download_playinfo(self, bvid, cid, headers=None, query=None):
|
||||
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})}
|
||||
if self.is_logged_in:
|
||||
params.pop('try_look', None)
|
||||
if qn := params.get('qn'):
|
||||
note = f'Downloading video format {qn} for cid {cid}'
|
||||
else:
|
||||
note = f'Downloading video formats for cid {cid}'
|
||||
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
|
||||
query=self._sign_wbi(params, bvid), headers=headers,
|
||||
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
|
||||
query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
@@ -190,9 +196,9 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
video_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}')
|
||||
note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
|
||||
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||
self.report_warning(
|
||||
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
|
||||
@@ -206,8 +212,8 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
def _get_chapters(self, aid, cid):
|
||||
chapters = aid and cid and self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
note='Extracting chapters', fatal=False)
|
||||
'https://api.bilibili.com/x/player/wbi/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
note='Extracting chapters', fatal=False, headers=self._HEADERS)
|
||||
return traverse_obj(chapters, ('data', 'view_points', ..., {
|
||||
'title': 'content',
|
||||
'start_time': 'from',
|
||||
@@ -285,7 +291,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||
for cid, edges in cid_edges.items():
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
|
||||
yield {
|
||||
**metainfo,
|
||||
'id': f'{video_id}_{cid}',
|
||||
@@ -638,40 +644,29 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
headers['Referer'] = url
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
|
||||
self.raise_login_required()
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
|
||||
raise ExtractorError(
|
||||
'This video may be deleted or geo-restricted. '
|
||||
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
|
||||
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
else:
|
||||
play_info_obj = self._search_json(
|
||||
r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
|
||||
if not play_info_obj:
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
|
||||
self.raise_login_required()
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
|
||||
raise ExtractorError(
|
||||
'This video may be deleted or geo-restricted. '
|
||||
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
|
||||
play_info = traverse_obj(play_info_obj, ('data', {dict}))
|
||||
if not play_info:
|
||||
if traverse_obj(play_info_obj, 'code') == 87007:
|
||||
toast = get_element_by_class('tips-toast', webpage) or ''
|
||||
msg = clean_html(
|
||||
f'{get_element_by_class("belongs-to", toast) or ""},'
|
||||
+ (get_element_by_class('level', toast) or ''))
|
||||
raise ExtractorError(
|
||||
f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
|
||||
raise ExtractorError('Failed to extract play info')
|
||||
video_data = initial_state['videoData']
|
||||
|
||||
video_id, title = video_data['bvid'], video_data.get('title')
|
||||
|
||||
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
|
||||
page_list_json = not is_festival and traverse_obj(
|
||||
page_list_json = (not is_festival and traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||
note='Extracting videos in anthology', headers=headers),
|
||||
'data', expected_type=list) or []
|
||||
'data', expected_type=list)) or []
|
||||
is_anthology = len(page_list_json) > 1
|
||||
|
||||
part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
|
||||
@@ -690,8 +685,6 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
festival_info = {}
|
||||
if is_festival:
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
|
||||
festival_info = traverse_obj(initial_state, {
|
||||
'uploader': ('videoInfo', 'upName'),
|
||||
'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
|
||||
@@ -726,62 +719,79 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
|
||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
__post_extractor=self.extract_comments(aid))
|
||||
else:
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if not traverse_obj(play_info, ('dash')):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
play_info = None
|
||||
if self.is_logged_in:
|
||||
play_info = traverse_obj(
|
||||
self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None),
|
||||
('data', {dict}))
|
||||
if not play_info:
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
if video_data.get('is_upower_exclusive'):
|
||||
high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {}
|
||||
msg = f'{join_nonempty("title", "sub_title", from_dict=high_level, delim=",")}. {self._login_hint()}'
|
||||
if not formats:
|
||||
raise ExtractorError(f'This is a supporter-only video: {msg}', expected=True)
|
||||
if '试看' in traverse_obj(play_info, ('accept_description', ..., {str})):
|
||||
self.report_warning(
|
||||
f'This is a supporter-only video, only the preview will be extracted: {msg}',
|
||||
video_id=video_id)
|
||||
|
||||
if not traverse_obj(play_info, 'dash'):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, query={'qn': qn})),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
@@ -859,10 +869,16 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
headers['Referer'] = url
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
headers=headers)
|
||||
|
||||
play_info = (
|
||||
self._search_json(
|
||||
r'playurlSSRData\s*=', webpage, 'embedded page info', episode_id,
|
||||
end_pattern='\n', default=None)
|
||||
or self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
|
||||
headers=headers))
|
||||
|
||||
premium_only = play_info.get('code') == -10403
|
||||
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
|
||||
|
||||
@@ -1021,8 +1037,6 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
|
||||
|
||||
class BilibiliCheeseBaseIE(BilibiliBaseIE):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _extract_episode(self, season_info, ep_id):
|
||||
episode_info = traverse_obj(season_info, (
|
||||
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
|
||||
@@ -1165,28 +1179,26 @@ class BilibiliSpaceBaseIE(BilibiliBaseIE):
|
||||
|
||||
|
||||
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>(?:/upload)?/video)?/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/3985676/video',
|
||||
'info_dict': {
|
||||
'id': '3985676',
|
||||
},
|
||||
'playlist_mincount': 178,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/313580179/video',
|
||||
'info_dict': {
|
||||
'id': '313580179',
|
||||
},
|
||||
'playlist_mincount': 92,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
||||
if not is_video_url:
|
||||
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
||||
'To download audios, add a "/audio" to the URL')
|
||||
'To download audios, add a "/upload/audio" to the URL')
|
||||
|
||||
def fetch_page(page_idx):
|
||||
query = {
|
||||
@@ -1199,6 +1211,12 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
'ps': 30,
|
||||
'tid': 0,
|
||||
'web_location': 1550101,
|
||||
'dm_img_list': '[]',
|
||||
'dm_img_str': base64.b64encode(
|
||||
''.join(random.choices(string.printable, k=random.randint(16, 64))).encode())[:-2].decode(),
|
||||
'dm_cover_img_str': base64.b64encode(
|
||||
''.join(random.choices(string.printable, k=random.randint(32, 128))).encode())[:-2].decode(),
|
||||
'dm_img_inter': '{"ds":[],"wh":[6093,6631,31],"of":[430,760,380]}',
|
||||
}
|
||||
|
||||
try:
|
||||
@@ -1209,14 +1227,14 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
'Request is blocked by server (412), please wait and try later.', expected=True)
|
||||
raise
|
||||
status_code = response['code']
|
||||
if status_code == -401:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||
elif status_code == -352 and not self.is_logged_in:
|
||||
self.raise_login_required('Request is rejected, you need to login to access playlist')
|
||||
'Request is blocked by server (401), please wait and try later.', expected=True)
|
||||
elif status_code == -352:
|
||||
raise ExtractorError('Request is rejected by server (352)', expected=True)
|
||||
elif status_code != 0:
|
||||
raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
|
||||
return response['data']
|
||||
@@ -1238,9 +1256,9 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
|
||||
|
||||
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/(?:upload/)?audio'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/313580179/audio',
|
||||
'url': 'https://space.bilibili.com/313580179/upload/audio',
|
||||
'info_dict': {
|
||||
'id': '313580179',
|
||||
},
|
||||
@@ -1263,7 +1281,8 @@ class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
for entry in page_data.get('data', []):
|
||||
# data is None when the playlist is empty
|
||||
for entry in page_data.get('data') or []:
|
||||
yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
@@ -1287,30 +1306,43 @@ class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
|
||||
|
||||
|
||||
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
|
||||
_VALID_URL = [
|
||||
r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)',
|
||||
r'https?://space\.bilibili\.com/(?P<mid>\d+)/lists/(?P<sid>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
||||
'url': 'https://space.bilibili.com/2142762/lists/3662502?type=season',
|
||||
'info_dict': {
|
||||
'id': '2142762_57445',
|
||||
'title': '【完结】《底特律 变人》全结局流程解说',
|
||||
'description': '',
|
||||
'id': '2142762_3662502',
|
||||
'title': '合集·《黑神话悟空》流程解说',
|
||||
'description': '黑神话悟空 相关节目',
|
||||
'uploader': '老戴在此',
|
||||
'uploader_id': '2142762',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
|
||||
'thumbnail': 'https://archive.biliimg.com/bfs/archive/22302e17dc849dd4533606d71bc89df162c3a9bf.jpg',
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
'playlist_mincount': 62,
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/2142762/lists/3662502',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BilibiliSeriesListIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid, sid = self._match_valid_url(url).group('mid', 'sid')
|
||||
playlist_id = f'{mid}_{sid}'
|
||||
|
||||
def fetch_page(page_idx):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
|
||||
playlist_id, note=f'Downloading page {page_idx}',
|
||||
'https://api.bilibili.com/x/polymer/web-space/seasons_archives_list',
|
||||
playlist_id, note=f'Downloading page {page_idx}', headers={'Referer': url},
|
||||
query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
@@ -1337,9 +1369,12 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
|
||||
|
||||
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
|
||||
_VALID_URL = [
|
||||
r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)',
|
||||
r'https?://space\.bilibili\.com/(?P<mid>\d+)/lists/(?P<sid>\d+)/?\?(?:[^#]+&)?type=series(?:[&#]|$)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
|
||||
'url': 'https://space.bilibili.com/1958703906/lists/547718?type=series',
|
||||
'info_dict': {
|
||||
'id': '1958703906_547718',
|
||||
'title': '直播回放',
|
||||
@@ -1352,6 +1387,9 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
'modified_date': str,
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -1370,7 +1408,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
def fetch_page(page_idx):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/series/archives',
|
||||
playlist_id, note=f'Downloading page {page_idx}',
|
||||
playlist_id, note=f'Downloading page {page_idx}', headers={'Referer': url},
|
||||
query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
@@ -1586,7 +1624,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
|
||||
'timestamp': ('ctime', {int_or_none}, filter),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
@@ -1849,10 +1887,51 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
ie=BiliBiliIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class BiliBiliDynamicIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:t\.bilibili\.com|(?:www\.)?bilibili\.com/opus)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://t.bilibili.com/998134289197432852',
|
||||
'info_dict': {
|
||||
'id': 'BV1TAmBYVEJr',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': '1192648858',
|
||||
'comment_count': int,
|
||||
'_old_archive_ids': ['bilibili 113457567568273_part1'],
|
||||
'thumbnail': 'http://i2.hdslb.com/bfs/archive/50091efd965d9f13ff6814f7ad374f90ab21e77d.jpg',
|
||||
'duration': 929.238,
|
||||
'upload_date': '20241110',
|
||||
'uploader': '何同学工作室',
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'title': '美国小朋友就玩这个?!何同学工作室11月开箱',
|
||||
'description': '本期产品信息:\n机器狗\n气味模拟器\nCloudboom Strike LS\n无弦吉他\n蓝牙磁带音箱\n神奇画板',
|
||||
'timestamp': 1731232800,
|
||||
'tags': list,
|
||||
'chapters': list,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
post_id = self._match_id(url)
|
||||
# Without the newer chrome UA, the API will return an error (-352)
|
||||
post_data = self._download_json(
|
||||
'https://api.bilibili.com/x/polymer/web-dynamic/v1/detail', post_id,
|
||||
query={'id': post_id}, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||
})
|
||||
video_url = traverse_obj(post_data, (
|
||||
'data', 'item', (None, 'orig'), 'modules', 'module_dynamic',
|
||||
(('major', ('archive', 'pgc')), ('additional', ('reserve', 'common'))),
|
||||
'jump_url', {url_or_none}, any, {self._proto_relative_url}))
|
||||
if not video_url or (self.suitable(video_url) and post_id == self._match_id(video_url)):
|
||||
raise ExtractorError('No valid video URL found', expected=True)
|
||||
return self.url_result(video_url)
|
||||
|
||||
|
||||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.tv/'}
|
||||
|
||||
def _call_api(self, endpoint, *args, **kwargs):
|
||||
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
|
||||
|
||||
393
yt_dlp/extractor/bluesky.py
Normal file
393
yt_dlp/extractor/bluesky.py
Normal file
@@ -0,0 +1,393 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
format_field,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
truncate_string,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BlueskyIE(InfoExtractor):
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?(?:bsky\.app|main\.bsky\.dev)/profile/(?P<handle>[\w.:%-]+)/post/(?P<id>\w+)',
|
||||
r'at://(?P<handle>[\w.:%-]+)/app\.bsky\.feed\.post/(?P<id>\w+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://bsky.app/profile/blu3blue.bsky.social/post/3l4omssdl632g',
|
||||
'md5': '375539c1930ab05d15585ed772ab54fd',
|
||||
'info_dict': {
|
||||
'id': '3l4omssdl632g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Blu3Blu3Lilith',
|
||||
'uploader_id': 'blu3blue.bsky.social',
|
||||
'uploader_url': 'https://bsky.app/profile/blu3blue.bsky.social',
|
||||
'channel_id': 'did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'OMG WE HAVE VIDEOS NOW',
|
||||
'description': 'OMG WE HAVE VIDEOS NOW',
|
||||
'upload_date': '20240921',
|
||||
'timestamp': 1726940605,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/bsky.app/post/3l3vgf77uco2g',
|
||||
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||
'info_dict': {
|
||||
'id': '3l3vgf77uco2g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Bluesky',
|
||||
'uploader_id': 'bsky.app',
|
||||
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726074716,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://main.bsky.dev/profile/souris.moe/post/3l4qhp7bcs52c',
|
||||
'md5': '5f2df8c200b5633eb7fb2c984d29772f',
|
||||
'info_dict': {
|
||||
'id': '3l4qhp7bcs52c',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'souris',
|
||||
'uploader_id': 'souris.moe',
|
||||
'uploader_url': 'https://bsky.app/profile/souris.moe',
|
||||
'channel_id': 'did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l4qhp7bcs52c',
|
||||
'upload_date': '20240922',
|
||||
'timestamp': 1727003838,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
|
||||
'md5': 'cc0110ed1f6b0247caac8234cc1e861d',
|
||||
'info_dict': {
|
||||
'id': '3l3w4tnezek2e',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'clean',
|
||||
'uploader_id': 'de1.pds.tentacle.expert',
|
||||
'uploader_url': 'https://bsky.app/profile/de1.pds.tentacle.expert',
|
||||
'channel_id': 'did:web:de1.tentacle.expert',
|
||||
'channel_url': 'https://bsky.app/profile/did:web:de1.tentacle.expert',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l3w4tnezek2e',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726098823,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/yunayuispink.bsky.social/post/3l7gqcfes742o',
|
||||
'info_dict': {
|
||||
'id': 'XxK3t_5V3ao',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'yunayu',
|
||||
'uploader_id': '@yunayuispink',
|
||||
'uploader_url': 'https://www.youtube.com/@yunayuispink',
|
||||
'channel': 'yunayu',
|
||||
'channel_id': 'UCPLvXnHa7lTyNoR_dGsU14w',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCPLvXnHa7lTyNoR_dGsU14w',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/XxK3t_5V3ao/maxresdefault.webp',
|
||||
'description': r're:Have a good goodx10000day',
|
||||
'title': '5min vs 5hours drawing',
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'upload_date': '20241026',
|
||||
'timestamp': 1729967784,
|
||||
'duration': 321,
|
||||
'age_limit': 0,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int,
|
||||
'categories': ['Entertainment'],
|
||||
'tags': [],
|
||||
'chapters': list,
|
||||
'heatmap': 'count:100',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/endshark.bsky.social/post/3jzxjkcemae2m',
|
||||
'info_dict': {
|
||||
'id': '222792849',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'LASERBAT',
|
||||
'uploader_id': 'laserbatx',
|
||||
'uploader_url': 'https://laserbatx.bandcamp.com',
|
||||
'artists': ['LASERBAT'],
|
||||
'album_artists': ['LASERBAT'],
|
||||
'album': 'Hari Nezumi [EP]',
|
||||
'track': 'Forward to the End',
|
||||
'title': 'LASERBAT - Forward to the End',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a2507705510_5.jpg',
|
||||
'duration': 228.571,
|
||||
'track_id': '222792849',
|
||||
'release_date': '20230423',
|
||||
'upload_date': '20230423',
|
||||
'timestamp': 1682276040.0,
|
||||
'release_timestamp': 1682276040.0,
|
||||
'track_number': 1,
|
||||
},
|
||||
'add_ie': ['Bandcamp'],
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/dannybhoix.bsky.social/post/3l6oe5mtr2c2j',
|
||||
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||
'info_dict': {
|
||||
'id': '3l3vgf77uco2g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Bluesky',
|
||||
'uploader_id': 'bsky.app',
|
||||
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726074716,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/cinny.bun.how/post/3l7rdfxhyds2f',
|
||||
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
|
||||
'info_dict': {
|
||||
'id': '3l7rdfxhyds2f',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'cinnamon',
|
||||
'uploader_id': 'cinny.bun.how',
|
||||
'uploader_url': 'https://bsky.app/profile/cinny.bun.how',
|
||||
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'crazy that i look like this tbh',
|
||||
'description': 'crazy that i look like this tbh',
|
||||
'upload_date': '20241030',
|
||||
'timestamp': 1730332128,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['sexual'],
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
'url': 'at://did:plc:ia76kvnndjutgedggx2ibrem/app.bsky.feed.post/3l6zrz6zyl2dr',
|
||||
'md5': '71b0eb6d85d03145e6af6642c7fc6d78',
|
||||
'info_dict': {
|
||||
'id': '3l6zrz6zyl2dr',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'mary🐇',
|
||||
'uploader_id': 'mary.my.id',
|
||||
'uploader_url': 'https://bsky.app/profile/mary.my.id',
|
||||
'channel_id': 'did:plc:ia76kvnndjutgedggx2ibrem',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:ia76kvnndjutgedggx2ibrem',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l6zrz6zyl2dr',
|
||||
'upload_date': '20241021',
|
||||
'timestamp': 1729523172,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/purpleicetea.bsky.social/post/3l7gv55dc2o2w',
|
||||
'info_dict': {
|
||||
'id': '3l7gv55dc2o2w',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '3l7gv55dc2o2w',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20241026',
|
||||
'description': 'One of my favorite videos',
|
||||
'comment_count': int,
|
||||
'uploader_url': 'https://bsky.app/profile/purpleicetea.bsky.social',
|
||||
'uploader': 'Purple.Ice.Tea',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||
'like_count': int,
|
||||
'channel_id': 'did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||
'repost_count': int,
|
||||
'timestamp': 1729973202,
|
||||
'tags': [],
|
||||
'uploader_id': 'purpleicetea.bsky.social',
|
||||
'title': 'One of my favorite videos',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '3l77u64l7le2e',
|
||||
'ext': 'mp4',
|
||||
'title': 'hearing people on twitter say that bluesky isn\'...',
|
||||
'like_count': int,
|
||||
'uploader_id': 'thafnine.net',
|
||||
'uploader_url': 'https://bsky.app/profile/thafnine.net',
|
||||
'upload_date': '20241024',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||
'description': r're:(?s)hearing people on twitter say that bluesky .{93}',
|
||||
'tags': [],
|
||||
'alt_title': 'md5:9b1ee1937fb3d1a81e932f9ec14d560e',
|
||||
'uploader': 'T9',
|
||||
'channel_id': 'did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'timestamp': 1729731642,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}],
|
||||
}]
|
||||
_BLOB_URL_TMPL = '{}/xrpc/com.atproto.sync.getBlob'
|
||||
|
||||
def _get_service_endpoint(self, did, video_id):
|
||||
if did.startswith('did:web:'):
|
||||
url = f'https://{did[8:]}/.well-known/did.json'
|
||||
else:
|
||||
url = f'https://plc.directory/{did}'
|
||||
services = self._download_json(
|
||||
url, video_id, 'Fetching service endpoint', 'Falling back to bsky.social', fatal=False)
|
||||
return traverse_obj(
|
||||
services, ('service', lambda _, x: x['type'] == 'AtprotoPersonalDataServer',
|
||||
'serviceEndpoint', {url_or_none}, any)) or 'https://bsky.social'
|
||||
|
||||
def _extract_post(self, handle, post_id):
|
||||
return self._download_json(
|
||||
'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
|
||||
post_id, query={
|
||||
'uri': f'at://{handle}/app.bsky.feed.post/{post_id}',
|
||||
'depth': 0,
|
||||
'parentHeight': 0,
|
||||
})['thread']['post']
|
||||
|
||||
def _real_extract(self, url):
|
||||
handle, video_id = self._match_valid_url(url).group('handle', 'id')
|
||||
post = self._extract_post(handle, video_id)
|
||||
|
||||
entries = []
|
||||
# app.bsky.embed.video.view/app.bsky.embed.external.view
|
||||
entries.extend(self._extract_videos(post, video_id))
|
||||
# app.bsky.embed.recordWithMedia.view
|
||||
entries.extend(self._extract_videos(
|
||||
post, video_id, embed_path=('embed', 'media'), record_subpath=('embed', 'media')))
|
||||
# app.bsky.embed.record.view
|
||||
if nested_post := traverse_obj(post, ('embed', 'record', ('record', None), {dict}, any)):
|
||||
entries.extend(self._extract_videos(
|
||||
nested_post, video_id, embed_path=('embeds', 0), record_path='value'))
|
||||
|
||||
if not entries:
|
||||
raise ExtractorError('No video could be found in this post', expected=True)
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
@staticmethod
|
||||
def _build_profile_url(path):
|
||||
return format_field(path, None, 'https://bsky.app/profile/%s', default=None)
|
||||
|
||||
def _extract_videos(self, root, video_id, embed_path='embed', record_path='record', record_subpath='embed'):
|
||||
embed_path = variadic(embed_path, (str, bytes, dict, set))
|
||||
record_path = variadic(record_path, (str, bytes, dict, set))
|
||||
record_subpath = variadic(record_subpath, (str, bytes, dict, set))
|
||||
|
||||
entries = []
|
||||
if external_uri := traverse_obj(root, (
|
||||
((*record_path, *record_subpath), embed_path), 'external', 'uri', {url_or_none}, any)):
|
||||
entries.append(self.url_result(external_uri))
|
||||
if playlist := traverse_obj(root, (*embed_path, 'playlist', {url_or_none})):
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
playlist, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
else:
|
||||
return entries
|
||||
|
||||
video_cid = traverse_obj(
|
||||
root, (*embed_path, 'cid', {str}),
|
||||
(*record_path, *record_subpath, 'video', 'ref', '$link', {str}))
|
||||
did = traverse_obj(root, ('author', 'did', {str}))
|
||||
|
||||
if did and video_cid:
|
||||
endpoint = self._get_service_endpoint(did, video_id)
|
||||
|
||||
formats.append({
|
||||
'format_id': 'blob',
|
||||
'quality': 1,
|
||||
'url': update_url_query(
|
||||
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': video_cid}),
|
||||
**traverse_obj(root, (*embed_path, 'aspectRatio', {
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
})),
|
||||
**traverse_obj(root, (*record_path, *record_subpath, 'video', {
|
||||
'filesize': ('size', {int_or_none}),
|
||||
'ext': ('mimeType', {mimetype2ext}),
|
||||
})),
|
||||
})
|
||||
|
||||
for sub_data in traverse_obj(root, (
|
||||
*record_path, *record_subpath, 'captions', lambda _, v: v['file']['ref']['$link'])):
|
||||
subtitles.setdefault(sub_data.get('lang') or 'und', []).append({
|
||||
'url': update_url_query(
|
||||
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': sub_data['file']['ref']['$link']}),
|
||||
'ext': traverse_obj(sub_data, ('file', 'mimeType', {mimetype2ext})),
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(root, {
|
||||
'id': ('uri', {url_basename}),
|
||||
'thumbnail': (*embed_path, 'thumbnail', {url_or_none}),
|
||||
'alt_title': (*embed_path, 'alt', {str}, filter),
|
||||
'uploader': ('author', 'displayName', {str}),
|
||||
'uploader_id': ('author', 'handle', {str}),
|
||||
'uploader_url': ('author', 'handle', {self._build_profile_url}),
|
||||
'channel_id': ('author', 'did', {str}),
|
||||
'channel_url': ('author', 'did', {self._build_profile_url}),
|
||||
'like_count': ('likeCount', {int_or_none}),
|
||||
'repost_count': ('repostCount', {int_or_none}),
|
||||
'comment_count': ('replyCount', {int_or_none}),
|
||||
'timestamp': ('indexedAt', {parse_iso8601}),
|
||||
'tags': ('labels', ..., 'val', {str}, all, {orderedSet}),
|
||||
'age_limit': (
|
||||
'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
|
||||
'description': (*record_path, 'text', {str}, filter),
|
||||
'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
|
||||
}),
|
||||
})
|
||||
return entries
|
||||
@@ -1,35 +1,20 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_text_and_html_by_tag,
|
||||
get_elements_by_class,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def html_get_element(tag=None, cls=None):
|
||||
assert tag or cls, 'One of tag or class is required'
|
||||
|
||||
if cls:
|
||||
func = functools.partial(get_elements_by_class, cls, tag=tag)
|
||||
else:
|
||||
func = functools.partial(get_element_text_and_html_by_tag, tag)
|
||||
|
||||
def html_get_element_wrapper(html):
|
||||
return variadic(func(html))[0]
|
||||
|
||||
return html_get_element_wrapper
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
@@ -41,12 +26,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Kooperative Berlin',
|
||||
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
|
||||
'release_date': '20160115',
|
||||
'creators': ['Kooperative Berlin'],
|
||||
'description': r're:Joachim Gauck, .*\n\nKamera: .*',
|
||||
'release_date': '20150716',
|
||||
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
|
||||
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https?://www\.bpb\.de/cache/images/7/297_teaser_16x9_1240\.jpg.*',
|
||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -55,11 +40,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '522184',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
|
||||
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
|
||||
'release_date': '20230621',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
|
||||
'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/4/522184_teaser_16x9_1240\.png.*',
|
||||
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -68,11 +54,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '518789',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
|
||||
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
|
||||
'release_date': '20230302',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
|
||||
'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/9/518789_teaser_16x9_1240\.jpeg.*',
|
||||
'title': 'md5:3e956f264bb501f6383f10495a401da4',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -84,12 +71,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '315813',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Axel Schröder',
|
||||
'creators': ['Axel Schröder'],
|
||||
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
|
||||
'release_date': '20200921',
|
||||
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
|
||||
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/3/315813_teaser_16x9_1240\.png.*',
|
||||
'title': 'Folge 1: Eine Einführung',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -98,12 +85,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '517806',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Bundeszentrale für politische Bildung',
|
||||
'creators': ['Bundeszentrale für politische Bildung'],
|
||||
'description': 'md5:594689600e919912aade0b2871cc3fed',
|
||||
'release_date': '20230127',
|
||||
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
|
||||
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/6/517806_teaser_16x9_1240\.png.*',
|
||||
'title': 'Die Weltanschauung der "Neuen Rechten"',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -147,7 +134,7 @@ class BpbIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
title_result = traverse_obj(webpage, ({find_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
|
||||
|
||||
return {
|
||||
@@ -156,15 +143,15 @@ class BpbIE(InfoExtractor):
|
||||
# This metadata could be interpreted otherwise, but it fits "series" the most
|
||||
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
|
||||
'description': join_nonempty(*traverse_obj(webpage, [(
|
||||
{html_get_element(cls='opening-intro')},
|
||||
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
|
||||
{find_element(cls='opening-intro')},
|
||||
[{find_element(tag='bpb-accordion-item')}, {find_element(cls='text-content')}],
|
||||
), {clean_html}]), delim='\n\n') or None,
|
||||
'creator': self._html_search_meta('author', webpage),
|
||||
'creators': traverse_obj(self._html_search_meta('author', webpage), all),
|
||||
'uploader': self._html_search_meta('publisher', webpage),
|
||||
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
|
||||
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
|
||||
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
|
||||
'formats': (':sources', ..., {self._process_source}),
|
||||
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
|
||||
'thumbnail': ('poster', {urljoin(url)}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -145,10 +145,9 @@ class BravoTVIE(AdobePassIE):
|
||||
tp_metadata = self._download_json(
|
||||
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
|
||||
|
||||
seconds_or_none = lambda x: float_or_none(x, 1000)
|
||||
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||
'start_time': ('startTime', {seconds_or_none}),
|
||||
'end_time': ('endTime', {seconds_or_none}),
|
||||
'start_time': ('startTime', {float_or_none(scale=1000)}),
|
||||
'end_time': ('endTime', {float_or_none(scale=1000)}),
|
||||
}))
|
||||
# prune pointless single chapters that span the entire duration from short videos
|
||||
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
|
||||
@@ -168,8 +167,8 @@ class BravoTVIE(AdobePassIE):
|
||||
**merge_dicts(traverse_obj(tp_metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {seconds_or_none}),
|
||||
'timestamp': ('pubDate', {seconds_or_none}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'timestamp': ('pubDate', {float_or_none(scale=1000)}),
|
||||
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
|
||||
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
|
||||
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
|
||||
|
||||
@@ -31,6 +31,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BrightcoveLegacyIE(InfoExtractor):
|
||||
@@ -935,8 +936,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
|
||||
if content_type == 'playlist':
|
||||
return self.playlist_result(
|
||||
[self._parse_brightcove_metadata(vid, vid.get('id'), headers)
|
||||
for vid in json_data.get('videos', []) if vid.get('id')],
|
||||
(self._parse_brightcove_metadata(vid, vid['id'], headers)
|
||||
for vid in traverse_obj(json_data, ('videos', lambda _, v: v['id']))),
|
||||
json_data.get('id'), json_data.get('name'),
|
||||
json_data.get('description'))
|
||||
|
||||
|
||||
@@ -8,11 +8,13 @@ from ..utils import (
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
format_field,
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BundestagIE(InfoExtractor):
|
||||
@@ -115,9 +117,8 @@ class BundestagIE(InfoExtractor):
|
||||
note='Downloading metadata overlay', fatal=False,
|
||||
), {
|
||||
'title': (
|
||||
{functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0,
|
||||
{functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||
'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
|
||||
{find_element(tag='h3')}, {functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||
'description': ({find_element(tag='p')}, {clean_html}),
|
||||
}))
|
||||
|
||||
return result
|
||||
|
||||
@@ -53,7 +53,7 @@ class CaffeineTVIE(InfoExtractor):
|
||||
'like_count': ('like_count', {int_or_none}),
|
||||
'view_count': ('view_count', {int_or_none}),
|
||||
'comment_count': ('comment_count', {int_or_none}),
|
||||
'tags': ('tags', ..., {str}, {lambda x: x or None}),
|
||||
'tags': ('tags', ..., {str}, filter),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_id': (((None, 'user'), 'username'), {str}, any),
|
||||
'is_live': ('is_live', {bool}),
|
||||
@@ -62,7 +62,7 @@ class CaffeineTVIE(InfoExtractor):
|
||||
'title': ('broadcast_title', {str}),
|
||||
'duration': ('content_duration', {int_or_none}),
|
||||
'timestamp': ('broadcast_start_time', {parse_iso8601}),
|
||||
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
|
||||
'thumbnail': ('preview_image_path', {urljoin(url)}),
|
||||
}),
|
||||
'age_limit': {
|
||||
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
|
||||
|
||||
@@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
|
||||
|
||||
|
||||
class CallinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
|
||||
'info_dict': {
|
||||
|
||||
@@ -4,7 +4,6 @@ import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
@@ -12,7 +11,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
@@ -455,8 +453,8 @@ class CBCPlayerIE(InfoExtractor):
|
||||
|
||||
chapters = traverse_obj(data, (
|
||||
'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
|
||||
'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}),
|
||||
'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}),
|
||||
'start_time': ('startTime', {float_or_none(scale=1000)}),
|
||||
'end_time': ('endTime', {float_or_none(scale=1000)}),
|
||||
'title': ('name', {str}),
|
||||
}))
|
||||
# Filter out pointless single chapters with start_time==0 and no end_time
|
||||
@@ -467,8 +465,8 @@ class CBCPlayerIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str.strip}),
|
||||
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
|
||||
'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}),
|
||||
'thumbnail': ('image', 'url', {url_or_none}, {update_url(query=None)}),
|
||||
'timestamp': ('publishedAt', {float_or_none(scale=1000)}),
|
||||
'media_type': ('media', 'clipType', {str}),
|
||||
'series': ('showName', {str}),
|
||||
'season_number': ('media', 'season', {int_or_none}),
|
||||
@@ -524,14 +522,13 @@ class CBCGemIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# This is a normal, public, TV show video
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||
'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s06e01',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoke Signals',
|
||||
'description': 'md5:929868d20021c924020641769eb3e7f1',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
||||
'duration': 1314,
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
|
||||
'duration': 1324,
|
||||
'categories': ['comedy'],
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season': 'Season 6',
|
||||
@@ -539,19 +536,21 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode': 'Smoke Signals',
|
||||
'episode_number': 1,
|
||||
'episode_id': 'schitts-creek/s06e01',
|
||||
'upload_date': '20210618',
|
||||
'timestamp': 1623988800,
|
||||
'release_date': '20200107',
|
||||
'release_timestamp': 1578427200,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# This video requires an account in the browser, but works fine in yt-dlp
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
|
||||
'md5': '297a9600f554f2258aed01514226a697',
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s01e01',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Cup Runneth Over',
|
||||
'description': 'md5:9bca14ea49ab808097530eb05a29e797',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg',
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
@@ -560,9 +559,12 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode_id': 'schitts-creek/s01e01',
|
||||
'duration': 1309,
|
||||
'categories': ['comedy'],
|
||||
'upload_date': '20210617',
|
||||
'timestamp': 1623902400,
|
||||
'release_date': '20151124',
|
||||
'release_timestamp': 1448323200,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
|
||||
'only_matching': True,
|
||||
@@ -631,38 +633,6 @@ class CBCGemIE(InfoExtractor):
|
||||
return
|
||||
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
||||
|
||||
def _find_secret_formats(self, formats, video_id):
|
||||
""" Find a valid video url and convert it to the secret variant """
|
||||
base_format = next((f for f in formats if f.get('vcodec') != 'none'), None)
|
||||
if not base_format:
|
||||
return
|
||||
|
||||
base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])
|
||||
url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
|
||||
|
||||
secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
|
||||
if not isinstance(secret_xml, xml.etree.ElementTree.Element):
|
||||
return
|
||||
|
||||
for child in secret_xml:
|
||||
if child.attrib.get('Type') != 'video':
|
||||
continue
|
||||
for video_quality in child:
|
||||
bitrate = int_or_none(video_quality.attrib.get('Bitrate'))
|
||||
if not bitrate or 'Index' not in video_quality.attrib:
|
||||
continue
|
||||
height = int_or_none(video_quality.attrib.get('MaxHeight'))
|
||||
|
||||
yield {
|
||||
**base_format,
|
||||
'format_id': join_nonempty('sec', height),
|
||||
# Note: \g<1> is necessary instead of \1 since bitrate is a number
|
||||
'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
|
||||
'width': int_or_none(video_quality.attrib.get('MaxWidth')),
|
||||
'tbr': bitrate / 1000.0,
|
||||
'height': height,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = self._download_json(
|
||||
@@ -676,7 +646,6 @@ class CBCGemIE(InfoExtractor):
|
||||
else:
|
||||
headers = {}
|
||||
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
|
||||
m3u8_url = m3u8_info.get('url')
|
||||
|
||||
if m3u8_info.get('errorCode') == 1:
|
||||
self.raise_geo_restricted(countries=['CA'])
|
||||
@@ -685,9 +654,9 @@ class CBCGemIE(InfoExtractor):
|
||||
elif m3u8_info.get('errorCode') != 0:
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
|
||||
self._remove_duplicate_formats(formats)
|
||||
formats.extend(self._find_secret_formats(formats, video_id))
|
||||
|
||||
for fmt in formats:
|
||||
if fmt.get('vcodec') == 'none':
|
||||
@@ -703,20 +672,21 @@ class CBCGemIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('description'),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'series': video_info.get('series'),
|
||||
'season_number': video_info.get('season'),
|
||||
'season': f'Season {video_info.get("season")}',
|
||||
'episode_number': video_info.get('episode'),
|
||||
'episode': video_info.get('title'),
|
||||
'episode_id': video_id,
|
||||
'duration': video_info.get('duration'),
|
||||
'categories': [video_info.get('category')],
|
||||
'formats': formats,
|
||||
'release_timestamp': video_info.get('airDate'),
|
||||
'timestamp': video_info.get('availableDate'),
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'series': ('series', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'categories': ('category', {str}, all),
|
||||
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ class CBSNewsBaseIE(InfoExtractor):
|
||||
**traverse_obj(item, {
|
||||
'title': (None, ('fulltitle', 'title')),
|
||||
'description': 'dek',
|
||||
'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}),
|
||||
'timestamp': ('timestamp', {float_or_none(scale=1000)}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'subtitles': ('captions', {get_subtitles}),
|
||||
'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),
|
||||
|
||||
@@ -12,53 +12,86 @@ from ..utils import (
|
||||
|
||||
|
||||
class CCMAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
|
||||
IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
|
||||
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
'md5': '7296ca43977c8ea4469e719c609b0871',
|
||||
'info_dict': {
|
||||
'id': '5630208',
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'espot de La Marató de TV3',
|
||||
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||
'timestamp': 1478608140,
|
||||
'upload_date': '20161108',
|
||||
'age_limit': 0,
|
||||
'alt_title': 'EsportMarató2016WEB_PerPublicar',
|
||||
'duration': 79,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
|
||||
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'categories': ['Divulgació'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
'md5': 'fa3e38f269329a278271276330261425',
|
||||
'info_dict': {
|
||||
'id': '943685',
|
||||
'ext': 'mp3',
|
||||
'title': 'El Consell de Savis analitza el derbi',
|
||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||
'upload_date': '20170512',
|
||||
'timestamp': 1494622500,
|
||||
'upload_date': '20161217',
|
||||
'timestamp': 1482011700,
|
||||
'vcodec': 'none',
|
||||
'categories': ['Esports'],
|
||||
'series': 'Tot gira',
|
||||
'duration': 821,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
|
||||
'md5': '27493513d08a3e5605814aee9bb778d2',
|
||||
'info_dict': {
|
||||
'id': '6031387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||
'timestamp': 1582577700,
|
||||
'timestamp': 1582577919,
|
||||
'upload_date': '20200224',
|
||||
'subtitles': 'mincount:4',
|
||||
'age_limit': 16,
|
||||
'subtitles': 'mincount:1',
|
||||
'age_limit': 13,
|
||||
'series': 'Crims',
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
|
||||
'duration': 3203,
|
||||
'categories': ['Divulgació'],
|
||||
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'episode_number': 5,
|
||||
'episode': 'Episode 5',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
|
||||
'info_dict': {
|
||||
'id': '5759227',
|
||||
'ext': 'mp4',
|
||||
'title': 'Una mosca volava per la llum',
|
||||
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
|
||||
'description': 'md5:9ab64276944b0825336f4147f13f7854',
|
||||
'series': 'Mic',
|
||||
'upload_date': '20180411',
|
||||
'timestamp': 1523440105,
|
||||
'duration': 160,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
|
||||
'categories': ['Música'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_type, media_id = self._match_valid_url(url).groups()
|
||||
media_type, media_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
media = self._download_json(
|
||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'format': 'dm',
|
||||
|
||||
@@ -12,6 +12,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_ord
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
@@ -351,3 +352,50 @@ class CDAIE(InfoExtractor):
|
||||
extract_format(webpage, resolution)
|
||||
|
||||
return merge_dicts(info_dict, info)
|
||||
|
||||
|
||||
class CDAFolderIE(InfoExtractor):
|
||||
_MAX_PAGE_SIZE = 36
|
||||
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cda.pl/domino264/folder/31188385',
|
||||
'info_dict': {
|
||||
'id': '31188385',
|
||||
'title': 'SERIA DRUGA',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
|
||||
'info_dict': {
|
||||
'id': '2664592',
|
||||
'title': 'VideoDowcipy - wszystkie odcinki',
|
||||
},
|
||||
'playlist_mincount': 71,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
|
||||
'info_dict': {
|
||||
'id': '19129979',
|
||||
'title': 'TESTY KOSMETYKÓW',
|
||||
},
|
||||
'playlist_mincount': 139,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
folder_id, channel = self._match_valid_url(url).group('id', 'channel')
|
||||
|
||||
webpage = self._download_webpage(url, folder_id)
|
||||
|
||||
def extract_page_entries(page):
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page + 1}', folder_id,
|
||||
f'Downloading page {page + 1}', expected_status=404)
|
||||
items = re.findall(r'<a[^>]+href="/video/([0-9a-z]+)"', webpage)
|
||||
for video_id in items:
|
||||
yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id)
|
||||
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(extract_page_entries, self._MAX_PAGE_SIZE),
|
||||
folder_id, self._og_search_title(webpage))
|
||||
|
||||
@@ -5,11 +5,12 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
lowercase_escape,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class ChaturbateIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.chaturbate.com/siswet19/',
|
||||
'info_dict': {
|
||||
@@ -29,16 +30,58 @@ class ChaturbateIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://en.chaturbate.com/siswet19/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chaturbate.eu/siswet19/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chaturbate.global/siswet19/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_ROOM_OFFLINE = 'Room is currently offline'
|
||||
_ERROR_MAP = {
|
||||
'offline': 'Room is currently offline',
|
||||
'private': 'Room is currently in a private show',
|
||||
'away': 'Performer is currently away',
|
||||
'password protected': 'Room is password protected',
|
||||
'hidden': 'Hidden session in progress',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
def _extract_from_api(self, video_id, tld):
|
||||
response = self._download_json(
|
||||
f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
|
||||
data=urlencode_postdata({'room_slug': video_id}),
|
||||
headers={
|
||||
**self.geo_verification_headers(),
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Accept': 'application/json',
|
||||
}, fatal=False, impersonate=True) or {}
|
||||
|
||||
m3u8_url = response.get('url')
|
||||
if not m3u8_url:
|
||||
status = response.get('room_status')
|
||||
if error := self._ERROR_MAP.get(status):
|
||||
raise ExtractorError(error, expected=True)
|
||||
if status == 'public':
|
||||
self.raise_geo_restricted()
|
||||
self.report_warning(f'Got status "{status}" from API; falling back to webpage extraction')
|
||||
return None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
|
||||
'is_live': True,
|
||||
'age_limit': 18,
|
||||
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
|
||||
}
|
||||
|
||||
def _extract_from_html(self, video_id, tld):
|
||||
webpage = self._download_webpage(
|
||||
f'https://chaturbate.com/{video_id}/', video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
f'https://chaturbate.{tld}/{video_id}/', video_id,
|
||||
headers=self.geo_verification_headers(), impersonate=True)
|
||||
|
||||
found_m3u8_urls = []
|
||||
|
||||
@@ -76,8 +119,8 @@ class ChaturbateIE(InfoExtractor):
|
||||
webpage, 'error', group='error', default=None)
|
||||
if not error:
|
||||
if any(p in webpage for p in (
|
||||
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
|
||||
error = self._ROOM_OFFLINE
|
||||
self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
|
||||
error = self._ERROR_MAP['offline']
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise ExtractorError('Unable to find stream URL')
|
||||
@@ -104,3 +147,7 @@ class ChaturbateIE(InfoExtractor):
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, tld = self._match_valid_url(url).group('id', 'tld')
|
||||
return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
UserNotLive,
|
||||
@@ -77,7 +75,7 @@ class CHZZKLiveIE(InfoExtractor):
|
||||
'thumbnails': thumbnails,
|
||||
**traverse_obj(live_detail, {
|
||||
'title': ('liveTitle', {str}),
|
||||
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
|
||||
'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
|
||||
'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
|
||||
'view_count': ('accumulateCount', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
@@ -146,23 +144,37 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
video_meta = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
|
||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
||||
query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
}, note='Downloading video playback', errnote='Unable to download video playback')
|
||||
|
||||
live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
|
||||
video_status = video_meta.get('vodStatus')
|
||||
if video_status == 'UPLOAD':
|
||||
playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
|
||||
elif video_status == 'ABR_HLS':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
|
||||
video_id, query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
})
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
|
||||
formats, subtitles = [], {}
|
||||
live_status = 'post_live' if live_status == 'was_live' else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'live_status': live_status,
|
||||
**traverse_obj(video_meta, {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
|
||||
'timestamp': ('publishDateAt', {float_or_none(scale=1000)}),
|
||||
'view_count': ('readCount', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
|
||||
@@ -3,6 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
@@ -129,7 +130,7 @@ class CineverseIE(CineverseBaseIE):
|
||||
'title': 'title',
|
||||
'id': ('details', 'item_id'),
|
||||
'description': ('details', 'description'),
|
||||
'duration': ('duration', {lambda x: x / 1000}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'cast': ('details', 'cast', {lambda x: x.split(', ')}),
|
||||
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
|
||||
'season_number': ('details', 'season', {int_or_none}),
|
||||
|
||||
@@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
|
||||
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P<domain>{_DOMAIN_RE})/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_EMBED_REGEX = [
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
|
||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||
@@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'id': '31c9291ab41fac05471db4e73aa11717',
|
||||
'ext': 'mp4',
|
||||
'title': '31c9291ab41fac05471db4e73aa11717',
|
||||
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||
'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
@@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'id': '0e8e040aec776862e1d632a699edf59e',
|
||||
'ext': 'mp4',
|
||||
'title': '0e8e040aec776862e1d632a699edf59e',
|
||||
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
|
||||
'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||
@@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'id': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'ext': 'mp4',
|
||||
'title': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||
'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
@@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
|
||||
video_id, domain = self._match_valid_url(url).group('id', 'domain')
|
||||
if domain != 'bytehighway.net':
|
||||
domain = 'cloudflarestream.com'
|
||||
base_url = f'https://{domain}/{video_id}/'
|
||||
if '.' in video_id:
|
||||
video_id = self._parse_json(base64.urlsafe_b64decode(
|
||||
|
||||
@@ -1,146 +1,225 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import merge_dicts, try_call, url_basename
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
try_call,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_elements, traverse_obj
|
||||
|
||||
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'md5': '3e6121ea48df7e2259fe73a0628605c4',
|
||||
'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'info_dict': {
|
||||
'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
|
||||
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nadal wins 8th French Open title',
|
||||
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
'upload_date': '20240531',
|
||||
'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
|
||||
'duration': 373.0,
|
||||
'timestamp': 1717148586,
|
||||
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
|
||||
'modified_date': '20240531',
|
||||
'modified_timestamp': 1717150140,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'info_dict': {
|
||||
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
|
||||
'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
|
||||
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'ext': 'mp4',
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."',
|
||||
'upload_date': '20130821',
|
||||
'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
|
||||
'title': 'Here’s how some inmates in closely divided state are now able to vote from jail',
|
||||
'timestamp': 1718158269,
|
||||
'upload_date': '20240612',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
|
||||
'duration': 202.0,
|
||||
'modified_date': '20240612',
|
||||
'modified_timestamp': 1718158509,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
|
||||
'info_dict': {
|
||||
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
|
||||
'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
|
||||
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
|
||||
'description': 'md5:19f78338ccec533db0fa8a4511012dae',
|
||||
'title': 'Video shows King Charles\' portrait being vandalized by activists',
|
||||
'timestamp': 1718113852,
|
||||
'upload_date': '20240611',
|
||||
'duration': 51.0,
|
||||
'modified_timestamp': 1718116193,
|
||||
'modified_date': '20240611',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'info_dict': {
|
||||
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
|
||||
'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
|
||||
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'ext': 'mp4',
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
|
||||
'duration': 158.0,
|
||||
'title': 'Robin Meade signs off after HLN\'s last broadcast',
|
||||
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
|
||||
'upload_date': '20221205',
|
||||
'timestamp': 1670284296,
|
||||
'modified_timestamp': 1670332404,
|
||||
'modified_date': '20221206',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'info_dict': {
|
||||
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'timestamp': 1729501452,
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
|
||||
'description': 'md5:256ee7137d161f776cda429654135e52',
|
||||
'upload_date': '20241021',
|
||||
'duration': 31.0,
|
||||
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
|
||||
'modified_date': '20241021',
|
||||
'modified_timestamp': 1729501530,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
|
||||
'only_matching': True,
|
||||
'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
|
||||
'info_dict': {
|
||||
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
|
||||
'info_dict': {
|
||||
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
|
||||
'ext': 'mp4',
|
||||
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
|
||||
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
|
||||
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
|
||||
'duration': 173.0,
|
||||
'timestamp': 1729122182,
|
||||
'upload_date': '20241016',
|
||||
'modified_timestamp': 1729194706,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'md5': '11604ab4af83b650826753f1ccb8ecff',
|
||||
'info_dict': {
|
||||
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
|
||||
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
|
||||
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
|
||||
'duration': 145.0,
|
||||
'timestamp': 1729137765,
|
||||
'upload_date': '20241017',
|
||||
'modified_timestamp': 1729138184,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}],
|
||||
}]
|
||||
|
||||
_CONFIG = {
|
||||
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
|
||||
'edition': {
|
||||
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
|
||||
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
|
||||
},
|
||||
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
|
||||
'money': {
|
||||
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
|
||||
'media_src': 'http://ht3.cdn.turner.com/money/big',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
|
||||
# TODO: fix timestamp extraction
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, path, page_title = self._match_valid_url(url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
display_id = self._match_valid_url(url).group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
app_id = traverse_obj(
|
||||
self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
|
||||
('TOP_AUTH_SERVICE_APP_ID', {str}))
|
||||
|
||||
entries = []
|
||||
for player_data in traverse_obj(webpage, (
|
||||
{find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
|
||||
..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
|
||||
media_id = player_data['data-media-id']
|
||||
parent_uri = player_data.get('data-video-resource-parent-uri')
|
||||
formats, subtitles = [], {}
|
||||
|
||||
video_data = {}
|
||||
if parent_uri:
|
||||
video_data = self._download_json(
|
||||
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
|
||||
query={
|
||||
'id': media_id,
|
||||
'stellarUri': parent_uri,
|
||||
})
|
||||
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
|
||||
resolution, bitrate = None, None
|
||||
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
|
||||
resolution, bitrate = mobj.group('res', 'tbr')
|
||||
formats.append({
|
||||
'url': direct_url,
|
||||
'format_id': 'direct',
|
||||
'quality': 1,
|
||||
'tbr': int_or_none(bitrate),
|
||||
**parse_resolution(resolution),
|
||||
})
|
||||
for sub_data in traverse_obj(video_data, (
|
||||
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
|
||||
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
|
||||
'url': sub_data['url'],
|
||||
'name': sub_data.get('label'),
|
||||
})
|
||||
|
||||
if app_id:
|
||||
media_data = self._download_json(
|
||||
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
|
||||
query={'appId': app_id})
|
||||
m3u8_url = traverse_obj(media_data, (
|
||||
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
|
||||
if m3u8_url:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
entries.append({
|
||||
**traverse_obj(player_data, {
|
||||
'title': ('data-headline', {clean_html}),
|
||||
'description': ('data-description', {clean_html}),
|
||||
'duration': ('data-duration', {parse_duration}),
|
||||
'timestamp': ('data-publish-date', {parse_iso8601}),
|
||||
'thumbnail': (
|
||||
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
|
||||
{update_url(query='c=original')}),
|
||||
'display_id': 'data-video-slug',
|
||||
}),
|
||||
**traverse_obj(video_data, {
|
||||
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
|
||||
'description': ('description', {clean_html}),
|
||||
'title': ('headline', {str}),
|
||||
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
|
||||
'duration': ('trt', {int_or_none}),
|
||||
}),
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**entries[0],
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
|
||||
_TEST = {
|
||||
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
|
||||
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Criminalizing journalism?',
|
||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||
'upload_date': '20140209',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
||||
return self.url_result(cnn_url, CNNIE.ie_key())
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: Cyberattack not an act of war',
|
||||
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
|
||||
class CNNIndonesiaIE(InfoExtractor):
|
||||
|
||||
@@ -25,7 +25,6 @@ import xml.etree.ElementTree
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_os_name,
|
||||
urllib_req_to_req,
|
||||
)
|
||||
from ..cookies import LenientSimpleCookie
|
||||
@@ -35,6 +34,7 @@ from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
TransportError,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
@@ -46,6 +46,7 @@ from ..utils import (
|
||||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
ISO639Utils,
|
||||
LenientJSONDecoder,
|
||||
Popen,
|
||||
RegexNotFoundError,
|
||||
@@ -277,6 +278,7 @@ class InfoExtractor:
|
||||
thumbnails: A list of dictionaries, with the following entries:
|
||||
* "id" (optional, string) - Thumbnail format ID
|
||||
* "url"
|
||||
* "ext" (optional, string) - actual image extension if not given in URL
|
||||
* "preference" (optional, int) - quality of the image
|
||||
* "width" (optional, int)
|
||||
* "height" (optional, int)
|
||||
@@ -332,7 +334,7 @@ class InfoExtractor:
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
repost_count: Number of reposts of the video
|
||||
average_rating: Average rating give by users, the scale used depends on the webpage
|
||||
average_rating: Average rating given by users, the scale used depends on the webpage
|
||||
comment_count: Number of comments on the video
|
||||
comments: A list of comments, each with one or more of the following
|
||||
properties (all but one of text or html optional):
|
||||
@@ -519,7 +521,7 @@ class InfoExtractor:
|
||||
or _extract_from_webpage as necessary. While these are normally classmethods,
|
||||
_extract_from_webpage is allowed to be an instance method.
|
||||
|
||||
_extract_from_webpage may raise self.StopExtraction() to stop further
|
||||
_extract_from_webpage may raise self.StopExtraction to stop further
|
||||
processing of the webpage and obtain exclusive rights to it. This is useful
|
||||
when the extractor cannot reliably be matched using just the URL,
|
||||
e.g. invidious/peertube instances
|
||||
@@ -572,13 +574,13 @@ class InfoExtractor:
|
||||
|
||||
def _login_hint(self, method=NO_DEFAULT, netrc=None):
|
||||
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
|
||||
cookies_hint = 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'
|
||||
return {
|
||||
None: '',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}. {cookies_hint}',
|
||||
'password': f'Use {password_hint}',
|
||||
'cookies': (
|
||||
'Use --cookies-from-browser or --cookies for the authentication. '
|
||||
'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
|
||||
'cookies': f'Use --cookies-from-browser or --cookies for the authentication. {cookies_hint}',
|
||||
'session_cookies': f'Use --cookies for the authentication (--cookies-from-browser might not work). {cookies_hint}',
|
||||
}[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
@@ -965,6 +967,9 @@ class InfoExtractor:
|
||||
return False
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data)
|
||||
if content is False:
|
||||
assert not fatal
|
||||
return False
|
||||
return (content, urlh)
|
||||
|
||||
@staticmethod
|
||||
@@ -1023,7 +1028,7 @@ class InfoExtractor:
|
||||
filename = sanitize_filename(f'{basen}.dump', restricted=True)
|
||||
# Working around MAX_PATH limitation on Windows (see
|
||||
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
|
||||
if compat_os_name == 'nt':
|
||||
if os.name == 'nt':
|
||||
absfilepath = os.path.abspath(filename)
|
||||
if len(absfilepath) > 259:
|
||||
filename = fR'\\?\{absfilepath}'
|
||||
@@ -1039,7 +1044,15 @@ class InfoExtractor:
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||
prefix=None, encoding=None, data=None):
|
||||
webpage_bytes = urlh.read()
|
||||
try:
|
||||
webpage_bytes = urlh.read()
|
||||
except TransportError as err:
|
||||
errmsg = f'{video_id}: Error reading response: {err.msg}'
|
||||
if fatal:
|
||||
raise ExtractorError(errmsg, cause=err)
|
||||
self.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
@@ -1396,6 +1409,13 @@ class InfoExtractor:
|
||||
return None, None
|
||||
|
||||
self.write_debug(f'Using netrc for {netrc_machine} authentication')
|
||||
|
||||
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
|
||||
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
|
||||
if sys.version_info < (3, 11):
|
||||
return tuple(x if x != '""' else '' for x in info[::2])
|
||||
|
||||
return info[0], info[2]
|
||||
|
||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||
@@ -1558,7 +1578,9 @@ class InfoExtractor:
|
||||
if default is not NO_DEFAULT:
|
||||
fatal = False
|
||||
for mobj in re.finditer(JSON_LD_RE, html):
|
||||
json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal)
|
||||
json_ld_item = self._parse_json(
|
||||
mobj.group('json_ld'), video_id, fatal=fatal,
|
||||
errnote=False if default is not NO_DEFAULT else None)
|
||||
for json_ld in variadic(json_ld_item):
|
||||
if isinstance(json_ld, dict):
|
||||
yield json_ld
|
||||
@@ -1698,7 +1720,7 @@ class InfoExtractor:
|
||||
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
|
||||
if rating is not None:
|
||||
info['average_rating'] = rating
|
||||
if is_type(e, 'TVEpisode', 'Episode'):
|
||||
if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
|
||||
episode_name = unescapeHTML(e.get('name'))
|
||||
info.update({
|
||||
'episode': episode_name,
|
||||
@@ -1832,12 +1854,26 @@ class InfoExtractor:
|
||||
|
||||
@staticmethod
|
||||
def _remove_duplicate_formats(formats):
|
||||
format_urls = set()
|
||||
seen_urls = set()
|
||||
seen_fragment_urls = set()
|
||||
unique_formats = []
|
||||
for f in formats:
|
||||
if f['url'] not in format_urls:
|
||||
format_urls.add(f['url'])
|
||||
fragments = f.get('fragments')
|
||||
if callable(fragments):
|
||||
unique_formats.append(f)
|
||||
|
||||
elif fragments:
|
||||
fragment_urls = frozenset(
|
||||
fragment.get('url') or urljoin(f['fragment_base_url'], fragment['path'])
|
||||
for fragment in fragments)
|
||||
if fragment_urls not in seen_fragment_urls:
|
||||
seen_fragment_urls.add(fragment_urls)
|
||||
unique_formats.append(f)
|
||||
|
||||
elif f['url'] not in seen_urls:
|
||||
seen_urls.add(f['url'])
|
||||
unique_formats.append(f)
|
||||
|
||||
formats[:] = unique_formats
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video', headers={}):
|
||||
@@ -2065,7 +2101,7 @@ class InfoExtractor:
|
||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
|
||||
if self.get_param('hls_split_discontinuity', False):
|
||||
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||
@@ -2800,11 +2836,11 @@ class InfoExtractor:
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if try_call(lambda: base_url_e.text) is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
if re.match(r'https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and base_url.startswith('/'):
|
||||
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
|
||||
elif mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
elif mpd_base_url and not re.match(r'https?://', base_url):
|
||||
if not mpd_base_url.endswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
@@ -2894,7 +2930,7 @@ class InfoExtractor:
|
||||
}
|
||||
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
return 'url' if re.match(r'https?://', location) else 'path'
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
@@ -3059,7 +3095,11 @@ class InfoExtractor:
|
||||
url_pattern = stream.attrib['Url']
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
stream_language = stream.get('Language', 'und')
|
||||
# IsmFD expects ISO 639 Set 2 language codes (3-character length)
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
|
||||
stream_language = stream.get('Language') or 'und'
|
||||
if len(stream_language) != 3:
|
||||
stream_language = ISO639Utils.short2long(stream_language) or 'und'
|
||||
for track in stream.findall('QualityLevel'):
|
||||
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
|
||||
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
|
||||
@@ -3489,7 +3529,7 @@ class InfoExtractor:
|
||||
continue
|
||||
urls.add(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
ext = determine_ext(source_url, default_ext=mimetype2ext(source_type))
|
||||
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@@ -3741,7 +3781,7 @@ class InfoExtractor:
|
||||
""" Merge subtitle dictionaries, language by language. """
|
||||
if target is None:
|
||||
target = {}
|
||||
for d in dicts:
|
||||
for d in filter(None, dicts):
|
||||
for lang, subs in d.items():
|
||||
target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
|
||||
return target
|
||||
@@ -3763,7 +3803,7 @@ class InfoExtractor:
|
||||
def mark_watched(self, *args, **kwargs):
|
||||
if not self.get_param('mark_watched', False):
|
||||
return
|
||||
if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed:
|
||||
if (self.supports_login() and self._get_login_info()[0] is not None) or self._cookies_passed:
|
||||
self._mark_watched(*args, **kwargs)
|
||||
|
||||
def _mark_watched(self, *args, **kwargs):
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -112,8 +113,7 @@ class CondeNastIE(InfoExtractor):
|
||||
m_paths = re.finditer(
|
||||
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
|
||||
paths = orderedSet(m.group(1) for m in m_paths)
|
||||
build_url = lambda path: urllib.parse.urljoin(base_url, path)
|
||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||
entries = [self.url_result(urljoin(base_url, path), 'CondeNast') for path in paths]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
def _extract_video_params(self, webpage, display_id):
|
||||
|
||||
@@ -1,692 +0,0 @@
|
||||
import base64
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
jwt_decode_hs256,
|
||||
parse_age_limit,
|
||||
parse_count,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class CrunchyrollBaseIE(InfoExtractor):
|
||||
_BASE_URL = 'https://www.crunchyroll.com'
|
||||
_API_BASE = 'https://api.crunchyroll.com'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_SWITCH_USER_AGENT = 'Crunchyroll/1.8.0 Nintendo Switch/12.3.12.0 UE4/4.27'
|
||||
_REFRESH_TOKEN = None
|
||||
_AUTH_HEADERS = None
|
||||
_AUTH_EXPIRY = None
|
||||
_API_ENDPOINT = None
|
||||
_BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
|
||||
't-kdgp2h8c3jub8fn0fq',
|
||||
'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
|
||||
)).encode()).decode()
|
||||
_IS_PREMIUM = None
|
||||
_LOCALE_LOOKUP = {
|
||||
'ar': 'ar-SA',
|
||||
'de': 'de-DE',
|
||||
'': 'en-US',
|
||||
'es': 'es-419',
|
||||
'es-es': 'es-ES',
|
||||
'fr': 'fr-FR',
|
||||
'it': 'it-IT',
|
||||
'pt-br': 'pt-BR',
|
||||
'pt-pt': 'pt-PT',
|
||||
'ru': 'ru-RU',
|
||||
'hi': 'hi-IN',
|
||||
}
|
||||
|
||||
def _set_auth_info(self, response):
|
||||
CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
|
||||
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
|
||||
CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
|
||||
def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
|
||||
try:
|
||||
return self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
|
||||
headers=headers, data=urlencode_postdata(data), impersonate=True)
|
||||
except ExtractorError as error:
|
||||
if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
|
||||
raise
|
||||
if target := error.cause.response.extensions.get('impersonate'):
|
||||
raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
|
||||
raise ExtractorError(
|
||||
'Request blocked by Cloudflare. '
|
||||
'Install the required impersonation dependency if possible, '
|
||||
'or else navigate to Crunchyroll in your browser, '
|
||||
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
|
||||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if not CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
return
|
||||
|
||||
try:
|
||||
login_response = self._request_token(
|
||||
headers={'Authorization': self._BASIC_AUTH}, data={
|
||||
'username': username,
|
||||
'password': password,
|
||||
'grant_type': 'password',
|
||||
'scope': 'offline_access',
|
||||
}, note='Logging in', errnote='Failed to log in')
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
|
||||
self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
|
||||
self._set_auth_info(login_response)
|
||||
|
||||
def _update_auth(self):
|
||||
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
|
||||
return
|
||||
|
||||
auth_headers = {'Authorization': self._BASIC_AUTH}
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
data = {
|
||||
'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
|
||||
'grant_type': 'refresh_token',
|
||||
'scope': 'offline_access',
|
||||
}
|
||||
else:
|
||||
data = {'grant_type': 'client_id'}
|
||||
auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
|
||||
try:
|
||||
auth_response = self._request_token(auth_headers, data)
|
||||
except ExtractorError as error:
|
||||
username, password = self._get_login_info()
|
||||
if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
|
||||
raise
|
||||
self.to_screen('Refresh token has expired. Re-logging in')
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = None
|
||||
self.cache.store(self._NETRC_MACHINE, username, None)
|
||||
self._perform_login(username, password)
|
||||
return
|
||||
|
||||
self._set_auth_info(auth_response)
|
||||
|
||||
def _locale_from_language(self, language):
|
||||
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
|
||||
return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language)
|
||||
|
||||
def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
|
||||
self._update_auth()
|
||||
|
||||
if not endpoint.startswith('/'):
|
||||
endpoint = f'/{endpoint}'
|
||||
|
||||
query = query.copy()
|
||||
locale = self._locale_from_language(lang)
|
||||
if locale:
|
||||
query['locale'] = locale
|
||||
|
||||
return self._download_json(
|
||||
f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)
|
||||
|
||||
def _call_api(self, path, internal_id, lang, note='api', query={}):
|
||||
if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
|
||||
path = f'/content/v2/{self._API_ENDPOINT}/{path}'
|
||||
|
||||
try:
|
||||
result = self._call_base_api(
|
||||
path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 404:
|
||||
return None
|
||||
raise
|
||||
|
||||
if not result:
|
||||
raise ExtractorError(f'Unexpected response when downloading {note} JSON')
|
||||
return result
|
||||
|
||||
def _extract_chapters(self, internal_id):
|
||||
# if no skip events are available, a 403 xml error is returned
|
||||
skip_events = self._download_json(
|
||||
f'https://static.crunchyroll.com/skip-events/production/{internal_id}.json',
|
||||
internal_id, note='Downloading chapter info', fatal=False, errnote=False)
|
||||
if not skip_events:
|
||||
return None
|
||||
|
||||
chapters = []
|
||||
for event in ('recap', 'intro', 'credits', 'preview'):
|
||||
start = traverse_obj(skip_events, (event, 'start', {float_or_none}))
|
||||
end = traverse_obj(skip_events, (event, 'end', {float_or_none}))
|
||||
# some chapters have no start and/or ending time, they will just be ignored
|
||||
if start is None or end is None:
|
||||
continue
|
||||
chapters.append({'title': event.capitalize(), 'start_time': start, 'end_time': end})
|
||||
|
||||
return chapters
|
||||
|
||||
def _extract_stream(self, identifier, display_id=None):
|
||||
if not display_id:
|
||||
display_id = identifier
|
||||
|
||||
self._update_auth()
|
||||
headers = {**CrunchyrollBaseIE._AUTH_HEADERS, 'User-Agent': self._SWITCH_USER_AGENT}
|
||||
try:
|
||||
stream_response = self._download_json(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
||||
display_id, note='Downloading stream info', errnote='Failed to download stream info', headers=headers)
|
||||
except ExtractorError as error:
|
||||
if self.get_param('ignore_no_formats_error'):
|
||||
self.report_warning(error.orig_msg)
|
||||
return [], {}
|
||||
elif isinstance(error.cause, HTTPError) and error.cause.status == 420:
|
||||
raise ExtractorError(
|
||||
'You have reached the rate-limit for active streams; try again later', expected=True)
|
||||
raise
|
||||
|
||||
available_formats = {'': ('', '', stream_response['url'])}
|
||||
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
|
||||
available_formats[hardsub_lang] = (f'hardsub-{hardsub_lang}', hardsub_lang, stream['url'])
|
||||
|
||||
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
|
||||
hardsub_langs = [lang for lang in available_formats if lang]
|
||||
if hardsub_langs and 'all' not in requested_hardsubs:
|
||||
full_format_langs = set(requested_hardsubs)
|
||||
self.to_screen(f'Available hardsub languages: {", ".join(hardsub_langs)}')
|
||||
self.to_screen(
|
||||
'To extract formats of a hardsub language, use '
|
||||
'"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
|
||||
'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
|
||||
only_once=True)
|
||||
else:
|
||||
full_format_langs = set(map(str.lower, available_formats))
|
||||
|
||||
audio_locale = traverse_obj(stream_response, ('audioLocale', {str}))
|
||||
hardsub_preference = qualities(requested_hardsubs[::-1])
|
||||
formats, subtitles = [], {}
|
||||
for format_id, hardsub_lang, stream_url in available_formats.values():
|
||||
if hardsub_lang.lower() in full_format_langs:
|
||||
adaptive_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
|
||||
stream_url, display_id, mpd_id=format_id, headers=CrunchyrollBaseIE._AUTH_HEADERS,
|
||||
fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
|
||||
self._merge_subtitles(dash_subs, target=subtitles)
|
||||
else:
|
||||
continue # XXX: Update this if meta mpd formats work; will be tricky with token invalidation
|
||||
for f in adaptive_formats:
|
||||
if f.get('acodec') != 'none':
|
||||
f['language'] = audio_locale
|
||||
f['quality'] = hardsub_preference(hardsub_lang.lower())
|
||||
formats.extend(adaptive_formats)
|
||||
|
||||
for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
|
||||
subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))
|
||||
|
||||
# Invalidate stream token to avoid rate-limit
|
||||
error_msg = 'Unable to invalidate stream token; you may experience rate-limiting'
|
||||
if stream_token := stream_response.get('token'):
|
||||
self._request_webpage(Request(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/token/{identifier}/{stream_token}/inactive',
|
||||
headers=headers, method='PATCH'), display_id, 'Invalidating stream token', error_msg, fatal=False)
|
||||
else:
|
||||
self.report_warning(error_msg)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
|
||||
class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
|
||||
_API_ENDPOINT = 'cms'
|
||||
_CMS_EXPIRY = None
|
||||
|
||||
def _call_cms_api_signed(self, path, internal_id, lang, note='api'):
|
||||
if not CrunchyrollCmsBaseIE._CMS_EXPIRY or CrunchyrollCmsBaseIE._CMS_EXPIRY <= time_seconds():
|
||||
response = self._call_base_api('index/v2', None, lang, 'Retrieving signed policy')['cms_web']
|
||||
CrunchyrollCmsBaseIE._CMS_QUERY = {
|
||||
'Policy': response['policy'],
|
||||
'Signature': response['signature'],
|
||||
'Key-Pair-Id': response['key_pair_id'],
|
||||
}
|
||||
CrunchyrollCmsBaseIE._CMS_BUCKET = response['bucket']
|
||||
CrunchyrollCmsBaseIE._CMS_EXPIRY = parse_iso8601(response['expires']) - 10
|
||||
|
||||
if not path.startswith('/cms/v2'):
|
||||
path = f'/cms/v2{CrunchyrollCmsBaseIE._CMS_BUCKET}/{path}'
|
||||
|
||||
return self._call_base_api(
|
||||
path, internal_id, lang, f'Downloading {note} JSON (signed cms)', query=CrunchyrollCmsBaseIE._CMS_QUERY)
|
||||
|
||||
|
||||
class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:beta\.|www\.)?crunchyroll\.com/
|
||||
(?:(?P<lang>\w{2}(?:-\w{2})?)/)?
|
||||
watch/(?!concert|musicvideo)(?P<id>\w+)'''
|
||||
_TESTS = [{
|
||||
# Premium only
|
||||
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
|
||||
'info_dict': {
|
||||
'id': 'GY2P1Q98Y',
|
||||
'ext': 'mp4',
|
||||
'duration': 1380.241,
|
||||
'timestamp': 1459632600,
|
||||
'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
|
||||
'title': 'World Trigger Episode 73 – To the Future',
|
||||
'upload_date': '20160402',
|
||||
'series': 'World Trigger',
|
||||
'series_id': 'GR757DMKY',
|
||||
'season': 'World Trigger',
|
||||
'season_id': 'GR9P39NJ6',
|
||||
'season_number': 1,
|
||||
'episode': 'To the Future',
|
||||
'episode_number': 73,
|
||||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'chapters': 'count:2',
|
||||
'age_limit': 14,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
'extractor_args': {'crunchyrollbeta': {'hardsub': ['de-DE']}},
|
||||
'format': 'bv[format_id~=hardsub]',
|
||||
},
|
||||
}, {
|
||||
# Premium only
|
||||
'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
|
||||
'info_dict': {
|
||||
'id': 'GYE5WKQGR',
|
||||
'ext': 'mp4',
|
||||
'duration': 366.459,
|
||||
'timestamp': 1476788400,
|
||||
'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
|
||||
'title': 'SHELTER – Porter Robinson presents Shelter the Animation',
|
||||
'upload_date': '20161018',
|
||||
'series': 'SHELTER',
|
||||
'series_id': 'GYGG09WWY',
|
||||
'season': 'SHELTER',
|
||||
'season_id': 'GR09MGK4R',
|
||||
'season_number': 1,
|
||||
'episode': 'Porter Robinson presents Shelter the Animation',
|
||||
'episode_number': 0,
|
||||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'age_limit': 14,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/GJWU2VKK3/cherry-blossom-meeting-and-a-coming-blizzard',
|
||||
'info_dict': {
|
||||
'id': 'GJWU2VKK3',
|
||||
'ext': 'mp4',
|
||||
'duration': 1420.054,
|
||||
'description': 'md5:2d1c67c0ec6ae514d9c30b0b99a625cd',
|
||||
'title': 'The Ice Guy and His Cool Female Colleague Episode 1 – Cherry Blossom Meeting and a Coming Blizzard',
|
||||
'series': 'The Ice Guy and His Cool Female Colleague',
|
||||
'series_id': 'GW4HM75NP',
|
||||
'season': 'The Ice Guy and His Cool Female Colleague',
|
||||
'season_id': 'GY9PC21VE',
|
||||
'season_number': 1,
|
||||
'episode': 'Cherry Blossom Meeting and a Coming Blizzard',
|
||||
'episode_number': 1,
|
||||
'chapters': 'count:2',
|
||||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'timestamp': 1672839000,
|
||||
'upload_date': '20230104',
|
||||
'age_limit': 14,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/GM8F313NQ',
|
||||
'info_dict': {
|
||||
'id': 'GM8F313NQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Garakowa -Restore the World-',
|
||||
'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
|
||||
'duration': 3996.104,
|
||||
'age_limit': 13,
|
||||
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'no longer exists',
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
|
||||
'info_dict': {
|
||||
'id': 'G62PEZ2E6',
|
||||
'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
|
||||
'age_limit': 13,
|
||||
'duration': 65.138,
|
||||
'title': 'Garakowa -Restore the World-',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
|
||||
'only_matching': True,
|
||||
}]
|
||||
# We want to support lazy playlist filtering and movie listings cannot be inside a playlist
|
||||
_RETURN_TYPE = 'video'
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id = self._match_valid_url(url).group('lang', 'id')
|
||||
|
||||
# We need to use unsigned API call to allow ratings query string
|
||||
response = traverse_obj(self._call_api(
|
||||
f'objects/{internal_id}', internal_id, lang, 'object info', {'ratings': 'true'}), ('data', 0, {dict}))
|
||||
if not response:
|
||||
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
|
||||
|
||||
object_type = response.get('type')
|
||||
if object_type == 'episode':
|
||||
result = self._transform_episode_response(response)
|
||||
|
||||
elif object_type == 'movie':
|
||||
result = self._transform_movie_response(response)
|
||||
|
||||
elif object_type == 'movie_listing':
|
||||
first_movie_id = traverse_obj(response, ('movie_listing_metadata', 'first_movie_id'))
|
||||
if not self._yes_playlist(internal_id, first_movie_id):
|
||||
return self.url_result(f'{self._BASE_URL}/{lang}watch/{first_movie_id}', CrunchyrollBetaIE, first_movie_id)
|
||||
|
||||
def entries():
|
||||
movies = self._call_api(f'movie_listings/{internal_id}/movies', internal_id, lang, 'movie list')
|
||||
for movie_response in traverse_obj(movies, ('data', ...)):
|
||||
yield self.url_result(
|
||||
f'{self._BASE_URL}/{lang}watch/{movie_response["id"]}',
|
||||
CrunchyrollBetaIE, **self._transform_movie_response(movie_response))
|
||||
|
||||
return self.playlist_result(entries(), **self._transform_movie_response(response))
|
||||
|
||||
else:
|
||||
raise ExtractorError(f'Unknown object type {object_type}')
|
||||
|
||||
if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
|
||||
message = f'This {object_type} is for premium members only'
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
self.raise_no_formats(message, expected=True, video_id=internal_id)
|
||||
else:
|
||||
self.raise_login_required(message, method='password', metadata_available=True)
|
||||
else:
|
||||
result['formats'], result['subtitles'] = self._extract_stream(internal_id)
|
||||
|
||||
result['chapters'] = self._extract_chapters(internal_id)
|
||||
|
||||
def calculate_count(item):
|
||||
return parse_count(''.join((item['displayed'], item.get('unit') or '')))
|
||||
|
||||
result.update(traverse_obj(response, ('rating', {
|
||||
'like_count': ('up', {calculate_count}),
|
||||
'dislike_count': ('down', {calculate_count}),
|
||||
})))
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _transform_episode_response(data):
|
||||
metadata = traverse_obj(data, (('episode_metadata', None), {dict}), get_all=False) or {}
|
||||
return {
|
||||
'id': data['id'],
|
||||
'title': ' \u2013 '.join((
|
||||
('{}{}'.format(
|
||||
format_field(metadata, 'season_title'),
|
||||
format_field(metadata, 'episode', ' Episode %s'))),
|
||||
format_field(data, 'title'))),
|
||||
**traverse_obj(data, {
|
||||
'episode': ('title', {str}),
|
||||
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
|
||||
'thumbnails': ('images', 'thumbnail', ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
|
||||
'timestamp': ('upload_date', {parse_iso8601}),
|
||||
'series': ('series_title', {str}),
|
||||
'series_id': ('series_id', {str}),
|
||||
'season': ('season_title', {str}),
|
||||
'season_id': ('season_id', {str}),
|
||||
'season_number': ('season_number', ({int}, {float_or_none})),
|
||||
'episode_number': ('sequence_number', ({int}, {float_or_none})),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
'language': ('audio_locale', {str}),
|
||||
}, get_all=False),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _transform_movie_response(data):
|
||||
metadata = traverse_obj(data, (('movie_metadata', 'movie_listing_metadata', None), {dict}), get_all=False) or {}
|
||||
return {
|
||||
'id': data['id'],
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
|
||||
'thumbnails': ('images', 'thumbnail', ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE):
|
||||
IE_NAME = 'crunchyroll:playlist'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:beta\.|www\.)?crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
series/(?P<id>\w+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
|
||||
'info_dict': {
|
||||
'id': 'GY19NQ2QR',
|
||||
'title': 'Girl Friend BETA',
|
||||
'description': 'md5:99c1b22ee30a74b536a8277ced8eb750',
|
||||
# XXX: `thumbnail` does not get set from `thumbnails` in playlist
|
||||
# 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id = self._match_valid_url(url).group('lang', 'id')
|
||||
|
||||
def entries():
|
||||
seasons_response = self._call_cms_api_signed(f'seasons?series_id={internal_id}', internal_id, lang, 'seasons')
|
||||
for season in traverse_obj(seasons_response, ('items', ..., {dict})):
|
||||
episodes_response = self._call_cms_api_signed(
|
||||
f'episodes?season_id={season["id"]}', season['id'], lang, 'episode list')
|
||||
for episode_response in traverse_obj(episodes_response, ('items', ..., {dict})):
|
||||
yield self.url_result(
|
||||
f'{self._BASE_URL}/{lang}watch/{episode_response["id"]}',
|
||||
CrunchyrollBetaIE, **CrunchyrollBetaIE._transform_episode_response(episode_response))
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), internal_id,
|
||||
**traverse_obj(self._call_api(f'series/{internal_id}', internal_id, lang, 'series'), ('data', 0, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {lambda x: x.replace(r'\r\n', '\n')}),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
'thumbnails': ('images', ..., ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
})))
|
||||
|
||||
|
||||
class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
||||
IE_NAME = 'crunchyroll:music'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'MV5B02C79',
|
||||
'display_id': 'egaono-hana',
|
||||
'title': 'Egaono Hana',
|
||||
'track': 'Egaono Hana',
|
||||
'artists': ['Goose house'],
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'genres': ['J-Pop'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'MV88BB7F2C',
|
||||
'display_id': 'crossing-field',
|
||||
'title': 'Crossing Field',
|
||||
'track': 'Crossing Field',
|
||||
'artists': ['LiSA'],
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'genres': ['Anime'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'no longer exists',
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'MC2E2AC135',
|
||||
'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
|
||||
'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
|
||||
'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
|
||||
'artists': ['LiSA'],
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'description': 'md5:747444e7e6300907b7a43f0a0503072e',
|
||||
'genres': ['J-Pop'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_ENDPOINT = 'music'
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id, object_type = self._match_valid_url(url).group('lang', 'id', 'type')
|
||||
path, name = {
|
||||
'concert': ('concerts', 'concert info'),
|
||||
'musicvideo': ('music_videos', 'music video info'),
|
||||
}[object_type]
|
||||
response = traverse_obj(self._call_api(f'{path}/{internal_id}', internal_id, lang, name), ('data', 0, {dict}))
|
||||
if not response:
|
||||
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
|
||||
|
||||
result = self._transform_music_response(response)
|
||||
|
||||
if not self._IS_PREMIUM and response.get('isPremiumOnly'):
|
||||
message = f'This {response.get("type") or "media"} is for premium members only'
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
self.raise_no_formats(message, expected=True, video_id=internal_id)
|
||||
else:
|
||||
self.raise_login_required(message, method='password', metadata_available=True)
|
||||
else:
|
||||
result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _transform_music_response(data):
|
||||
return {
|
||||
'id': data['id'],
|
||||
**traverse_obj(data, {
|
||||
'display_id': 'slug',
|
||||
'title': 'title',
|
||||
'track': 'title',
|
||||
'artists': ('artist', 'name', all),
|
||||
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
|
||||
'thumbnails': ('images', ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
'genres': ('genres', ..., 'displayValue'),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class CrunchyrollArtistIE(CrunchyrollBaseIE):
|
||||
IE_NAME = 'crunchyroll:artist'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
artist/(?P<id>\w{10})'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.crunchyroll.com/artist/MA179CB50D',
|
||||
'info_dict': {
|
||||
'id': 'MA179CB50D',
|
||||
'title': 'LiSA',
|
||||
'genres': ['Anime', 'J-Pop', 'Rock'],
|
||||
'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
|
||||
},
|
||||
'playlist_mincount': 83,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/artist/MA179CB50D/lisa',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_ENDPOINT = 'music'
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id = self._match_valid_url(url).group('lang', 'id')
|
||||
response = traverse_obj(self._call_api(
|
||||
f'artists/{internal_id}', internal_id, lang, 'artist info'), ('data', 0))
|
||||
|
||||
def entries():
|
||||
for attribute, path in [('concerts', 'concert'), ('videos', 'musicvideo')]:
|
||||
for internal_id in traverse_obj(response, (attribute, ...)):
|
||||
yield self.url_result(f'{self._BASE_URL}/watch/{path}/{internal_id}', CrunchyrollMusicIE, internal_id)
|
||||
|
||||
return self.playlist_result(entries(), **self._transform_artist_response(response))
|
||||
|
||||
@staticmethod
|
||||
def _transform_artist_response(data):
|
||||
return {
|
||||
'id': data['id'],
|
||||
**traverse_obj(data, {
|
||||
'title': 'name',
|
||||
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
|
||||
'thumbnails': ('images', ..., ..., {
|
||||
'url': ('source', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
'genres': ('genres', ..., 'displayValue'),
|
||||
}),
|
||||
}
|
||||
@@ -1,14 +1,27 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import orderedSet
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from ..utils import extract_attributes, orderedSet
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class CTVNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||
_BASE_REGEX = r'https?://(?:[^.]+\.)?ctvnews\.ca/'
|
||||
_VIDEO_ID_RE = r'(?P<id>\d{5,})'
|
||||
_PLAYLIST_ID_RE = r'(?P<id>\d\.\d{5,})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
|
||||
rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
|
||||
rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
|
||||
rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}/?(?:$|[?#])',
|
||||
rf'{_BASE_REGEX}(?!video/)[^?#]+\?binId={_PLAYLIST_ID_RE}',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||
'md5': '9b8624ba66351a23e0b6e1391971f9af',
|
||||
'md5': 'b608f466c7fa24b9666c6439d766ab7e',
|
||||
'info_dict': {
|
||||
'id': '901995',
|
||||
'ext': 'flv',
|
||||
@@ -16,6 +29,33 @@ class CTVNewsIE(InfoExtractor):
|
||||
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
|
||||
'timestamp': 1467286284,
|
||||
'upload_date': '20160630',
|
||||
'categories': [],
|
||||
'season_number': 0,
|
||||
'season': 'Season 0',
|
||||
'tags': [],
|
||||
'series': 'CTV News National | Archive | Stories 2',
|
||||
'season_id': '57981',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'duration': 764.631,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://barrie.ctvnews.ca/video/c3030933-here_s-what_s-making-news-for-nov--15?binId=1272429',
|
||||
'md5': '8b8c2b33c5c1803e3c26bc74ff8694d5',
|
||||
'info_dict': {
|
||||
'id': '3030933',
|
||||
'ext': 'flv',
|
||||
'title': 'Here’s what’s making news for Nov. 15',
|
||||
'description': 'Here are the top stories we’re working on for CTV News at 11 for Nov. 15',
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2021_2_22_a602e68e-1514-410e-a67a-e1f7cccbacab_png_2000x1125.jpg',
|
||||
'season_id': '58104',
|
||||
'season_number': 0,
|
||||
'tags': [],
|
||||
'season': 'Season 0',
|
||||
'categories': [],
|
||||
'series': 'CTV News Barrie',
|
||||
'upload_date': '20241116',
|
||||
'duration': 42.943,
|
||||
'timestamp': 1731722452,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
|
||||
@@ -31,6 +71,72 @@ class CTVNewsIE(InfoExtractor):
|
||||
'id': '1.2876780',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '1.5736957',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'https://www.ctvnews.ca/business/respondents-to-bank-of-canada-questionnaire-largely-oppose-creating-a-digital-loonie-1.6665797',
|
||||
'md5': '24bc4b88cdc17d8c3fc01dfc228ab72c',
|
||||
'info_dict': {
|
||||
'id': '2695026',
|
||||
'ext': 'flv',
|
||||
'season_id': '89852',
|
||||
'series': 'From CTV News Channel',
|
||||
'description': 'md5:796a985a23cacc7e1e2fafefd94afd0a',
|
||||
'season': '2023',
|
||||
'title': 'Bank of Canada asks public about digital currency',
|
||||
'categories': [],
|
||||
'tags': [],
|
||||
'upload_date': '20230526',
|
||||
'season_number': 2023,
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
|
||||
'timestamp': 1685105157,
|
||||
'duration': 253.553,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://stox.ctvnews.ca/video-gallery?clipId=582589',
|
||||
'md5': '135cc592df607d29dddc931f1b756ae2',
|
||||
'info_dict': {
|
||||
'id': '582589',
|
||||
'ext': 'flv',
|
||||
'categories': [],
|
||||
'timestamp': 1427906183,
|
||||
'season_number': 0,
|
||||
'duration': 125.559,
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
|
||||
'series': 'CTV News Stox',
|
||||
'description': 'CTV original footage of the rise and fall of the Berlin Wall.',
|
||||
'title': 'Berlin Wall',
|
||||
'season_id': '63817',
|
||||
'season': 'Season 0',
|
||||
'tags': [],
|
||||
'upload_date': '20150401',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ottawa.ctvnews.ca/features/regional-contact/regional-contact-archive?binId=1.1164587#3023759',
|
||||
'md5': 'a14c0603557decc6531260791c23cc5e',
|
||||
'info_dict': {
|
||||
'id': '3023759',
|
||||
'ext': 'flv',
|
||||
'season_number': 2024,
|
||||
'timestamp': 1731798000,
|
||||
'season': '2024',
|
||||
'episode': 'Episode 125',
|
||||
'description': 'CTV News Ottawa at Six',
|
||||
'duration': 2712.076,
|
||||
'episode_number': 125,
|
||||
'upload_date': '20241116',
|
||||
'title': 'CTV News Ottawa at Six for Saturday, November 16, 2024',
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
|
||||
'categories': [],
|
||||
'tags': [],
|
||||
'series': 'CTV News Ottawa at Six',
|
||||
'season_id': '92667',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/1.810401',
|
||||
'only_matching': True,
|
||||
@@ -42,29 +148,35 @@ class CTVNewsIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _ninecninemedia_url_result(self, clip_id):
|
||||
return self.url_result(f'9c9media:ctvnews_web:{clip_id}', NineCNineMediaIE, clip_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
def ninecninemedia_url_result(clip_id):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': clip_id,
|
||||
'url': f'9c9media:ctvnews_web:{clip_id}',
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
if mobj := re.fullmatch(self._VIDEO_ID_RE, urllib.parse.urlparse(url).fragment):
|
||||
page_id = mobj.group('id')
|
||||
|
||||
if page_id.isdigit():
|
||||
return ninecninemedia_url_result(page_id)
|
||||
else:
|
||||
webpage = self._download_webpage(f'http://www.ctvnews.ca/{page_id}', page_id, query={
|
||||
'ot': 'example.AjaxPageLayout.ot',
|
||||
'maxItemsPerPage': 1000000,
|
||||
})
|
||||
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
|
||||
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
|
||||
if not entries:
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
if 'getAuthStates("' in webpage:
|
||||
entries = [ninecninemedia_url_result(clip_id) for clip_id in
|
||||
self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
|
||||
return self.playlist_result(entries, page_id)
|
||||
if re.fullmatch(self._VIDEO_ID_RE, page_id):
|
||||
return self._ninecninemedia_url_result(page_id)
|
||||
|
||||
webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
|
||||
'ot': 'example.AjaxPageLayout.ot',
|
||||
'maxItemsPerPage': 1000000,
|
||||
})
|
||||
entries = [self._ninecninemedia_url_result(clip_id)
|
||||
for clip_id in orderedSet(re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
|
||||
if not entries:
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
if 'getAuthStates("' in webpage:
|
||||
entries = [self._ninecninemedia_url_result(clip_id) for clip_id in
|
||||
self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
|
||||
else:
|
||||
entries = [
|
||||
self._ninecninemedia_url_result(clip_id) for clip_id in
|
||||
traverse_obj(webpage, (
|
||||
{find_element(tag='jasper-player-container', html=True)},
|
||||
{extract_attributes}, 'axis-ids', {json.loads}, ..., 'axisId', {str}))
|
||||
]
|
||||
|
||||
return self.playlist_result(entries, page_id)
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
@@ -31,9 +28,6 @@ class CultureUnpluggedIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
# request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request
|
||||
self._request_webpage(HEADRequest(
|
||||
'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id)
|
||||
movie_data = self._download_json(
|
||||
f'http://www.cultureunplugged.com/movie-data/cu-{video_id}.json', display_id)
|
||||
|
||||
|
||||
@@ -1,23 +1,53 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
IE_NAME = 'cwtv'
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch|guid)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cwtv.com/shows/continuum/a-stitch-in-time/?play=9149a1e1-4cb2-46d7-81b2-47d35bbd332b',
|
||||
'info_dict': {
|
||||
'id': '9149a1e1-4cb2-46d7-81b2-47d35bbd332b',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Stitch in Time',
|
||||
'description': r're:(?s)City Protective Services officer Kiera Cameron is transported from 2077.+',
|
||||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
'duration': 2632,
|
||||
'timestamp': 1736928000,
|
||||
'uploader': 'CWTV',
|
||||
'chapters': 'count:5',
|
||||
'series': 'Continuum',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'age_limit': 14,
|
||||
'upload_date': '20250115',
|
||||
'season': 'Season 1',
|
||||
'episode': 'Episode 1',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'info_dict': {
|
||||
'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'ext': 'mp4',
|
||||
'title': 'Legends of Yesterday',
|
||||
'description': 'Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote location to keep them hidden from Vandal Savage while they figure out how to defeat him.',
|
||||
'description': r're:(?s)Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote.+',
|
||||
'duration': 2665,
|
||||
'series': 'Arrow',
|
||||
'season_number': 4,
|
||||
@@ -46,7 +76,7 @@ class CWTVIE(InfoExtractor):
|
||||
'timestamp': 1444107300,
|
||||
'age_limit': 14,
|
||||
'uploader': 'CWTV',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
'chapters': 'count:4',
|
||||
'episode': 'Episode 20',
|
||||
'season': 'Season 11',
|
||||
@@ -64,18 +94,22 @@ class CWTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.cwtv.com/movies/play/?guid=0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)
|
||||
if data.get('result') != 'ok':
|
||||
raise ExtractorError(data['msg'], expected=True)
|
||||
f'https://images.cwtv.com/feed/app-2/video-meta/apiversion_22/device_android/guid_{video_id}', video_id)
|
||||
if traverse_obj(data, 'result') != 'ok':
|
||||
raise ExtractorError(traverse_obj(data, (('error_msg', 'msg'), {str}, any)), expected=True)
|
||||
video_data = data['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or f'http://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}?formats=M3U'
|
||||
mpx_url = update_url_query(
|
||||
video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}',
|
||||
{'formats': 'M3U+none'})
|
||||
|
||||
season = str_or_none(video_data.get('season'))
|
||||
episode = str_or_none(video_data.get('episode'))
|
||||
@@ -97,3 +131,50 @@ class CWTVIE(InfoExtractor):
|
||||
'ie_key': 'ThePlatform',
|
||||
'thumbnail': video_data.get('large_thumbnail'),
|
||||
}
|
||||
|
||||
|
||||
class CWTVMovieIE(InfoExtractor):
|
||||
IE_NAME = 'cwtv:movie'
|
||||
_VALID_URL = r'https?://(?:www\.)?cwtv\.com/shows/(?P<id>[\w-]+)/?\?(?:[^#]+&)?viewContext=Movies'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cwtv.com/shows/the-crush/?viewContext=Movies+Swimlane',
|
||||
'info_dict': {
|
||||
'id': '0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Crush',
|
||||
'upload_date': '20241112',
|
||||
'description': 'md5:1549acd90dff4a8273acd7284458363e',
|
||||
'chapters': 'count:9',
|
||||
'timestamp': 1731398400,
|
||||
'age_limit': 16,
|
||||
'duration': 5337,
|
||||
'series': 'The Crush',
|
||||
'season': 'Season 1',
|
||||
'uploader': 'CWTV',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
_UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
app_url = (
|
||||
self._html_search_meta('al:ios:url', webpage, default=None)
|
||||
or self._html_search_meta('al:android:url', webpage, default=None))
|
||||
video_id = (
|
||||
traverse_obj(parse_qs(app_url), ('video_id', 0, {lambda x: re.fullmatch(self._UUID_RE, x)}, 0))
|
||||
or self._search_regex([
|
||||
rf'CWTV\.Site\.curPlayingGUID\s*=\s*["\']({self._UUID_RE})',
|
||||
rf'CWTV\.Site\.viewInAppURL\s*=\s*["\']/shows/[\w-]+/watch-in-app/\?play=({self._UUID_RE})',
|
||||
], webpage, 'video ID'))
|
||||
|
||||
return self.url_result(
|
||||
f'https://www.cwtv.com/shows/{display_id}/{display_id}/?play={video_id}', CWTVIE, video_id)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import functools
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
@@ -51,6 +52,15 @@ class DacastVODIE(DacastBaseIE):
|
||||
'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, { # /uspaes/ in hls_url
|
||||
'url': 'https://iframe.dacast.com/vod/f9823fc6-faba-b98f-0d00-4a7b50a58c5b/348c5c84-b6af-4859-bb9d-1d01009c795b',
|
||||
'info_dict': {
|
||||
'id': '348c5c84-b6af-4859-bb9d-1d01009c795b',
|
||||
'ext': 'mp4',
|
||||
'title': 'pl1-edyta-rubas-211124.mp4',
|
||||
'uploader_id': 'f9823fc6-faba-b98f-0d00-4a7b50a58c5b',
|
||||
'thumbnail': 'https://universe-files.dacast.com/4d0bd042-a536-752d-fc34-ad2fa44bbcbb.png',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/',
|
||||
@@ -74,6 +84,15 @@ class DacastVODIE(DacastBaseIE):
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
@functools.cached_property
|
||||
def _usp_signing_secret(self):
|
||||
player_js = self._download_webpage(
|
||||
'https://player.dacast.com/js/player.js', None, 'Downloading player JS')
|
||||
# Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
|
||||
return self._search_regex(
|
||||
r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js,
|
||||
'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP'
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
|
||||
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
|
||||
@@ -94,10 +113,10 @@ class DacastVODIE(DacastBaseIE):
|
||||
if 'DRM_EXT' in hls_url:
|
||||
self.report_drm(video_id)
|
||||
elif '/uspaes/' in hls_url:
|
||||
# From https://player.dacast.com/js/player.js
|
||||
# Ref: https://player.dacast.com/js/player.js
|
||||
ts = int(time.time())
|
||||
signature = hashlib.sha1(
|
||||
f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw').digest().hex()
|
||||
f'{10413792000 - ts}{ts}{self._usp_signing_secret}'.encode()).digest().hex()
|
||||
hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}'
|
||||
|
||||
for retry in self.RetryManager():
|
||||
|
||||
@@ -10,11 +10,14 @@ from ..utils import (
|
||||
OnDemandPagedList,
|
||||
age_restricted,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -98,12 +101,20 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'''(?ix)
|
||||
https?://
|
||||
(?:
|
||||
dai\.ly/|
|
||||
(?:
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
|
||||
(?:www\.)?lequipe\.fr/video
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
|
||||
(?:www\.)?lequipe\.fr
|
||||
)/
|
||||
(?:
|
||||
swf/(?!video)|
|
||||
(?:(?:crawler|embed|swf)/)?video/|
|
||||
player(?:/[\da-z]+)?\.html\?(?:video|(?P<is_playlist>playlist))=
|
||||
)
|
||||
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
)
|
||||
(?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
IE_NAME = 'dailymotion'
|
||||
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
||||
_TESTS = [{
|
||||
@@ -123,7 +134,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
|
||||
@@ -142,7 +153,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['en_quete_d_esprit'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
@@ -217,6 +228,86 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
||||
'only_matching': True,
|
||||
}, { # playlist-only
|
||||
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/crawler/video/x8u4owg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/embed/video/x8u4owg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://dai.ly/x94cnnk',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
|
||||
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
|
||||
'info_dict': {
|
||||
'id': 'x93blhi',
|
||||
'ext': 'mp4',
|
||||
'title': 'OnAir - 01/08/24',
|
||||
'description': '',
|
||||
'duration': 217,
|
||||
'timestamp': 1722505658,
|
||||
'upload_date': '20240801',
|
||||
'uploader': 'Financialounge',
|
||||
'uploader_id': 'x2vtgmm',
|
||||
'age_limit': 0,
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https://\w+.dmcdn.net/v/WnEY61cmvMxt2Fi6d/x1080',
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
|
||||
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
|
||||
'info_dict': {
|
||||
'id': 'x7wdsj',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
# https://www.dailymotion.com/crawler/video/x8u4owg
|
||||
'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php',
|
||||
'info_dict': {
|
||||
'id': 'x8u4owg',
|
||||
'ext': 'mp4',
|
||||
'like_count': int,
|
||||
'uploader': 'Le Parisien',
|
||||
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
|
||||
'upload_date': '20240309',
|
||||
'view_count': int,
|
||||
'timestamp': 1709997866,
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'x32f7b',
|
||||
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
|
||||
'duration': 428.0,
|
||||
'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne',
|
||||
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xry80.html?video=x8vu47w
|
||||
'url': 'https://www.metatube.com/en/videos/546765/This-frogs-decorates-Christmas-tree/',
|
||||
'info_dict': {
|
||||
'id': 'x8vu47w',
|
||||
'ext': 'mp4',
|
||||
'like_count': int,
|
||||
'uploader': 'Metatube',
|
||||
'thumbnail': r're:https://\w+.dmcdn.net/v/W1G_S1coGSFTfkTeR/x1080',
|
||||
'upload_date': '20240326',
|
||||
'view_count': int,
|
||||
'timestamp': 1711496732,
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'x2xpy74',
|
||||
'title': 'Está lindas ranitas ponen su arbolito',
|
||||
'duration': 28,
|
||||
'description': 'Que lindura',
|
||||
'tags': [],
|
||||
},
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_COMMON_MEDIA_FIELDS = '''description
|
||||
@@ -231,17 +322,36 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for mobj in re.finditer(
|
||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
yield 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
player_url = url_or_none(attrs.get('src'))
|
||||
if not player_url:
|
||||
continue
|
||||
player_url = player_url.replace('.js', '.html')
|
||||
if player_url.startswith('//'):
|
||||
player_url = f'https:{player_url}'
|
||||
if video_id := attrs.get('data-video'):
|
||||
query_string = f'video={video_id}'
|
||||
elif playlist_id := attrs.get('data-playlist'):
|
||||
query_string = f'playlist={playlist_id}'
|
||||
else:
|
||||
continue
|
||||
yield update_url(player_url, query=query_string)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
video_id, playlist_id = self._match_valid_url(url).groups()
|
||||
video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
|
||||
|
||||
if playlist_id:
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self.url_result(
|
||||
'http://www.dailymotion.com/playlist/' + playlist_id,
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
if is_playlist: # We matched the playlist query param as video_id
|
||||
playlist_id = video_id
|
||||
video_id = None
|
||||
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self.url_result(
|
||||
f'http://www.dailymotion.com/playlist/{playlist_id}',
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
media = self._call_api(
|
||||
@@ -282,6 +392,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
title = metadata['title']
|
||||
is_live = media.get('isOnAir')
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
for quality, media_list in metadata['qualities'].items():
|
||||
for m in media_list:
|
||||
media_url = m.get('url')
|
||||
@@ -289,8 +401,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
if media_type == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
|
||||
fmt, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmt)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
@@ -310,20 +424,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if not f.get('fps') and f['format_id'].endswith('@60'):
|
||||
f['fps'] = 60
|
||||
|
||||
subtitles = {}
|
||||
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
|
||||
for subtitle_lang, subtitle in subtitles_data.items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'url': subtitle_url,
|
||||
} for subtitle_url in subtitle.get('urls', [])]
|
||||
|
||||
thumbnails = []
|
||||
for height, poster_url in metadata.get('posters', {}).items():
|
||||
thumbnails.append({
|
||||
'height': int_or_none(height),
|
||||
'id': height,
|
||||
'url': poster_url,
|
||||
})
|
||||
thumbnails = traverse_obj(metadata, (
|
||||
('posters', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]), {
|
||||
'height': (0, {int_or_none}),
|
||||
'id': (0, {str}),
|
||||
'url': 1,
|
||||
}))
|
||||
|
||||
owner = metadata.get('owner') or {}
|
||||
stats = media.get('stats') or {}
|
||||
@@ -447,7 +559,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search|crawler)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
|
||||
@@ -40,7 +40,7 @@ class DangalPlayBaseIE(InfoExtractor):
|
||||
'id': ('content_id', {str}),
|
||||
'title': ('display_title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'series': ('show_name', {str}, {lambda x: x or None}),
|
||||
'series': ('show_name', {str}, filter),
|
||||
'series_id': ('catalog_id', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'release_timestamp': ('release_date_uts', {int_or_none}),
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
jwt_decode_hs256,
|
||||
parse_codecs,
|
||||
try_get,
|
||||
url_or_none,
|
||||
@@ -13,9 +16,6 @@ from ..utils.traversal import traverse_obj
|
||||
class DigitalConcertHallIE(InfoExtractor):
|
||||
IE_DESC = 'DigitalConcertHall extractor'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
|
||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
|
||||
_ACCESS_TOKEN = None
|
||||
_NETRC_MACHINE = 'digitalconcerthall'
|
||||
_TESTS = [{
|
||||
'note': 'Playlist with only one video',
|
||||
@@ -69,59 +69,157 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
_LOGIN_HINT = ('Use --username token --password ACCESS_TOKEN where ACCESS_TOKEN '
|
||||
'is the "access_token_production" from your browser local storage')
|
||||
_REFRESH_HINT = 'or else use a "refresh_token" with --username refresh --password REFRESH_TOKEN'
|
||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||
_CLIENT_ID = 'dch.webapp'
|
||||
_CLIENT_SECRET = '2ySLN+2Fwb'
|
||||
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
|
||||
_OAUTH_HEADERS = {
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'Origin': 'https://www.digitalconcerthall.com',
|
||||
'Referer': 'https://www.digitalconcerthall.com/',
|
||||
'User-Agent': _USER_AGENT,
|
||||
}
|
||||
_access_token = None
|
||||
_access_token_expiry = 0
|
||||
_refresh_token = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_token = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
|
||||
@property
|
||||
def _access_token_is_expired(self):
|
||||
return self._access_token_expiry - 30 <= int(time.time())
|
||||
|
||||
def _set_access_token(self, value):
|
||||
self._access_token = value
|
||||
self._access_token_expiry = traverse_obj(value, ({jwt_decode_hs256}, 'exp', {int})) or 0
|
||||
|
||||
def _cache_tokens(self, /):
|
||||
self.cache.store(self._NETRC_MACHINE, 'tokens', {
|
||||
'access_token': self._access_token,
|
||||
'refresh_token': self._refresh_token,
|
||||
})
|
||||
|
||||
def _fetch_new_tokens(self, invalidate=False):
|
||||
if invalidate:
|
||||
self.report_warning('Access token has been invalidated')
|
||||
self._set_access_token(None)
|
||||
|
||||
if not self._access_token_is_expired:
|
||||
return
|
||||
|
||||
if not self._refresh_token:
|
||||
self._set_access_token(None)
|
||||
self._cache_tokens()
|
||||
raise ExtractorError(
|
||||
'Access token has expired or been invalidated. '
|
||||
'Get a new "access_token_production" value from your browser '
|
||||
f'and try again, {self._REFRESH_HINT}', expected=True)
|
||||
|
||||
# If we only have a refresh token, we need a temporary "initial token" for the refresh flow
|
||||
bearer_token = self._access_token or self._download_json(
|
||||
self._OAUTH_URL, None, 'Obtaining initial token', 'Unable to obtain initial token',
|
||||
data=urlencode_postdata({
|
||||
'affiliate': 'none',
|
||||
'grant_type': 'device',
|
||||
'device_vendor': 'unknown',
|
||||
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio
|
||||
'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari',
|
||||
'app_id': 'dch.webapp',
|
||||
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio,
|
||||
# but this is no longer effective since actual login is not possible anymore
|
||||
'device_model': 'unknown',
|
||||
'app_id': self._CLIENT_ID,
|
||||
'app_distributor': 'berlinphil',
|
||||
'app_version': '1.84.0',
|
||||
'client_secret': '2ySLN+2Fwb',
|
||||
}), headers={
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})['access_token']
|
||||
'app_version': '1.95.0',
|
||||
'client_secret': self._CLIENT_SECRET,
|
||||
}), headers=self._OAUTH_HEADERS)['access_token']
|
||||
|
||||
try:
|
||||
login_response = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
|
||||
'grant_type': 'password',
|
||||
'username': username,
|
||||
'password': password,
|
||||
response = self._download_json(
|
||||
self._OAUTH_URL, None, 'Refreshing token', 'Unable to refresh token',
|
||||
data=urlencode_postdata({
|
||||
'grant_type': 'refresh_token',
|
||||
'refresh_token': self._refresh_token,
|
||||
'client_id': self._CLIENT_ID,
|
||||
'client_secret': self._CLIENT_SECRET,
|
||||
}), headers={
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'Referer': 'https://www.digitalconcerthall.com',
|
||||
'Authorization': f'Bearer {login_token}',
|
||||
'User-Agent': self._USER_AGENT,
|
||||
**self._OAUTH_HEADERS,
|
||||
'Authorization': f'Bearer {bearer_token}',
|
||||
})
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
self._set_access_token(None)
|
||||
self._refresh_token = None
|
||||
self._cache_tokens()
|
||||
raise ExtractorError('Your tokens have been invalidated', expected=True)
|
||||
raise
|
||||
self._ACCESS_TOKEN = login_response['access_token']
|
||||
|
||||
self._set_access_token(response['access_token'])
|
||||
if refresh_token := traverse_obj(response, ('refresh_token', {str})):
|
||||
self.write_debug('New refresh token granted')
|
||||
self._refresh_token = refresh_token
|
||||
self._cache_tokens()
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self.report_login()
|
||||
|
||||
if username == 'refresh':
|
||||
self._refresh_token = password
|
||||
self._fetch_new_tokens()
|
||||
|
||||
if username == 'token':
|
||||
if not traverse_obj(password, {jwt_decode_hs256}):
|
||||
raise ExtractorError(
|
||||
f'The access token passed to yt-dlp is not valid. {self._LOGIN_HINT}', expected=True)
|
||||
self._set_access_token(password)
|
||||
self._cache_tokens()
|
||||
|
||||
if username in ('refresh', 'token'):
|
||||
if self.get_param('cachedir') is not False:
|
||||
token_type = 'access' if username == 'token' else 'refresh'
|
||||
self.to_screen(f'Your {token_type} token has been cached to disk. To use the cached '
|
||||
'token next time, pass --username cache along with any password')
|
||||
return
|
||||
|
||||
if username != 'cache':
|
||||
raise ExtractorError(
|
||||
'Login with username and password is no longer supported '
|
||||
f'for this site. {self._LOGIN_HINT}, {self._REFRESH_HINT}', expected=True)
|
||||
|
||||
# Try cached access_token
|
||||
cached_tokens = self.cache.load(self._NETRC_MACHINE, 'tokens', default={})
|
||||
self._set_access_token(cached_tokens.get('access_token'))
|
||||
self._refresh_token = cached_tokens.get('refresh_token')
|
||||
if not self._access_token_is_expired:
|
||||
return
|
||||
|
||||
# Try cached refresh_token
|
||||
self._fetch_new_tokens(invalidate=True)
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._ACCESS_TOKEN:
|
||||
self.raise_login_required(method='password')
|
||||
if not self._access_token:
|
||||
self.raise_login_required(
|
||||
'All content on this site is only available for registered users. '
|
||||
f'{self._LOGIN_HINT}, {self._REFRESH_HINT}', method=None)
|
||||
|
||||
def _entries(self, items, language, type_, **kwargs):
|
||||
for item in items:
|
||||
video_id = item['id']
|
||||
stream_info = self._download_json(
|
||||
self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})
|
||||
|
||||
for should_retry in (True, False):
|
||||
self._fetch_new_tokens(invalidate=not should_retry)
|
||||
try:
|
||||
stream_info = self._download_json(
|
||||
self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._access_token}',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})
|
||||
break
|
||||
except ExtractorError as error:
|
||||
if should_retry and isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
continue
|
||||
raise
|
||||
|
||||
formats = []
|
||||
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
|
||||
@@ -157,7 +255,6 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'Accept': 'application/json',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
})
|
||||
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
|
||||
|
||||
|
||||
@@ -319,32 +319,6 @@ class DPlayIE(DPlayBaseIE):
|
||||
url, display_id, host, 'dplay' + country, country, domain)
|
||||
|
||||
|
||||
class HGTVDeIE(DPlayBaseIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
|
||||
'info_dict': {
|
||||
'id': '151205',
|
||||
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wer braucht schon eine Toilette',
|
||||
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
|
||||
'duration': 1177.024,
|
||||
'timestamp': 1595705400,
|
||||
'upload_date': '20200725',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Tiny House - klein, aber oho',
|
||||
'season_number': 3,
|
||||
'episode_number': 3,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||
|
||||
|
||||
class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
|
||||
|
||||
@@ -373,6 +347,45 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
|
||||
|
||||
|
||||
class HGTVDeIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
|
||||
'info_dict': {
|
||||
'id': '7332936',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
|
||||
'title': 'Vom Landleben ins Loft',
|
||||
'description': 'md5:e5f72c02c853970796dd3818f2e25745',
|
||||
'episode': 'Episode 7',
|
||||
'episode_number': 7,
|
||||
'season': 'Season 7',
|
||||
'season_number': 7,
|
||||
'series': 'Mein Kleinstadt-Traumhaus',
|
||||
'duration': 2645.0,
|
||||
'timestamp': 1725998100,
|
||||
'upload_date': '20240910',
|
||||
'creators': ['HGTV'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/08/09/82a386b9-c688-32c7-b9ff-0b13865f0bae.jpeg',
|
||||
},
|
||||
}]
|
||||
|
||||
_PRODUCT = 'hgtv'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'eu1-prod.disco-api.com',
|
||||
'realm': 'hgtv',
|
||||
'country': 'de',
|
||||
}
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
|
||||
class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
|
||||
@@ -6,8 +6,10 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,43 +38,56 @@ class DropboxIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
def _yield_decoded_parts(self, webpage):
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
yield base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
fn = urllib.parse.unquote(url_basename(url))
|
||||
title = os.path.splitext(fn)[0]
|
||||
content_id = None
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if (self._og_search_title(webpage) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if '/sm/password' in part:
|
||||
content_id = self._search_regex(r'content_id=([\w.+=/-]+)', part, 'content ID')
|
||||
break
|
||||
|
||||
if password:
|
||||
content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
|
||||
payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(),
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Authentication failed!', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
elif self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
else:
|
||||
if content_id:
|
||||
password = self.get_param('videopassword')
|
||||
if not password:
|
||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||
|
||||
formats, subtitles, has_anonymous_download = [], {}, False
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
|
||||
data=urlencode_postdata({
|
||||
'is_xhr': 'true',
|
||||
't': self._get_cookies('https://www.dropbox.com')['t'].value,
|
||||
'content_id': content_id,
|
||||
'password': password,
|
||||
'url': update_url(url, scheme='', netloc=''),
|
||||
}))
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Invalid password', expected=True)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
has_anonymous_download = False
|
||||
thumbnail = None
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if not has_anonymous_download:
|
||||
has_anonymous_download = self._search_regex(
|
||||
r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
|
||||
r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
|
||||
transcode_url = self._search_regex(
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
|
||||
r'\n.?(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
|
||||
if not transcode_url:
|
||||
continue
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
|
||||
thumbnail = self._search_regex(
|
||||
r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
|
||||
break
|
||||
|
||||
# downloads enabled we can get the original file
|
||||
@@ -89,4 +104,5 @@ class DropboxIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
@@ -135,7 +135,7 @@ class DropoutIE(InfoExtractor):
|
||||
self.raise_login_required(method='any')
|
||||
raise ExtractorError(login_err, expected=True)
|
||||
|
||||
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
|
||||
embed_url = self._html_search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
watch_info = get_element_by_id('watch-info', webpage) or ''
|
||||
|
||||
|
||||
51
yt_dlp/extractor/drtalks.py
Normal file
51
yt_dlp/extractor/drtalks.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DrTalksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?drtalks\.com/videos/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://drtalks.com/videos/six-pillars-of-resilience-tools-for-managing-stress-and-flourishing/',
|
||||
'info_dict': {
|
||||
'id': '6366193757112',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': '6314452011001',
|
||||
'tags': ['resilience'],
|
||||
'description': 'md5:9c6805aee237ee6de8052461855b9dda',
|
||||
'timestamp': 1734546659,
|
||||
'thumbnail': 'https://drtalks.com/wp-content/uploads/2024/12/Episode-82-Eva-Selhub-DrTalks-Thumbs.jpg',
|
||||
'title': 'Six Pillars of Resilience: Tools for Managing Stress and Flourishing',
|
||||
'duration': 2800.682,
|
||||
'upload_date': '20241218',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://drtalks.com/videos/the-pcos-puzzle-mastering-metabolic-health-with-marcelle-pick/',
|
||||
'info_dict': {
|
||||
'id': '6364699891112',
|
||||
'ext': 'mp4',
|
||||
'title': 'The PCOS Puzzle: Mastering Metabolic Health with Marcelle Pick',
|
||||
'description': 'md5:e87cbe00ca50135d5702787fc4043aaa',
|
||||
'thumbnail': 'https://drtalks.com/wp-content/uploads/2024/11/Episode-34-Marcelle-Pick-OBGYN-NP-DrTalks.jpg',
|
||||
'duration': 3515.2,
|
||||
'tags': ['pcos'],
|
||||
'upload_date': '20241114',
|
||||
'timestamp': 1731592119,
|
||||
'uploader_id': '6314452011001',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
next_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']['video']
|
||||
|
||||
return self.url_result(
|
||||
next_data['videos']['brightcoveVideoLink'], BrightcoveNewIE, video_id,
|
||||
url_transparent=True,
|
||||
**traverse_obj(next_data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('videos', 'summury', {str}),
|
||||
'thumbnail': ('featuredImage', 'node', 'sourceUrl', {url_or_none}),
|
||||
}))
|
||||
@@ -139,12 +139,11 @@ class DRTVIE(InfoExtractor):
|
||||
return
|
||||
|
||||
token_response = self._download_json(
|
||||
'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||
'https://isl.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||
note='Downloading anonymous token', headers={
|
||||
'content-type': 'application/json',
|
||||
}, query={
|
||||
'device': 'web_browser',
|
||||
'ff': 'idp,ldp,rpt',
|
||||
'device': 'phone_android',
|
||||
'lang': 'da',
|
||||
'supportFallbackToken': 'true',
|
||||
}, data=json.dumps({
|
||||
|
||||
@@ -5,15 +5,16 @@ from ..utils import (
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
|
||||
|
||||
class DuoplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?'
|
||||
_VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)(?:[/?#]|$)'
|
||||
_TESTS = [{
|
||||
'note': 'Siberi võmm S02E12',
|
||||
'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
|
||||
@@ -34,15 +35,16 @@ class DuoplayIE(InfoExtractor):
|
||||
'episode_number': 12,
|
||||
'episode_id': '24',
|
||||
},
|
||||
'skip': 'No video found',
|
||||
}, {
|
||||
'note': 'Empty title',
|
||||
'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
|
||||
'md5': '6aca68be71112314738dd17cced7f8bf',
|
||||
'md5': 'cba9f5dabf2582b224d80ac44fb80e47',
|
||||
'info_dict': {
|
||||
'id': '17_14',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ühikarotid',
|
||||
'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
|
||||
'title': 'Episode 14',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'description': 'md5:4719b418e058c209def41d48b601276e',
|
||||
'upload_date': '20100916',
|
||||
'timestamp': 1284661800,
|
||||
@@ -52,6 +54,8 @@ class DuoplayIE(InfoExtractor):
|
||||
'season_number': 2,
|
||||
'episode_id': '14',
|
||||
'release_year': 2010,
|
||||
'episode': 'Episode 14',
|
||||
'episode_number': 14,
|
||||
},
|
||||
}, {
|
||||
'note': 'Movie without expiry',
|
||||
@@ -68,10 +72,32 @@ class DuoplayIE(InfoExtractor):
|
||||
'timestamp': 1671054000,
|
||||
'release_year': 2018,
|
||||
},
|
||||
'skip': 'No video found',
|
||||
}, {
|
||||
'note': 'Episode url without show name',
|
||||
'url': 'https://duoplay.ee/9644?ep=185',
|
||||
'md5': '63f324b4fe2dbd8194dca16a6d52184a',
|
||||
'info_dict': {
|
||||
'id': '9644_185',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 185',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'description': 'md5:ed25ba4e9e5d54bc291a4a0cdd241467',
|
||||
'upload_date': '20241120',
|
||||
'timestamp': 1732077000,
|
||||
'episode': 'Episode 63',
|
||||
'episode_id': '185',
|
||||
'episode_number': 63,
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'series': 'Telehommik',
|
||||
'series_id': '9644',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
telecast_id, episode = self._match_valid_url(url).group('id', 'ep')
|
||||
telecast_id = self._match_id(url)
|
||||
episode = traverse_obj(parse_qs(url), ('ep', 0, {int_or_none}, {str_or_none}))
|
||||
video_id = join_nonempty(telecast_id, episode, delim='_')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_player = try_call(lambda: extract_attributes(
|
||||
@@ -79,25 +105,33 @@ class DuoplayIE(InfoExtractor):
|
||||
if not video_player or not video_player.get('manifest-url'):
|
||||
raise ExtractorError('No video found', expected=True)
|
||||
|
||||
manifest_url = video_player['manifest-url']
|
||||
session_token = self._download_json(
|
||||
'https://sts.postimees.ee/session/register', video_id, 'Registering session',
|
||||
'Unable to register session', headers={
|
||||
'Accept': 'application/json',
|
||||
'X-Original-URI': manifest_url,
|
||||
})['session']
|
||||
|
||||
episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(video_player['manifest-url'], video_id, 'mp4'),
|
||||
'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4', query={'s': session_token}),
|
||||
**traverse_obj(episode_attr, {
|
||||
'title': 'title',
|
||||
'description': 'synopsis',
|
||||
'title': ('title', {str}),
|
||||
'description': ('synopsis', {str}),
|
||||
'thumbnail': ('images', 'original'),
|
||||
'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}),
|
||||
'cast': ('cast', {lambda x: x.split(', ')}),
|
||||
'cast': ('cast', filter, {lambda x: x.split(', ')}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
}),
|
||||
**(traverse_obj(episode_attr, {
|
||||
'title': (None, ('subtitle', ('episode_nr', {lambda x: f'Episode {x}' if x else None}))),
|
||||
'series': 'title',
|
||||
'title': (None, (('subtitle', {str}, filter), {value(f'Episode {episode}' if episode else None)})),
|
||||
'series': ('title', {str}),
|
||||
'series_id': ('telecast_id', {str_or_none}),
|
||||
'season_number': ('season_id', {int_or_none}),
|
||||
'episode': 'subtitle',
|
||||
'episode': ('subtitle', {str}, filter),
|
||||
'episode_number': ('episode_nr', {int_or_none}),
|
||||
'episode_id': ('episode_id', {str_or_none}),
|
||||
}, get_all=False) if episode_attr.get('category') != 'movies' else {}),
|
||||
|
||||
@@ -162,7 +162,7 @@ class DVTVIE(InfoExtractor):
|
||||
items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
|
||||
if items:
|
||||
return self.playlist_result(
|
||||
[self._parse_video_metadata(i, video_id, timestamp) for i in items],
|
||||
(self._parse_video_metadata(i, video_id, timestamp) for i in items),
|
||||
video_id, self._html_search_meta('twitter:title', webpage))
|
||||
|
||||
item = self._search_regex(
|
||||
|
||||
155
yt_dlp/extractor/eggs.py
Normal file
155
yt_dlp/extractor/eggs.py
Normal file
@@ -0,0 +1,155 @@
|
||||
import secrets
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class EggsBaseIE(InfoExtractor):
|
||||
_API_HEADERS = {
|
||||
'Accept': '*/*',
|
||||
'apVersion': '8.2.00',
|
||||
'deviceName': 'Android',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._API_HEADERS['deviceId'] = secrets.token_hex(8)
|
||||
|
||||
def _call_api(self, endpoint, video_id):
|
||||
return self._download_json(
|
||||
f'https://app-front-api.eggs.mu/v1/{endpoint}', video_id,
|
||||
headers=self._API_HEADERS)
|
||||
|
||||
def _extract_music_info(self, data):
|
||||
if yt_url := traverse_obj(data, ('youtubeUrl', {url_or_none})):
|
||||
return self.url_result(yt_url, ie=YoutubeIE)
|
||||
|
||||
artist_name = traverse_obj(data, ('artist', 'artistName', {str_or_none}))
|
||||
music_id = traverse_obj(data, ('musicId', {str_or_none}))
|
||||
webpage_url = None
|
||||
if artist_name and music_id:
|
||||
webpage_url = f'https://eggs.mu/artist/{artist_name}/song/{music_id}'
|
||||
|
||||
return {
|
||||
'id': music_id,
|
||||
'vcodec': 'none',
|
||||
'webpage_url': webpage_url,
|
||||
'extractor_key': EggsIE.ie_key(),
|
||||
'extractor': EggsIE.IE_NAME,
|
||||
**traverse_obj(data, {
|
||||
'title': ('musicTitle', {str}),
|
||||
'url': ('musicDataPath', {url_or_none}),
|
||||
'uploader': ('artist', 'displayName', {str}),
|
||||
'uploader_id': ('artist', 'artistId', {str_or_none}),
|
||||
'thumbnail': ('imageDataPath', {url_or_none}),
|
||||
'view_count': ('numberOfMusicPlays', {int_or_none}),
|
||||
'like_count': ('numberOfLikes', {int_or_none}),
|
||||
'comment_count': ('numberOfComments', {int_or_none}),
|
||||
'composers': ('composer', {str}, all),
|
||||
'tags': ('tags', ..., {str}),
|
||||
'timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'artist': ('artist', 'displayName', {str}),
|
||||
})}
|
||||
|
||||
|
||||
class EggsIE(EggsBaseIE):
|
||||
IE_NAME = 'eggs:single'
|
||||
_VALID_URL = r'https?://eggs\.mu/artist/[^/?#]+/song/(?P<id>[\da-f-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://eggs.mu/artist/32_sunny_girl/song/0e95fd1d-4d61-4d5b-8b18-6092c551da90',
|
||||
'info_dict': {
|
||||
'id': '0e95fd1d-4d61-4d5b-8b18-6092c551da90',
|
||||
'ext': 'm4a',
|
||||
'title': 'シネマと信号',
|
||||
'uploader': 'Sunny Girl',
|
||||
'thumbnail': r're:https?://.*\.jpg(?:\?.*)?$',
|
||||
'uploader_id': '1607',
|
||||
'like_count': int,
|
||||
'timestamp': 1731327327,
|
||||
'composers': ['橘高連太郎'],
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'artists': ['Sunny Girl'],
|
||||
'upload_date': '20241111',
|
||||
'tags': ['SunnyGirl', 'シネマと信号'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://eggs.mu/artist/KAMO_3pband/song/1d4bc45f-1af6-47a9-8b30-a70cae350b4f',
|
||||
'info_dict': {
|
||||
'id': '80cLKA2wnoA',
|
||||
'ext': 'mp4',
|
||||
'title': 'KAMO「いい女だから」Audio',
|
||||
'uploader': 'KAMO',
|
||||
'live_status': 'not_live',
|
||||
'channel_id': 'UCsHLBw2__5Q9y55skXPotOg',
|
||||
'channel_follower_count': int,
|
||||
'description': 'md5:d260da711ecbec3e720293dc11401b87',
|
||||
'availability': 'public',
|
||||
'uploader_id': '@KAMO_band',
|
||||
'upload_date': '20240925',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/80cLKA2wnoA/maxresdefault.jpg',
|
||||
'comment_count': int,
|
||||
'channel_url': 'https://www.youtube.com/channel/UCsHLBw2__5Q9y55skXPotOg',
|
||||
'view_count': int,
|
||||
'duration': 151,
|
||||
'like_count': int,
|
||||
'channel': 'KAMO',
|
||||
'playable_in_embed': True,
|
||||
'uploader_url': 'https://www.youtube.com/@KAMO_band',
|
||||
'tags': [],
|
||||
'timestamp': 1727271121,
|
||||
'age_limit': 0,
|
||||
'categories': ['People & Blogs'],
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'params': {'skip_download': 'Youtube'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
song_id = self._match_id(url)
|
||||
json_data = self._call_api(f'musics/{song_id}', song_id)
|
||||
return self._extract_music_info(json_data)
|
||||
|
||||
|
||||
class EggsArtistIE(EggsBaseIE):
|
||||
IE_NAME = 'eggs:artist'
|
||||
_VALID_URL = r'https?://eggs\.mu/artist/(?P<id>\w+)/?(?:[?#&]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://eggs.mu/artist/32_sunny_girl',
|
||||
'info_dict': {
|
||||
'id': '32_sunny_girl',
|
||||
'thumbnail': 'https://image-pro.eggs.mu/profile/1607.jpeg?updated_at=2024-04-03T20%3A06%3A00%2B09%3A00',
|
||||
'description': 'Muddy Mine / 東京高田馬場CLUB PHASE / Gt.Vo 橘高 連太郎 / Ba.Cho 小野 ゆうき / Dr 大森 りゅうひこ',
|
||||
'title': 'Sunny Girl',
|
||||
},
|
||||
'playlist_mincount': 18,
|
||||
}, {
|
||||
'url': 'https://eggs.mu/artist/KAMO_3pband',
|
||||
'info_dict': {
|
||||
'id': 'KAMO_3pband',
|
||||
'description': '川崎発3ピースバンド',
|
||||
'thumbnail': 'https://image-pro.eggs.mu/profile/35217.jpeg?updated_at=2024-11-27T16%3A31%3A50%2B09%3A00',
|
||||
'title': 'KAMO',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
artist_id = self._match_id(url)
|
||||
artist_data = self._call_api(f'artists/{artist_id}', artist_id)
|
||||
song_data = self._call_api(f'artists/{artist_id}/musics', artist_id)
|
||||
return self.playlist_result(
|
||||
traverse_obj(song_data, ('data', ..., {dict}, {self._extract_music_info})),
|
||||
playlist_id=artist_id, **traverse_obj(artist_data, {
|
||||
'title': ('displayName', {str}),
|
||||
'description': ('profile', {str}),
|
||||
'thumbnail': ('imageDataPath', {url_or_none}),
|
||||
}))
|
||||
@@ -207,7 +207,7 @@ class ERRJupiterIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': ('heading', {str}),
|
||||
'alt_title': ('subHeading', {str}),
|
||||
'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}),
|
||||
'description': (('lead', 'body'), {clean_html}, filter),
|
||||
'timestamp': ('created', {int_or_none}),
|
||||
'modified_timestamp': ('updated', {int_or_none}),
|
||||
'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
|
||||
|
||||
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ERTFlixBaseIE(InfoExtractor):
|
||||
@@ -74,29 +75,28 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
|
||||
|
||||
def _extract_formats_and_subs(self, video_id):
|
||||
media_info = self._call_api(video_id, codename=video_id)
|
||||
formats, subs = [], {}
|
||||
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
|
||||
for media in try_get(media_file, lambda x: x['Formats'], list) or []:
|
||||
fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
|
||||
if not fmt_url:
|
||||
continue
|
||||
ext = determine_ext(fmt_url)
|
||||
if ext == 'm3u8':
|
||||
formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
|
||||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
formats_, subs_ = self._extract_mpd_formats_and_subtitles(
|
||||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': str_or_none(media.get('Id')),
|
||||
})
|
||||
continue
|
||||
formats.extend(formats_)
|
||||
self._merge_subtitles(subs_, target=subs)
|
||||
formats, subtitles = [], {}
|
||||
for media in traverse_obj(media_info, (
|
||||
'MediaFiles', lambda _, v: v['RoleCodename'] == 'main',
|
||||
'Formats', lambda _, v: url_or_none(v['Url']))):
|
||||
fmt_url = media['Url']
|
||||
ext = determine_ext(fmt_url)
|
||||
if ext == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': str_or_none(media.get('Id')),
|
||||
})
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return formats, subs
|
||||
return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -294,37 +294,37 @@ class ESPNCricInfoIE(InfoExtractor):
|
||||
class WatchESPNIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'info_dict': {
|
||||
'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'id': '11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'ext': 'mp4',
|
||||
'title': 'Huddersfield vs. Burnley',
|
||||
'duration': 7500,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
'title': 'Abilene Chrstn vs. Texas Tech',
|
||||
'duration': 14166,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/11ce417a-6ac9-42b6-8a15-46aeb9ad5710/16x9.jpg?timestamp=202407252343&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'info_dict': {
|
||||
'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'id': '90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
|
||||
'duration': 8335,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'UC Davis vs. California',
|
||||
'duration': 9547,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'info_dict': {
|
||||
'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'id': 'c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Wheel - Episode 10',
|
||||
'duration': 3352,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'The College Football Show',
|
||||
'duration': 3639,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/c4313bbe-95b5-4bb8-b251-ac143ea0fc54/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -353,6 +353,13 @@ class WatchESPNIE(AdobePassIE):
|
||||
if not cookie:
|
||||
self.raise_login_required(method='cookies')
|
||||
|
||||
jwt = self._search_regex(r'=([^|]+)\|', cookie.value, 'cookie jwt')
|
||||
id_token = self._download_json(
|
||||
'https://registerdisney.go.com/jgc/v6/client/ESPN-ONESITE.WEB-PROD/guest/refresh-auth',
|
||||
None, 'Refreshing token', headers={'Content-Type': 'application/json'}, data=json.dumps({
|
||||
'refreshToken': json.loads(base64.urlsafe_b64decode(f'{jwt}==='))['refresh_token'],
|
||||
}).encode())['data']['token']['id_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'devices', video_id,
|
||||
headers={'Content-Type': 'application/json; charset=UTF-8'},
|
||||
@@ -371,7 +378,7 @@ class WatchESPNIE(AdobePassIE):
|
||||
})['access_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
|
||||
'accounts/grant', video_id, payload={'id_token': id_token},
|
||||
headers={
|
||||
'Authorization': token,
|
||||
'Content-Type': 'application/json; charset=UTF-8',
|
||||
|
||||
@@ -3,7 +3,12 @@ from ..utils import traverse_obj
|
||||
|
||||
|
||||
class EurosportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:
|
||||
(?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
|
||||
eurosport\.tvn24\.pl
|
||||
)/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
|
||||
'info_dict': {
|
||||
@@ -70,6 +75,42 @@ class EurosportIE(InfoExtractor):
|
||||
'duration': 105.0,
|
||||
'upload_date': '20230518',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TOKEN = None
|
||||
@@ -77,6 +118,7 @@ class EurosportIE(InfoExtractor):
|
||||
# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
|
||||
# but this method require to get sha256 hash
|
||||
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_initialize(self):
|
||||
if EurosportIE._TOKEN is None:
|
||||
@@ -98,13 +140,13 @@ class EurosportIE(InfoExtractor):
|
||||
for stream_type in json_data['attributes']['streaming']:
|
||||
if stream_type == 'hls':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
|
||||
elif stream_type == 'dash':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
|
||||
elif stream_type == 'mss':
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
@@ -50,7 +50,7 @@ class FacebookIE(InfoExtractor):
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
events/(?:[^/]+/)?|
|
||||
groups/[^/]+/(?:permalink|posts)/|
|
||||
groups/[^/]+/(?:permalink|posts)/(?:[\da-f]+/)?|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
@@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1692346159,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': '100063551323670',
|
||||
'duration': 3132.184,
|
||||
'duration': 3133.583,
|
||||
'view_count': int,
|
||||
'concurrent_view_count': 0,
|
||||
},
|
||||
@@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
|
||||
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
|
||||
'duration': 131.03,
|
||||
'concurrent_view_count': int,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
@@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': 'ca63897a90c9452efee5f8c40d080e25',
|
||||
'md5': '1659aa21fb3dd1585874f668e81a72c8',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'uploader_id': '100059479812265',
|
||||
'concurrent_view_count': int,
|
||||
'duration': 44.478,
|
||||
'duration': 44.181,
|
||||
},
|
||||
}, {
|
||||
# FIXME: unable to extract uploader, no formats found
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||
@@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1511548260,
|
||||
'upload_date': '20171124',
|
||||
'uploader': 'Vickie Gentry',
|
||||
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
|
||||
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 148.435,
|
||||
'duration': 148.224,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
@@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
|
||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Lela Evans',
|
||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
||||
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
|
||||
'upload_date': '20231228',
|
||||
'timestamp': 1703804085,
|
||||
'duration': 394.347,
|
||||
@@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20180523',
|
||||
'uploader': 'ESL One Dota 2',
|
||||
'uploader_id': '100066514874195',
|
||||
'duration': 4524.212,
|
||||
'duration': 4524.001,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
@@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
|
||||
'title': 'Josef',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
|
||||
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
|
||||
'timestamp': 1549275572,
|
||||
'duration': 3.413,
|
||||
'duration': 3.283,
|
||||
'uploader': 'Josef Novak',
|
||||
'description': '',
|
||||
'upload_date': '20190204',
|
||||
@@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# FIXME: Cannot parse data error
|
||||
# data.event.cover_media_renderer.cover_video
|
||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||
'info_dict': {
|
||||
@@ -407,6 +410,9 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'Comitato Liberi Pensatori',
|
||||
'uploader_id': '100065709540881',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
@@ -498,7 +504,8 @@ class FacebookIE(InfoExtractor):
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||
or get_first(post, ('event', 'event_creator', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
|
||||
uploader = uploader_data.get('name') or (
|
||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
or self._search_regex(
|
||||
@@ -524,6 +531,11 @@ class FacebookIE(InfoExtractor):
|
||||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
|
||||
'like_count': ('likers', 'count', {int}),
|
||||
'comment_count': ('total_comment_count', {int}),
|
||||
'repost_count': ('share_count_reduced', {parse_count}),
|
||||
}), get_all=False),
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
@@ -554,12 +566,13 @@ class FacebookIE(InfoExtractor):
|
||||
return extract_video_data(try_get(
|
||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
|
||||
def extract_dash_manifest(vid_data, formats, mpd_url=None):
|
||||
dash_manifest = traverse_obj(
|
||||
vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
mpd_url=video.get('dash_manifest_url')))
|
||||
mpd_url=url_or_none(vid_data.get('dash_manifest_url')) or mpd_url))
|
||||
|
||||
def process_formats(info):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
@@ -609,16 +622,20 @@ class FacebookIE(InfoExtractor):
|
||||
video = video['creation_story']
|
||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||
video.update(reel_info)
|
||||
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
|
||||
# Legacy formats extraction
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||
('browser_native_sd_url', 'sd')):
|
||||
playable_url = video.get(key)
|
||||
playable_url = fmt_data.get(key)
|
||||
if not playable_url:
|
||||
continue
|
||||
if determine_ext(playable_url) == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(playable_url, video_id))
|
||||
formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@@ -626,7 +643,29 @@ class FacebookIE(InfoExtractor):
|
||||
'quality': q(format_id) - 3,
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
extract_dash_manifest(fmt_data, formats)
|
||||
|
||||
# New videoDeliveryResponse formats extraction
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
|
||||
mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
|
||||
dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
|
||||
for idx, dash_manifest in enumerate(dash_manifests):
|
||||
extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
|
||||
if not dash_manifests:
|
||||
# Only extract from MPD URLs if the manifests are not already provided
|
||||
for mpd_url in mpd_urls:
|
||||
formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
|
||||
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
|
||||
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
# sd, hd formats w/o resolution info should be deprioritized below DASH
|
||||
'quality': q(format_id) - 3,
|
||||
'url': prog_fmt['progressive_url'],
|
||||
})
|
||||
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
|
||||
|
||||
if not formats:
|
||||
# Do not append false positive entry w/o any formats
|
||||
return
|
||||
@@ -932,18 +971,21 @@ class FacebookReelIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||
'md5': 'f13dd37f2633595982db5ed8765474d3',
|
||||
'md5': 'a53256d10fc2105441fe0c4212ed8cea',
|
||||
'info_dict': {
|
||||
'id': '1195289147628387',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
|
||||
'description': 'md5:22f03309b216ac84720183961441d8db',
|
||||
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
|
||||
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'uploader': 'Beast Camp Training',
|
||||
'uploader_id': '100040874179269',
|
||||
'duration': 9.579,
|
||||
'timestamp': 1637502609,
|
||||
'upload_date': '20211121',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
class FirstTVIE(InfoExtractor):
|
||||
IE_NAME = '1tv'
|
||||
IE_DESC = 'Первый канал'
|
||||
_VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:sport)?1tv\.ru/(?:[^/?#]+/)+(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# single format
|
||||
@@ -52,6 +52,9 @@ class FirstTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sport1tv.ru/sport/chempionat-rossii-po-figurnomu-kataniyu-2025',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
@@ -5,6 +6,7 @@ from .common import InfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
@@ -29,6 +31,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
|
||||
class FranceTVIE(InfoExtractor):
|
||||
IE_NAME = 'francetv'
|
||||
_VALID_URL = r'francetv:(?P<id>[^@#]+)'
|
||||
_GEO_COUNTRIES = ['FR']
|
||||
_GEO_BYPASS = False
|
||||
@@ -248,18 +251,19 @@ class FranceTVIE(InfoExtractor):
|
||||
|
||||
|
||||
class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetv:site'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||
'info_dict': {
|
||||
'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba',
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', # old: c5bda21d-2c6f-4470-8849-3d8327adb2ba'
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'timestamp': 1514118300,
|
||||
'duration': 2880,
|
||||
'timestamp': 1502623500,
|
||||
'duration': 2580,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20171224',
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -282,6 +286,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1441,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# geo-restricted livestream (workflow == 'token-akamai')
|
||||
'url': 'https://www.france.tv/france-4/direct.html',
|
||||
@@ -336,19 +341,32 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.goplay
|
||||
def _find_json(self, s):
|
||||
return self._search_json(
|
||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'video id', default=None, group='id')
|
||||
nextjs_data = traverse_obj(
|
||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
||||
(..., {json.loads}, ..., {self._find_json}, ..., 'children', ..., ..., 'children', ..., ..., 'children'))
|
||||
|
||||
if traverse_obj(nextjs_data, (..., ..., 'children', ..., 'isLive', {bool}, any)):
|
||||
# For livestreams we need the id of the stream instead of the currently airing episode id
|
||||
video_id = traverse_obj(nextjs_data, (
|
||||
..., ..., 'children', ..., 'children', ..., 'children', ..., 'children', ..., ...,
|
||||
'children', ..., ..., 'children', ..., ..., 'children', ..., 'options', 'id', {str}, any))
|
||||
else:
|
||||
video_id = traverse_obj(nextjs_data, (
|
||||
..., ..., ..., 'children',
|
||||
lambda _, v: v['video']['url'] == urllib.parse.urlparse(url).path,
|
||||
'video', ('playerReplayId', 'siId'), {str}, any))
|
||||
|
||||
if not video_id:
|
||||
video_id = self._html_search_regex(
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@"]+@[^"]+)"',
|
||||
webpage, 'video ID')
|
||||
raise ExtractorError('Unable to extract video ID')
|
||||
|
||||
return self._make_url_result(video_id, url=url)
|
||||
|
||||
|
||||
@@ -1,349 +0,0 @@
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
make_archive_id,
|
||||
orderedSet,
|
||||
qualities,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class FunimationBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'funimation'
|
||||
_REGION = None
|
||||
_TOKEN = None
|
||||
|
||||
def _get_region(self):
|
||||
region_cookie = self._get_cookies('https://www.funimation.com').get('region')
|
||||
region = region_cookie.value if region_cookie else self.get_param('geo_bypass_country')
|
||||
return region or traverse_obj(
|
||||
self._download_json(
|
||||
'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False,
|
||||
note='Checking geo-location', errnote='Unable to fetch geo-location information'),
|
||||
'region') or 'US'
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self._TOKEN:
|
||||
return
|
||||
try:
|
||||
data = self._download_json(
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
|
||||
None, 'Logging in', data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}))
|
||||
FunimationBaseIE._TOKEN = data['token']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
error = self._parse_json(e.cause.response.read().decode(), None)['error']
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise
|
||||
|
||||
|
||||
class FunimationPageIE(FunimationBaseIE):
|
||||
IE_NAME = 'funimation:page'
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:(?P<lang>[^/]+)/)?(?:shows|v)/(?P<show>[^/]+)/(?P<episode>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
|
||||
'info_dict': {
|
||||
'id': '210050',
|
||||
'ext': 'mp4',
|
||||
'title': 'Broadcast Dub Preview',
|
||||
# Other metadata is tested in FunimationIE
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'add_ie': ['Funimation'],
|
||||
}, {
|
||||
# Not available in US
|
||||
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with lang code
|
||||
'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.funimation.com/v/a-certain-scientific-railgun/super-powered-level-5',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._REGION:
|
||||
FunimationBaseIE._REGION = self._get_region()
|
||||
|
||||
def _real_extract(self, url):
|
||||
locale, show, episode = self._match_valid_url(url).group('lang', 'show', 'episode')
|
||||
|
||||
video_id = traverse_obj(self._download_json(
|
||||
f'https://title-api.prd.funimationsvc.com/v1/shows/{show}/episodes/{episode}',
|
||||
f'{show}_{episode}', query={
|
||||
'deviceType': 'web',
|
||||
'region': self._REGION,
|
||||
'locale': locale or 'en',
|
||||
}), ('videoList', ..., 'id'), get_all=False)
|
||||
|
||||
return self.url_result(f'https://www.funimation.com/player/{video_id}', FunimationIE.ie_key(), video_id)
|
||||
|
||||
|
||||
class FunimationIE(FunimationBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation\.com/player/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/player/210051',
|
||||
'info_dict': {
|
||||
'id': '210050',
|
||||
'display_id': 'broadcast-dub-preview',
|
||||
'ext': 'mp4',
|
||||
'title': 'Broadcast Dub Preview',
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
'episode': 'Broadcast Dub Preview',
|
||||
'episode_id': '210050',
|
||||
'season': 'Extras',
|
||||
'season_id': '166038',
|
||||
'season_number': 99,
|
||||
'series': 'Attack on Titan: Junior High',
|
||||
'description': '',
|
||||
'duration': 155,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'note': 'player_id should be extracted with the relevent compat-opt',
|
||||
'url': 'https://www.funimation.com/player/210051',
|
||||
'info_dict': {
|
||||
'id': '210051',
|
||||
'display_id': 'broadcast-dub-preview',
|
||||
'ext': 'mp4',
|
||||
'title': 'Broadcast Dub Preview',
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
'episode': 'Broadcast Dub Preview',
|
||||
'episode_id': '210050',
|
||||
'season': 'Extras',
|
||||
'season_id': '166038',
|
||||
'season_number': 99,
|
||||
'series': 'Attack on Titan: Junior High',
|
||||
'description': '',
|
||||
'duration': 155,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
'compat_opts': ['seperate-video-versions'],
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _get_experiences(episode):
|
||||
for lang, lang_data in episode.get('languages', {}).items():
|
||||
for video_data in lang_data.values():
|
||||
for version, f in video_data.items():
|
||||
yield lang, version.title(), f
|
||||
|
||||
def _get_episode(self, webpage, experience_id=None, episode_id=None, fatal=True):
|
||||
""" Extract the episode, season and show objects given either episode/experience id """
|
||||
show = self._parse_json(
|
||||
self._search_regex(
|
||||
r'show\s*=\s*({.+?})\s*;', webpage, 'show data', fatal=fatal),
|
||||
experience_id, transform_source=js_to_json, fatal=fatal) or []
|
||||
for season in show.get('seasons', []):
|
||||
for episode in season.get('episodes', []):
|
||||
if episode_id is not None:
|
||||
if str(episode.get('episodePk')) == episode_id:
|
||||
return episode, season, show
|
||||
continue
|
||||
for _, _, f in self._get_experiences(episode):
|
||||
if f.get('experienceId') == experience_id:
|
||||
return episode, season, show
|
||||
if fatal:
|
||||
raise ExtractorError('Unable to find episode information')
|
||||
else:
|
||||
self.report_warning('Unable to find episode information')
|
||||
return {}, {}, {}
|
||||
|
||||
def _real_extract(self, url):
|
||||
initial_experience_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, initial_experience_id, note=f'Downloading player webpage for {initial_experience_id}')
|
||||
episode, season, show = self._get_episode(webpage, experience_id=int(initial_experience_id))
|
||||
episode_id = str(episode['episodePk'])
|
||||
display_id = episode.get('slug') or episode_id
|
||||
|
||||
formats, subtitles, thumbnails, duration = [], {}, [], 0
|
||||
requested_languages, requested_versions = self._configuration_arg('language'), self._configuration_arg('version')
|
||||
language_preference = qualities((requested_languages or [''])[::-1])
|
||||
source_preference = qualities((requested_versions or ['uncut', 'simulcast'])[::-1])
|
||||
only_initial_experience = 'seperate-video-versions' in self.get_param('compat_opts', [])
|
||||
|
||||
for lang, version, fmt in self._get_experiences(episode):
|
||||
experience_id = str(fmt['experienceId'])
|
||||
if (only_initial_experience and experience_id != initial_experience_id
|
||||
or requested_languages and lang.lower() not in requested_languages
|
||||
or requested_versions and version.lower() not in requested_versions):
|
||||
continue
|
||||
thumbnails.append({'url': fmt.get('poster')})
|
||||
duration = max(duration, fmt.get('duration', 0))
|
||||
format_name = f'{version} {lang} ({experience_id})'
|
||||
self.extract_subtitles(
|
||||
subtitles, experience_id, display_id=display_id, format_name=format_name,
|
||||
episode=episode if experience_id == initial_experience_id else episode_id)
|
||||
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['Authorization'] = f'Token {self._TOKEN}'
|
||||
page = self._download_json(
|
||||
f'https://www.funimation.com/api/showexperience/{experience_id}/',
|
||||
display_id, headers=headers, expected_status=403, query={
|
||||
'pinst_id': ''.join(random.choices(string.digits + string.ascii_letters, k=8)),
|
||||
}, note=f'Downloading {format_name} JSON')
|
||||
sources = page.get('items') or []
|
||||
if not sources:
|
||||
error = try_get(page, lambda x: x['errors'][0], dict)
|
||||
if error:
|
||||
self.report_warning('{} said: Error {} - {}'.format(
|
||||
self.IE_NAME, error.get('code'), error.get('detail') or error.get('title')))
|
||||
else:
|
||||
self.report_warning('No sources found for format')
|
||||
|
||||
current_formats = []
|
||||
for source in sources:
|
||||
source_url = source.get('src')
|
||||
source_type = source.get('videoType') or determine_ext(source_url)
|
||||
if source_type == 'm3u8':
|
||||
current_formats.extend(self._extract_m3u8_formats(
|
||||
source_url, display_id, 'mp4', m3u8_id='{}-{}'.format(experience_id, 'hls'), fatal=False,
|
||||
note=f'Downloading {format_name} m3u8 information'))
|
||||
else:
|
||||
current_formats.append({
|
||||
'format_id': f'{experience_id}-{source_type}',
|
||||
'url': source_url,
|
||||
})
|
||||
for f in current_formats:
|
||||
# TODO: Convert language to code
|
||||
f.update({
|
||||
'language': lang,
|
||||
'format_note': version,
|
||||
'source_preference': source_preference(version.lower()),
|
||||
'language_preference': language_preference(lang.lower()),
|
||||
})
|
||||
formats.extend(current_formats)
|
||||
if not formats and (requested_languages or requested_versions):
|
||||
self.raise_no_formats(
|
||||
'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id)
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'_old_archive_ids': [make_archive_id(self, initial_experience_id)],
|
||||
'display_id': display_id,
|
||||
'duration': duration,
|
||||
'title': episode['episodeTitle'],
|
||||
'description': episode.get('episodeSummary'),
|
||||
'episode': episode.get('episodeTitle'),
|
||||
'episode_number': int_or_none(episode.get('episodeId')),
|
||||
'episode_id': episode_id,
|
||||
'season': season.get('seasonTitle'),
|
||||
'season_number': int_or_none(season.get('seasonId')),
|
||||
'season_id': str_or_none(season.get('seasonPk')),
|
||||
'series': show.get('showTitle'),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'_format_sort_fields': ('lang', 'source'),
|
||||
}
|
||||
|
||||
def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name):
|
||||
if isinstance(episode, str):
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.funimation.com/player/{experience_id}/', display_id,
|
||||
fatal=False, note=f'Downloading player webpage for {format_name}')
|
||||
episode, _, _ = self._get_episode(webpage, episode_id=episode, fatal=False)
|
||||
|
||||
for _, version, f in self._get_experiences(episode):
|
||||
for source in f.get('sources'):
|
||||
for text_track in source.get('textTracks'):
|
||||
if not text_track.get('src'):
|
||||
continue
|
||||
sub_type = text_track.get('type').upper()
|
||||
sub_type = sub_type if sub_type != 'FULL' else None
|
||||
current_sub = {
|
||||
'url': text_track['src'],
|
||||
'name': join_nonempty(version, text_track.get('label'), sub_type, delim=' '),
|
||||
}
|
||||
lang = join_nonempty(text_track.get('language', 'und'),
|
||||
version if version != 'Simulcast' else None,
|
||||
sub_type, delim='_')
|
||||
if current_sub not in subtitles.get(lang, []):
|
||||
subtitles.setdefault(lang, []).append(current_sub)
|
||||
return subtitles
|
||||
|
||||
|
||||
class FunimationShowIE(FunimationBaseIE):
|
||||
IE_NAME = 'funimation:show'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?P<locale>[^/]+)?/?shows/(?P<id>[^/?#&]+))/?(?:[?#]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/en/shows/sk8-the-infinity',
|
||||
'info_dict': {
|
||||
'id': '1315000',
|
||||
'title': 'SK8 the Infinity',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# without lang code
|
||||
'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/',
|
||||
'info_dict': {
|
||||
'id': '39643',
|
||||
'title': 'Ouran High School Host Club',
|
||||
},
|
||||
'playlist_count': 26,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._REGION:
|
||||
FunimationBaseIE._REGION = self._get_region()
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, locale, display_id = self._match_valid_url(url).groups()
|
||||
|
||||
show_info = self._download_json(
|
||||
'https://title-api.prd.funimationsvc.com/v2/shows/{}?region={}&deviceType=web&locale={}'.format(
|
||||
display_id, self._REGION, locale or 'en'), display_id)
|
||||
items_info = self._download_json(
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id={}'.format(
|
||||
show_info.get('id')), display_id)
|
||||
|
||||
vod_items = traverse_obj(items_info, ('items', ..., lambda k, _: re.match(r'(?i)mostRecent[AS]vod', k), 'item'))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': str_or_none(show_info['id']),
|
||||
'title': show_info['name'],
|
||||
'entries': orderedSet(
|
||||
self.url_result(
|
||||
'{}/{}'.format(base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(),
|
||||
vod_item.get('episodeId'), vod_item.get('episodeName'))
|
||||
for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder', -1))),
|
||||
}
|
||||
@@ -3,7 +3,7 @@ from .nexx import NexxIE
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
|
||||
'md5': '8610449476156f338761a75391b0017d',
|
||||
@@ -27,6 +27,9 @@ class FunkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
141
yt_dlp/extractor/gamedevtv.py
Normal file
141
yt_dlp/extractor/gamedevtv.py
Normal file
@@ -0,0 +1,141 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GameDevTVDashboardIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gamedev\.tv/dashboard/courses/(?P<course_id>\d+)(?:/(?P<lecture_id>\d+))?'
|
||||
_NETRC_MACHINE = 'gamedevtv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.gamedev.tv/dashboard/courses/25',
|
||||
'info_dict': {
|
||||
'id': '25',
|
||||
'title': 'Complete Blender Creator 3: Learn 3D Modelling for Beginners',
|
||||
'tags': ['blender', 'course', 'all', 'box modelling', 'sculpting'],
|
||||
'categories': ['Blender', '3D Art'],
|
||||
'thumbnail': 'https://gamedev-files.b-cdn.net/courses/qisc9pmu1jdc.jpg',
|
||||
'upload_date': '20220516',
|
||||
'timestamp': 1652694420,
|
||||
'modified_date': '20241027',
|
||||
'modified_timestamp': 1730049658,
|
||||
},
|
||||
'playlist_count': 100,
|
||||
}, {
|
||||
'url': 'https://www.gamedev.tv/dashboard/courses/63/2279',
|
||||
'info_dict': {
|
||||
'id': 'df04f4d8-68a4-4756-a71b-9ca9446c3a01',
|
||||
'ext': 'mp4',
|
||||
'modified_timestamp': 1701695752,
|
||||
'upload_date': '20230504',
|
||||
'episode': 'MagicaVoxel Community Course Introduction',
|
||||
'series_id': '63',
|
||||
'title': 'MagicaVoxel Community Course Introduction',
|
||||
'timestamp': 1683195397,
|
||||
'modified_date': '20231204',
|
||||
'categories': ['3D Art', 'MagicaVoxel'],
|
||||
'season': 'MagicaVoxel Community Course',
|
||||
'tags': ['MagicaVoxel', 'all', 'course'],
|
||||
'series': 'MagicaVoxel 3D Art Mini Course',
|
||||
'duration': 1405,
|
||||
'episode_number': 1,
|
||||
'season_number': 1,
|
||||
'season_id': '219',
|
||||
'description': 'md5:a378738c5bbec1c785d76c067652d650',
|
||||
'display_id': '63-219-2279',
|
||||
'alt_title': '1_CC_MVX MagicaVoxel Community Course Introduction.mp4',
|
||||
'thumbnail': 'https://vz-23691c65-6fa.b-cdn.net/df04f4d8-68a4-4756-a71b-9ca9446c3a01/thumbnail.jpg',
|
||||
},
|
||||
}]
|
||||
_API_HEADERS = {}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
try:
|
||||
response = self._download_json(
|
||||
'https://api.gamedev.tv/api/students/login', None, 'Logging in',
|
||||
headers={'Content-Type': 'application/json'},
|
||||
data=json.dumps({
|
||||
'email': username,
|
||||
'password': password,
|
||||
'cart_items': [],
|
||||
}).encode())
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
raise ExtractorError('Invalid username/password', expected=True)
|
||||
raise
|
||||
|
||||
self._API_HEADERS['Authorization'] = f'{response["token_type"]} {response["access_token"]}'
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._API_HEADERS.get('Authorization'):
|
||||
self.raise_login_required(
|
||||
'This content is only available with purchase', method='password')
|
||||
|
||||
def _entries(self, data, course_id, course_info, selected_lecture):
|
||||
for section in traverse_obj(data, ('sections', ..., {dict})):
|
||||
section_info = traverse_obj(section, {
|
||||
'season_id': ('id', {str_or_none}),
|
||||
'season': ('title', {str}),
|
||||
'season_number': ('order', {int_or_none}),
|
||||
})
|
||||
for lecture in traverse_obj(section, ('lectures', lambda _, v: url_or_none(v['video']['playListUrl']))):
|
||||
if selected_lecture and str(lecture.get('id')) != selected_lecture:
|
||||
continue
|
||||
display_id = join_nonempty(course_id, section_info.get('season_id'), lecture.get('id'))
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
lecture['video']['playListUrl'], display_id, 'mp4', m3u8_id='hls')
|
||||
yield {
|
||||
**course_info,
|
||||
**section_info,
|
||||
'id': display_id, # fallback
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'series': course_info.get('title'),
|
||||
'series_id': course_id,
|
||||
**traverse_obj(lecture, {
|
||||
'id': ('video', 'guid', {str}),
|
||||
'title': ('title', {str}),
|
||||
'alt_title': ('video', 'title', {str}),
|
||||
'description': ('description', {clean_html}),
|
||||
'episode': ('title', {str}),
|
||||
'episode_number': ('order', {int_or_none}),
|
||||
'duration': ('video', 'duration_in_sec', {int_or_none}),
|
||||
'timestamp': ('video', 'created_at', {parse_iso8601}),
|
||||
'modified_timestamp': ('video', 'updated_at', {parse_iso8601}),
|
||||
'thumbnail': ('video', 'thumbnailUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_id, lecture_id = self._match_valid_url(url).group('course_id', 'lecture_id')
|
||||
data = self._download_json(
|
||||
f'https://api.gamedev.tv/api/courses/my/{course_id}', course_id,
|
||||
headers=self._API_HEADERS)['data']
|
||||
|
||||
course_info = traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'tags': ('tags', ..., 'name', {str}),
|
||||
'categories': ('categories', ..., 'title', {str}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'modified_timestamp': ('updated_at', {parse_iso8601}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
})
|
||||
|
||||
entries = self._entries(data, course_id, course_info, lecture_id)
|
||||
if lecture_id:
|
||||
lecture = next(entries, None)
|
||||
if not lecture:
|
||||
raise ExtractorError('Lecture not found')
|
||||
return lecture
|
||||
return self.playlist_result(entries, course_id, **course_info)
|
||||
@@ -8,6 +8,9 @@ from .common import InfoExtractor
|
||||
from .commonprotocols import RtmpIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
MEDIA_EXTENSIONS,
|
||||
@@ -2340,7 +2343,7 @@ class GenericIE(InfoExtractor):
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
if re.match(r'[^\s/]+\.[^\s/]+/', url):
|
||||
self.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
@@ -2373,6 +2376,11 @@ class GenericIE(InfoExtractor):
|
||||
else:
|
||||
video_id = self._generic_id(url)
|
||||
|
||||
# Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
|
||||
impersonate = self._configuration_arg('impersonate', ['false'])
|
||||
if 'false' in impersonate:
|
||||
impersonate = None
|
||||
|
||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
||||
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
|
||||
@@ -2381,10 +2389,29 @@ class GenericIE(InfoExtractor):
|
||||
# to accept raw bytes and being able to download only a chunk.
|
||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||
# after a HEAD request, but not sure if we can rely on this.
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}))
|
||||
try:
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}), impersonate=impersonate)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
|
||||
and e.cause.response.get_header('cf-mitigated') == 'challenge'
|
||||
and e.cause.response.extensions.get('impersonate') is None):
|
||||
raise
|
||||
cf_cookie_domain = traverse_obj(
|
||||
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
|
||||
('__cf_bm', 'domain'))
|
||||
if cf_cookie_domain:
|
||||
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
|
||||
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
|
||||
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
|
||||
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
|
||||
'how to install the required impersonation dependency, and ')
|
||||
raise ExtractorError(
|
||||
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
|
||||
|
||||
new_url = full_response.url
|
||||
if new_url != extract_basic_auth(url)[0]:
|
||||
self.report_following_redirect(new_url)
|
||||
@@ -2400,7 +2427,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = full_response.headers.get('Content-Type', '').lower()
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
self.report_detected('direct video link')
|
||||
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
||||
|
||||
91
yt_dlp/extractor/germanupa.py
Normal file
91
yt_dlp/extractor/germanupa.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GermanupaIE(InfoExtractor):
|
||||
IE_DESC = 'germanupa.de'
|
||||
_VALID_URL = r'https?://germanupa\.de/mediathek/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://germanupa.de/mediathek/4-figma-beratung-deine-sprechstunde-fuer-figma-fragen',
|
||||
'info_dict': {
|
||||
'id': '909179246',
|
||||
'title': 'Tutorial: #4 Figma Beratung - Deine Sprechstunde für Figma-Fragen',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'German UPA',
|
||||
'uploader_id': 'germanupa',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1792564420-7415283ccef8bf8702dab8c6b7515555ceeb7a1c11371ffcc133b8e887dbf70e-d_1280',
|
||||
'uploader_url': 'https://vimeo.com/germanupa',
|
||||
'duration': 3987,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'audio, uses GenericIE',
|
||||
'url': 'https://germanupa.de/mediathek/live-vom-ux-festival-neuigkeiten-von-figma-jobmarkt-agenturszene-interview-zu-sustainable',
|
||||
'info_dict': {
|
||||
'id': '1867346676',
|
||||
'title': 'Live vom UX Festival: Neuigkeiten von Figma, Jobmarkt, Agenturszene & Interview zu Sustainable UX',
|
||||
'ext': 'opus',
|
||||
'timestamp': 1720545088,
|
||||
'upload_date': '20240709',
|
||||
'duration': 3910.557,
|
||||
'like_count': int,
|
||||
'description': 'md5:db2aed5ff131e177a7b33901e9a8db05',
|
||||
'uploader': 'German UPA',
|
||||
'repost_count': int,
|
||||
'genres': ['Science'],
|
||||
'license': 'all-rights-reserved',
|
||||
'uploader_url': 'https://soundcloud.com/user-80097677',
|
||||
'uploader_id': '471579486',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-oCti2e9GhaZFWBqY-48ybGw-original.jpg',
|
||||
},
|
||||
}, {
|
||||
'note': 'Nur für Mitglieder/Just for members',
|
||||
'url': 'https://germanupa.de/mediathek/ux-festival-2024-usability-tests-und-ai',
|
||||
'info_dict': {
|
||||
'id': '986994430',
|
||||
'title': 'UX Festival 2024 "Usability Tests und AI" von Lennart Weber',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20240719',
|
||||
'uploader_url': 'https://vimeo.com/germanupa',
|
||||
'timestamp': 1721373980,
|
||||
'license': 'by-sa',
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1904187064-2a672630c30f9ad787bd390bff3f51d7506a3e8416763ba6dbf465732b165c5c-d_1280',
|
||||
'duration': 2146,
|
||||
'release_timestamp': 1721373980,
|
||||
'uploader': 'German UPA',
|
||||
'uploader_id': 'germanupa',
|
||||
'upload_date': '20240719',
|
||||
'comment_count': int,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
param_url = traverse_obj(
|
||||
self._search_regex(
|
||||
r'<iframe[^>]+data-src\s*?=\s*?([\'"])(?P<url>https://germanupa\.de/media/oembed\?url=(?:(?!\1).)+)\1',
|
||||
webpage, 'embedded video', default=None, group='url'),
|
||||
({parse_qs}, 'url', 0, {url_or_none}))
|
||||
|
||||
if not param_url:
|
||||
if self._search_regex(
|
||||
r'<div[^>]+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1',
|
||||
webpage, 'login wrapper', default=None):
|
||||
self.raise_login_required('This video is only available for members')
|
||||
return self.url_result(url, 'Generic') # Fall back to generic to extract audio
|
||||
|
||||
real_url = param_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/')
|
||||
return self.url_result(VimeoIE._smuggle_referrer(real_url, url), VimeoIE, video_id)
|
||||
@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
|
||||
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
|
||||
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://academymel.online/3video_1',
|
||||
|
||||
@@ -1,32 +1,48 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import subs_list_to_dict, traverse_obj
|
||||
|
||||
|
||||
class GloboIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
|
||||
_VALID_URL = r'(?:globo:|https?://[^/?#]+?\.globo\.com/(?:[^/?#]+/))(?P<id>\d{7,})'
|
||||
_NETRC_MACHINE = 'globo'
|
||||
_VIDEO_VIEW = '''
|
||||
query getVideoView($videoId: ID!) {
|
||||
video(id: $videoId) {
|
||||
duration
|
||||
description
|
||||
relatedEpisodeNumber
|
||||
relatedSeasonNumber
|
||||
headline
|
||||
title {
|
||||
originProgramId
|
||||
headline
|
||||
}
|
||||
}
|
||||
}
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
|
||||
'url': 'https://globoplay.globo.com/v/3607726/',
|
||||
'info_dict': {
|
||||
'id': '3607726',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
|
||||
'duration': 103.204,
|
||||
'uploader': 'G1',
|
||||
'uploader_id': '2015',
|
||||
'uploader': 'G1 ao vivo',
|
||||
'uploader_id': '4209',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -38,39 +54,36 @@ class GloboIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
|
||||
'duration': 137.973,
|
||||
'uploader': 'Rede Globo',
|
||||
'uploader_id': '196',
|
||||
'uploader': 'Bom Dia Brasil',
|
||||
'uploader_id': '810',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'globo:3607726',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://globoplay.globo.com/v/10248083/',
|
||||
},
|
||||
{
|
||||
'url': 'globo:8013907', # needs subscription to globoplay
|
||||
'info_dict': {
|
||||
'id': '10248083',
|
||||
'id': '8013907',
|
||||
'ext': 'mp4',
|
||||
'title': 'Melhores momentos: Equador 1 x 1 Brasil pelas Eliminatórias da Copa do Mundo 2022',
|
||||
'duration': 530.964,
|
||||
'uploader': 'SporTV',
|
||||
'uploader_id': '698',
|
||||
'title': 'Capítulo de 14⧸08⧸1989',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'globo:12824146',
|
||||
'info_dict': {
|
||||
'id': '12824146',
|
||||
'ext': 'mp4',
|
||||
'title': 'Acordo de damas',
|
||||
'episode_number': 1,
|
||||
'season_number': 2,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -80,98 +93,70 @@ class GloboIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
self._request_webpage(
|
||||
HEADRequest('https://globo-ab.globo.com/v2/selected-alternatives?experiments=player-isolated-experiment-02&skipImpressions=true'),
|
||||
video_id, 'Getting cookies')
|
||||
|
||||
video = self._download_json(
|
||||
f'http://api.globovideos.com/videos/{video_id}/playlist',
|
||||
video_id)['videos'][0]
|
||||
if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True:
|
||||
self.report_drm(video_id)
|
||||
|
||||
title = video['title']
|
||||
info = self._download_json(
|
||||
'https://cloud-jarvis.globo.com/graphql', video_id,
|
||||
query={
|
||||
'operationName': 'getVideoView',
|
||||
'variables': json.dumps({'videoId': video_id}),
|
||||
'query': self._VIDEO_VIEW,
|
||||
}, headers={
|
||||
'content-type': 'application/json',
|
||||
'x-platform-id': 'web',
|
||||
'x-device-id': 'desktop',
|
||||
'x-client-version': '2024.12-5',
|
||||
})['data']['video']
|
||||
|
||||
formats = []
|
||||
security = self._download_json(
|
||||
'https://playback.video.globo.com/v2/video-session', video_id, f'Downloading security hash for {video_id}',
|
||||
headers={'content-type': 'application/json'}, data=json.dumps({
|
||||
'player_type': 'desktop',
|
||||
video = self._download_json(
|
||||
'https://playback.video.globo.com/v4/video-session', video_id,
|
||||
f'Downloading resource info for {video_id}',
|
||||
headers={'Content-Type': 'application/json'},
|
||||
data=json.dumps(filter_dict({
|
||||
'player_type': 'mirakulo_8k_hdr',
|
||||
'video_id': video_id,
|
||||
'quality': 'max',
|
||||
'content_protection': 'widevine',
|
||||
'vsid': '581b986b-4c40-71f0-5a58-803e579d5fa2',
|
||||
'tz': '-3.0:00',
|
||||
}).encode())
|
||||
'vsid': f'{uuid.uuid4()}',
|
||||
'consumption': 'streaming',
|
||||
'capabilities': {'low_latency': True},
|
||||
'tz': '-03:00',
|
||||
'Authorization': try_get(self._get_cookies('https://globo.com'),
|
||||
lambda x: f'Bearer {x["GLBID"].value}'),
|
||||
'version': 1,
|
||||
})).encode())
|
||||
|
||||
self._request_webpage(HEADRequest(security['sources'][0]['url_template']), video_id, 'Getting locksession cookie')
|
||||
if traverse_obj(video, ('resource', 'drm_protection_enabled', {bool})):
|
||||
self.report_drm(video_id)
|
||||
|
||||
security_hash = security['sources'][0]['token']
|
||||
if not security_hash:
|
||||
message = security.get('message')
|
||||
if message:
|
||||
raise ExtractorError(
|
||||
f'{self.IE_NAME} returned error: {message}', expected=True)
|
||||
main_source = video['sources'][0]
|
||||
|
||||
hash_code = security_hash[:2]
|
||||
padding = '%010d' % random.randint(1, 10000000000)
|
||||
if hash_code in ('04', '14'):
|
||||
received_time = security_hash[3:13]
|
||||
received_md5 = security_hash[24:]
|
||||
hash_prefix = security_hash[:23]
|
||||
elif hash_code in ('02', '12', '03', '13'):
|
||||
received_time = security_hash[2:12]
|
||||
received_md5 = security_hash[22:]
|
||||
padding += '1'
|
||||
hash_prefix = '05' + security_hash[:22]
|
||||
|
||||
padded_sign_time = str(int(received_time) + 86400) + padding
|
||||
md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
|
||||
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
|
||||
signed_hash = hash_prefix + padded_sign_time + signed_md5
|
||||
source = security['sources'][0]['url_parts']
|
||||
resource_url = source['scheme'] + '://' + source['domain'] + source['path']
|
||||
signed_url = '{}?h={}&k=html5&a={}'.format(resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')
|
||||
|
||||
fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
|
||||
for resource in video['resources']:
|
||||
if resource.get('type') == 'subtitle':
|
||||
subtitles.setdefault(resource.get('language') or 'por', []).append({
|
||||
'url': resource.get('url'),
|
||||
})
|
||||
subs = try_get(security, lambda x: x['source']['subtitles'], expected_type=dict) or {}
|
||||
for sub_lang, sub_url in subs.items():
|
||||
if sub_url:
|
||||
subtitles.setdefault(sub_lang or 'por', []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
subs = try_get(security, lambda x: x['source']['subtitles_webvtt'], expected_type=dict) or {}
|
||||
for sub_lang, sub_url in subs.items():
|
||||
if sub_url:
|
||||
subtitles.setdefault(sub_lang or 'por', []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
duration = float_or_none(video.get('duration'), 1000)
|
||||
uploader = video.get('channel')
|
||||
uploader_id = str_or_none(video.get('channel_id'))
|
||||
# 4k streams are exclusively outputted in dash, so we need to filter these out
|
||||
if determine_ext(main_source['url']) == 'mpd':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(main_source['url'], video_id, mpd_id='dash')
|
||||
else:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
main_source['url'], video_id, 'mp4', m3u8_id='hls')
|
||||
self._merge_subtitles(traverse_obj(main_source, ('text', ..., {
|
||||
'url': ('subtitle', 'srt', 'url', {url_or_none}),
|
||||
}, all, {subs_list_to_dict(lang='en')})), target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
**traverse_obj(info, {
|
||||
'title': ('headline', {str}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'uploader': ('title', 'headline', {str}),
|
||||
'uploader_id': ('title', 'originProgramId', {str_or_none}),
|
||||
'episode_number': ('relatedEpisodeNumber', {int_or_none}),
|
||||
'season_number': ('relatedSeasonNumber', {int_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class GloboArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://(?!globoplay).+?\.globo\.com/(?:[^/?#]+/)*(?P<id>[^/?#.]+)(?:\.html)?'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id=["\'](\d{7,})["\']',
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GolemIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_TEST = {
|
||||
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||
|
||||
@@ -1,40 +1,48 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GoodGameIE(InfoExtractor):
|
||||
IE_NAME = 'goodgame:stream'
|
||||
_VALID_URL = r'https?://goodgame\.ru/channel/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://goodgame\.ru/(?!channel/)(?P<id>[\w.*-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://goodgame.ru/channel/Pomi/#autoplay',
|
||||
'url': 'https://goodgame.ru/TGW#autoplay',
|
||||
'info_dict': {
|
||||
'id': 'pomi',
|
||||
'id': '7998',
|
||||
'ext': 'mp4',
|
||||
'title': r're:Reynor vs Special \(1/2,bo3\) Wardi Spring EU \- playoff \(финальный день\) \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'channel_id': '1644',
|
||||
'channel': 'Pomi',
|
||||
'channel_url': 'https://goodgame.ru/channel/Pomi/',
|
||||
'description': 'md5:4a87b775ee7b2b57bdccebe285bbe171',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'channel_id': '7998',
|
||||
'title': r're:шоуматч Happy \(NE\) vs Fortitude \(UD\), потом ладдер и дс \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'channel_url': 'https://goodgame.ru/TGW',
|
||||
'thumbnail': 'https://hls.goodgame.ru/previews/7998_240.jpg',
|
||||
'uploader': 'TGW',
|
||||
'channel': 'JosephStalin',
|
||||
'live_status': 'is_live',
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
'channel_follower_count': int,
|
||||
'uploader_id': '2899',
|
||||
'concurrent_view_count': int,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'May not be online',
|
||||
}, {
|
||||
'url': 'https://goodgame.ru/Mr.Gray',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://goodgame.ru/HeDoPa3yMeHue*',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_name = self._match_id(url)
|
||||
response = self._download_json(f'https://api2.goodgame.ru/v2/streams/{channel_name}', channel_name)
|
||||
player_id = response['channel']['gg_player_src']
|
||||
response = self._download_json(f'https://goodgame.ru/api/4/users/{channel_name}/stream', channel_name)
|
||||
player_id = response['streamkey']
|
||||
|
||||
formats, subtitles = [], {}
|
||||
if response.get('status') == 'Live':
|
||||
if response.get('status'):
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
f'https://hls.goodgame.ru/manifest/{player_id}_master.m3u8',
|
||||
channel_name, 'mp4', live=True)
|
||||
@@ -45,13 +53,17 @@ class GoodGameIE(InfoExtractor):
|
||||
'id': player_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': traverse_obj(response, ('channel', 'title')),
|
||||
'channel': channel_name,
|
||||
'channel_id': str_or_none(traverse_obj(response, ('channel', 'id'))),
|
||||
'channel_url': response.get('url'),
|
||||
'description': clean_html(traverse_obj(response, ('channel', 'description'))),
|
||||
'thumbnail': traverse_obj(response, ('channel', 'thumb')),
|
||||
'is_live': bool(formats),
|
||||
'view_count': int_or_none(response.get('viewers')),
|
||||
'age_limit': 18 if traverse_obj(response, ('channel', 'adult')) else None,
|
||||
**traverse_obj(response, {
|
||||
'title': ('title', {str}),
|
||||
'channel': ('channelkey', {str}),
|
||||
'channel_id': ('id', {str_or_none}),
|
||||
'channel_url': ('link', {url_or_none}),
|
||||
'uploader': ('streamer', 'username', {str}),
|
||||
'uploader_id': ('streamer', 'id', {str_or_none}),
|
||||
'thumbnail': ('preview', {url_or_none}, {self._proto_relative_url}),
|
||||
'concurrent_view_count': ('viewers', {int_or_none}),
|
||||
'channel_follower_count': ('followers', {int_or_none}),
|
||||
'age_limit': ('adult', {bool}, {lambda x: 18 if x else None}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -5,56 +5,62 @@ import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class GoPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)'
|
||||
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
|
||||
|
||||
_NETRC_MACHINE = 'goplay'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay',
|
||||
'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
|
||||
'info_dict': {
|
||||
'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811',
|
||||
'id': '2baa4560-87a0-421b-bffc-359914e3c387',
|
||||
'ext': 'mp4',
|
||||
'title': 'S3 - Aflevering 2',
|
||||
'series': 'De Container Cup',
|
||||
'season': 'Season 3',
|
||||
'season_number': 3,
|
||||
'episode': 'Episode 2',
|
||||
'episode_number': 2,
|
||||
'title': 'S22 - Aflevering 1',
|
||||
'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
|
||||
'series': 'De Slimste Mens ter Wereld',
|
||||
'episode': 'Episode 1',
|
||||
'season_number': 22,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 22',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay',
|
||||
'url': 'https://www.goplay.be/video/1917',
|
||||
'info_dict': {
|
||||
'id': '74e3ed07-748c-49e4-85a0-393a93337dbf',
|
||||
'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Family for the Holidays',
|
||||
'title': '1917',
|
||||
'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
|
||||
'info_dict': {
|
||||
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
|
||||
'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
|
||||
'ext': 'mp4',
|
||||
'title': 'S11 - Aflevering 1',
|
||||
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
|
||||
'episode': 'Episode 1',
|
||||
'series': 'De Mol',
|
||||
'season_number': 11,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 11',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}]
|
||||
|
||||
@@ -69,27 +75,44 @@ class GoPlayIE(InfoExtractor):
|
||||
if not self._id_token:
|
||||
raise self.raise_login_required(method='password')
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, display_id = self._match_valid_url(url).group(0, 'display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
|
||||
video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
|
||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv
|
||||
def _find_json(self, s):
|
||||
return self._search_json(
|
||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
||||
|
||||
movie = video_data.get('movie')
|
||||
if movie:
|
||||
video_id = movie['videoUuid']
|
||||
info_dict = {
|
||||
'title': movie.get('title'),
|
||||
}
|
||||
else:
|
||||
episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False)
|
||||
video_id = episode['videoUuid']
|
||||
info_dict = {
|
||||
'title': episode.get('episodeTitle'),
|
||||
'series': traverse_obj(episode, ('program', 'title')),
|
||||
'season_number': episode.get('seasonNumber'),
|
||||
'episode_number': episode.get('episodeNumber'),
|
||||
}
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nextjs_data = traverse_obj(
|
||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
||||
(..., {json.loads}, ..., {self._find_json}, ...))
|
||||
meta = traverse_obj(nextjs_data, (
|
||||
..., ..., 'children', ..., ..., 'children',
|
||||
lambda _, v: v['video']['path'] == urllib.parse.urlparse(url).path, 'video', any))
|
||||
|
||||
video_id = meta['uuid']
|
||||
info_dict = traverse_obj(meta, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str.strip}),
|
||||
})
|
||||
|
||||
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
|
||||
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
|
||||
episode_data = traverse_obj(
|
||||
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
|
||||
if not episode_data:
|
||||
continue
|
||||
|
||||
episode_title = traverse_obj(
|
||||
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
|
||||
info_dict.update({
|
||||
'title': episode_title or info_dict.get('title'),
|
||||
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
|
||||
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
|
||||
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
|
||||
})
|
||||
break
|
||||
|
||||
api = self._download_json(
|
||||
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class HRFernsehenIE(InfoExtractor):
|
||||
IE_NAME = 'hrfernsehen'
|
||||
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
|
||||
'md5': '5c4e0ba94677c516a2f65a84110fc536',
|
||||
|
||||
@@ -8,15 +8,19 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class HuyaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
IE_NAME = 'huya:live'
|
||||
IE_DESC = 'huya.com'
|
||||
TESTS = [{
|
||||
@@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '572329',
|
||||
'title': str,
|
||||
'ext': 'flv',
|
||||
'description': str,
|
||||
'is_live': True,
|
||||
'view_count': int,
|
||||
@@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor):
|
||||
fm = base64.b64decode(params['fm']).decode().split('_', 1)[0]
|
||||
ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
|
||||
return fm, ss
|
||||
|
||||
|
||||
class HuyaVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P<id>\d+)\.html'
|
||||
IE_NAME = 'huya:video'
|
||||
IE_DESC = '虎牙视频'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.huya.com/video/play/1002412640.html',
|
||||
'info_dict': {
|
||||
'id': '1002412640',
|
||||
'ext': 'mp4',
|
||||
'title': '8月3日',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 14,
|
||||
'uploader': '虎牙-ATS欧卡车队青木',
|
||||
'uploader_id': '1564376151',
|
||||
'upload_date': '20240803',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.huya.com/video/play/556054543.html',
|
||||
'info_dict': {
|
||||
'id': '556054543',
|
||||
'ext': 'mp4',
|
||||
'title': '我不挑事 也不怕事',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 1864,
|
||||
'uploader': '卡尔',
|
||||
'uploader_id': '367138632',
|
||||
'upload_date': '20210811',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url: str):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://liveapi.huya.com/moment/getMomentContent', video_id,
|
||||
query={'videoId': video_id})['data']['moment']['videoInfo']
|
||||
|
||||
formats = []
|
||||
for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append({
|
||||
'url': definition['url'],
|
||||
**traverse_obj(definition, {
|
||||
'format_id': ('defName', {str}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('videoCover', {url_or_none}),
|
||||
'duration': ('videoDuration', {parse_duration}),
|
||||
'uploader': ('nickName', {str}),
|
||||
'uploader_id': ('uid', {str_or_none}),
|
||||
'upload_date': ('videoUploadTime', {unified_strdate}),
|
||||
'view_count': ('videoPlayNum', {int_or_none}),
|
||||
'comment_count': ('videoCommentNum', {int_or_none}),
|
||||
'like_count': ('favorCount', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -63,7 +62,7 @@ class IlPostIE(InfoExtractor):
|
||||
'url': ('podcast_raw_url', {url_or_none}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'timestamp': ('timestamp', {int_or_none}),
|
||||
'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('milliseconds', {float_or_none(scale=1000)}),
|
||||
'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ class ImgurBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ImgurIE(ImgurBaseIE):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://imgur.com/A61SaA1',
|
||||
@@ -54,6 +54,22 @@ class ImgurIE(ImgurBaseIE):
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||
},
|
||||
}, {
|
||||
# Test with URL slug
|
||||
'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1',
|
||||
'info_dict': {
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 'MRW gifv is up and running without any bugs',
|
||||
'timestamp': 1416446068,
|
||||
'upload_date': '20141120',
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1416446068,
|
||||
'release_date': '20141120',
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
'only_matching': True,
|
||||
@@ -92,6 +108,7 @@ class ImgurIE(ImgurBaseIE):
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1710491255,
|
||||
'release_date': '20240315',
|
||||
'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -208,7 +225,10 @@ class ImgurIE(ImgurBaseIE):
|
||||
}), get_all=False),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'thumbnail': url_or_none(search('thumbnailUrl')),
|
||||
'thumbnails': [{
|
||||
'url': thumbnail_url,
|
||||
'http_headers': {'Accept': '*/*'},
|
||||
}] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None,
|
||||
'http_headers': {'Accept': '*/*'},
|
||||
}
|
||||
|
||||
@@ -252,17 +272,9 @@ class ImgurGalleryBaseIE(ImgurBaseIE):
|
||||
|
||||
class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
IE_NAME = 'imgur:gallery'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://imgur.com/gallery/Q95ko',
|
||||
'info_dict': {
|
||||
'id': 'Q95ko',
|
||||
'title': 'Adding faces make every GIF better',
|
||||
},
|
||||
'playlist_count': 25,
|
||||
'skip': 'Zoinks! You\'ve taken a wrong turn.',
|
||||
}, {
|
||||
# TODO: static images - replace with animated/video gallery
|
||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||
'only_matching': True,
|
||||
@@ -280,7 +292,27 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
'release_timestamp': 1358554297,
|
||||
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||
'release_date': '20130119',
|
||||
'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
|
||||
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# Test with slug
|
||||
'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx',
|
||||
'add_ies': ['Imgur'],
|
||||
'info_dict': {
|
||||
'id': 'YcAQlkx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||
'timestamp': 1358554297,
|
||||
'upload_date': '20130119',
|
||||
'uploader_id': '1648642',
|
||||
'uploader': 'wittyusernamehere',
|
||||
'release_timestamp': 1358554297,
|
||||
'release_date': '20130119',
|
||||
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
@@ -317,6 +349,13 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
'title': 'Penguins !',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ',
|
||||
'info_dict': {
|
||||
'id': '6lAn9VQ',
|
||||
'title': 'Penguins !',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
|
||||
'add_ies': ['Imgur'],
|
||||
@@ -357,7 +396,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
|
||||
class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||
IE_NAME = 'imgur:album'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
_GALLERY = False
|
||||
_TESTS = [{
|
||||
# TODO: only static images - replace with animated/video gallery
|
||||
@@ -372,6 +411,14 @@ class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||
'title': 'enen-no-shouboutai',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# Test with URL slug
|
||||
'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX',
|
||||
'info_dict': {
|
||||
'id': 'iX265HX',
|
||||
'title': 'enen-no-shouboutai',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://imgur.com/a/8pih2Ed',
|
||||
'info_dict': {
|
||||
|
||||
@@ -48,7 +48,6 @@ class InstagramBaseIE(InfoExtractor):
|
||||
'X-IG-WWW-Claim': '0',
|
||||
'Origin': 'https://www.instagram.com',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@@ -255,7 +254,7 @@ class InstagramIOSIE(InfoExtractor):
|
||||
|
||||
|
||||
class InstagramIE(InstagramBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/(?!share/)[^/?#]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
@@ -435,10 +434,10 @@ class InstagramIE(InstagramBaseIE):
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'query_hash': '9f8827793ef34641b2fb195d4d41151c',
|
||||
'doc_id': '8845758582119845',
|
||||
'variables': json.dumps(variables, separators=(',', ':')),
|
||||
})
|
||||
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
|
||||
media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
|
||||
|
||||
if not general_info:
|
||||
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
||||
|
||||
@@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor):
|
||||
'id': 'p51388',
|
||||
'ext': 'mp4',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
|
||||
'upload_date': '20201103',
|
||||
'timestamp': 1604437480,
|
||||
'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
|
||||
'episode': 'Partička (92)',
|
||||
'season': 'Partička',
|
||||
'series': 'Prima Partička',
|
||||
'episode_number': 92,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
|
||||
'info_dict': {
|
||||
'id': 'p1412199',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 3,
|
||||
'episode': 'Tenerife: V říši ohně',
|
||||
'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
|
||||
'duration': 3111.0,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
|
||||
'title': 'Tenerife: V říši ohně',
|
||||
'timestamp': 1711825800,
|
||||
'upload_date': '20240330',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
@@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor):
|
||||
video_id = self._search_regex((
|
||||
r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'let\s+videos\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
), webpage, 'real id', group='id', default=None)
|
||||
|
||||
if not video_id:
|
||||
@@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor):
|
||||
final_result = self._search_json_ld(webpage, video_id, default={})
|
||||
final_result.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': final_result.get('title') or title,
|
||||
'thumbnail': self._html_search_meta(
|
||||
['thumbnail', 'og:image', 'twitter:image'],
|
||||
webpage, 'thumbnail', default=None),
|
||||
|
||||
@@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
|
||||
|
||||
|
||||
class SangiinInstructionIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
_VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
IE_DESC = False # this shouldn't be listed as a supported site
|
||||
|
||||
def _real_extract(self, url):
|
||||
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
|
||||
raise ExtractorError(
|
||||
'Copy the link from the button below the video description/player '
|
||||
'and use that link to download. If there is no button in the frame, '
|
||||
'get the URL of the frame showing the video.', expected=True)
|
||||
|
||||
|
||||
class SangiinIE(InfoExtractor):
|
||||
|
||||
@@ -326,11 +326,11 @@ class JioCinemaIE(JioCinemaBaseIE):
|
||||
# fallback metadata
|
||||
'title': ('name', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('show', 'name', {str}, {lambda x: x or None}),
|
||||
'series': ('show', 'name', {str}, filter),
|
||||
'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
|
||||
'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}),
|
||||
'season_number': ('episode', 'season', {int_or_none}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}),
|
||||
'episode_number': ('episode', 'episodeNo', {int_or_none}, filter),
|
||||
'age_limit': ('ageNemonic', {parse_age_limit}),
|
||||
'duration': ('totalDuration', {float_or_none}),
|
||||
'thumbnail': ('images', {url_or_none}),
|
||||
@@ -338,10 +338,10 @@ class JioCinemaIE(JioCinemaBaseIE):
|
||||
**traverse_obj(metadata, ('result', 0, {
|
||||
'title': ('fullTitle', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('showName', {str}, {lambda x: x or None}),
|
||||
'season': ('seasonName', {str}, {lambda x: x or None}),
|
||||
'series': ('showName', {str}, filter),
|
||||
'season': ('seasonName', {str}, filter),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'season_id': ('seasonId', {str}, {lambda x: x or None}),
|
||||
'season_id': ('seasonId', {str}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'timestamp': ('uploadTime', {int_or_none}),
|
||||
|
||||
@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
|
||||
https?://
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
|
||||
160
yt_dlp/extractor/kenh14.py
Normal file
160
yt_dlp/extractor/kenh14.py
Normal file
@@ -0,0 +1,160 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_attribute,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Kenh14VideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
|
||||
_TESTS = [{
|
||||
'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
|
||||
'md5': '1ed67f9c3a1e74acf15db69590cf6210',
|
||||
'info_dict': {
|
||||
'id': '316173',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
|
||||
'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
|
||||
'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
|
||||
'tags': [],
|
||||
'uploader': 'Unbox Therapy',
|
||||
'upload_date': '20220517',
|
||||
'view_count': int,
|
||||
'duration': 722.86,
|
||||
'timestamp': 1652764468,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.kenh14.vn/video-316174.chn',
|
||||
'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
|
||||
'info_dict': {
|
||||
'id': '316174',
|
||||
'ext': 'mp4',
|
||||
'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
|
||||
'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
|
||||
'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
|
||||
'tags': [],
|
||||
'upload_date': '20220517',
|
||||
'view_count': int,
|
||||
'duration': 70.04,
|
||||
'timestamp': 1652766021,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.kenh14.vn/0-344740.chn',
|
||||
'md5': 'b843495d5e728142c8870c09b46df2a9',
|
||||
'info_dict': {
|
||||
'id': '344740',
|
||||
'ext': 'mov',
|
||||
'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
|
||||
'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
|
||||
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
|
||||
'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
|
||||
'uploader': 'Quang Vũ',
|
||||
'upload_date': '20241024',
|
||||
'view_count': int,
|
||||
'duration': 198.88,
|
||||
'timestamp': 1729741590,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
attrs = extract_attributes(get_element_html_by_attribute('type', 'VideoStream', webpage) or '')
|
||||
direct_url = attrs['data-vid']
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format(
|
||||
remove_start(direct_url, 'kenh14cdn.com/')), video_id, fatal=False)
|
||||
|
||||
formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
|
||||
subtitles = {}
|
||||
video_data = self._download_json(
|
||||
f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)
|
||||
if hls_url := traverse_obj(video_data, ('hls', {url_or_none})):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_url, video_id, m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
if dash_url := traverse_obj(video_data, ('mpd', {url_or_none})):
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
dash_url, video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration', {parse_duration}),
|
||||
'uploader': ('author', {strip_or_none}),
|
||||
'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
}),
|
||||
'id': video_id,
|
||||
'title': (
|
||||
traverse_obj(metadata, ('title', {strip_or_none}))
|
||||
or clean_html(self._og_search_title(webpage))
|
||||
or clean_html(get_element_by_class('vdbw-title', webpage))),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'description': (
|
||||
clean_html(self._og_search_description(webpage))
|
||||
or clean_html(get_element_by_class('vdbw-sapo', webpage))),
|
||||
'thumbnail': (self._og_search_thumbnail(webpage) or attrs.get('data-thumb')),
|
||||
'tags': traverse_obj(self._html_search_meta('keywords', webpage), (
|
||||
{lambda x: x.split(';')}, ..., filter)),
|
||||
}
|
||||
|
||||
|
||||
class Kenh14PlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
|
||||
_TESTS = [{
|
||||
'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
|
||||
'info_dict': {
|
||||
'id': '71',
|
||||
'title': 'Trần Tình (Naked love) mùa 2',
|
||||
'description': 'md5:e9522339304956dea931722dd72eddb2',
|
||||
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
'url': 'https://video.kenh14.vn/playlist/0-72.chn',
|
||||
'info_dict': {
|
||||
'id': '72',
|
||||
'title': 'Lau Lại Đầu Từ',
|
||||
'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
|
||||
'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
category_detail = get_element_by_class('category-detail', webpage) or ''
|
||||
embed_info = traverse_obj(
|
||||
self._yield_json_ld(webpage, playlist_id),
|
||||
(lambda _, v: v['name'] and v['alternateName'], any)) or {}
|
||||
|
||||
return self.playlist_from_matches(
|
||||
get_elements_html_by_class('video-item', webpage), playlist_id,
|
||||
(clean_html(get_element_by_class('name', category_detail)) or unescapeHTML(embed_info.get('name'))),
|
||||
getter=lambda x: 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(x)['data-id']),
|
||||
ie=Kenh14VideoIE, playlist_description=(
|
||||
clean_html(get_element_by_class('description', category_detail))
|
||||
or unescapeHTML(embed_info.get('alternateName'))),
|
||||
thumbnail=traverse_obj(
|
||||
self._og_search_thumbnail(webpage),
|
||||
({url_or_none}, {update_url(query=None)})))
|
||||
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
class KhanAcademyBaseIE(InfoExtractor):
|
||||
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||
|
||||
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70'
|
||||
_PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4'
|
||||
|
||||
def _parse_video(self, video):
|
||||
return {
|
||||
@@ -39,7 +39,7 @@ class KhanAcademyBaseIE(InfoExtractor):
|
||||
query={
|
||||
'fastly_cacheable': 'persist_until_publish',
|
||||
'pcv': self._PUBLISHED_CONTENT_VERSION,
|
||||
'hash': '1242644265',
|
||||
'hash': '3712657851',
|
||||
'variables': json.dumps({
|
||||
'path': display_id,
|
||||
'countryCode': 'US',
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
@@ -67,7 +66,7 @@ class KickIE(KickBaseIE):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if KickClipIE.suitable(url) else super().suitable(url)
|
||||
return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel = self._match_id(url)
|
||||
@@ -98,25 +97,25 @@ class KickIE(KickBaseIE):
|
||||
|
||||
class KickVODIE(KickBaseIE):
|
||||
IE_NAME = 'kick:vod'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/video/e74614f4-5270-4319-90ad-32179f19a45c',
|
||||
'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'md5': '3870f94153e40e7121a6e46c068b70cb',
|
||||
'info_dict': {
|
||||
'id': 'e74614f4-5270-4319-90ad-32179f19a45c',
|
||||
'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'ext': 'mp4',
|
||||
'title': r're:❎ MEGA DRAMA ❎ LIVE ❎ CLICK ❎ ULTIMATE SKILLS .+',
|
||||
'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
|
||||
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
|
||||
'channel': 'xqc',
|
||||
'channel_id': '668',
|
||||
'uploader': 'xQc',
|
||||
'uploader_id': '676',
|
||||
'upload_date': '20240724',
|
||||
'timestamp': 1721796562,
|
||||
'duration': 18566.0,
|
||||
'upload_date': '20240909',
|
||||
'timestamp': 1725919141,
|
||||
'duration': 10155.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'view_count': int,
|
||||
'categories': ['VALORANT'],
|
||||
'categories': ['Just Chatting'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
@@ -137,7 +136,7 @@ class KickVODIE(KickBaseIE):
|
||||
'uploader': ('livestream', 'channel', 'user', 'username', {str}),
|
||||
'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('livestream', 'duration', {float_or_none(scale=1000)}),
|
||||
'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
|
||||
'categories': ('livestream', 'categories', ..., 'name', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
@@ -148,7 +147,7 @@ class KickVODIE(KickBaseIE):
|
||||
|
||||
class KickClipIE(KickBaseIE):
|
||||
IE_NAME = 'kick:clips'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/?\?(?:[^#]+&)?clip=(?P<id>clip_[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?P<id>clip_[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
|
||||
'info_dict': {
|
||||
@@ -189,6 +188,26 @@ class KickClipIE(KickBaseIE):
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5',
|
||||
'info_dict': {
|
||||
'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5',
|
||||
'ext': 'mp4',
|
||||
'title': 'KLJASLDJKLJKASDLJKDAS',
|
||||
'channel': 'spreen',
|
||||
'channel_id': '5312671',
|
||||
'uploader': 'AnormalBarraBaja',
|
||||
'uploader_id': '26518262',
|
||||
'duration': 43.0,
|
||||
'upload_date': '20240927',
|
||||
'timestamp': 1727399987,
|
||||
'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': ['Minecraft'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
126
yt_dlp/extractor/kika.py
Normal file
126
yt_dlp/extractor/kika.py
Normal file
@@ -0,0 +1,126 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class KikaIE(InfoExtractor):
|
||||
IE_DESC = 'KiKA.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P<id>[a-z-]+\d+)'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
|
||||
'md5': 'fbfc8da483719ef06f396e5e5b938c69',
|
||||
'info_dict': {
|
||||
'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240831',
|
||||
'timestamp': 1725126600,
|
||||
'season_number': 2024,
|
||||
'modified_date': '20240831',
|
||||
'episode': 'Episode 476',
|
||||
'episode_number': 476,
|
||||
'season': 'Season 2024',
|
||||
'duration': 634,
|
||||
'title': 'logo! vom Samstag, 31. August 2024',
|
||||
'modified_timestamp': 1725129983,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.kika.de/kaltstart/videos/video92498',
|
||||
'md5': '710ece827e5055094afeb474beacb7aa',
|
||||
'info_dict': {
|
||||
'id': 'video92498',
|
||||
'ext': 'mp4',
|
||||
'title': '7. Wo ist Leo?',
|
||||
'description': 'md5:fb48396a5b75068bcac1df74f1524920',
|
||||
'duration': 436,
|
||||
'timestamp': 1702926876,
|
||||
'upload_date': '20231218',
|
||||
'episode_number': 7,
|
||||
'modified_date': '20240319',
|
||||
'modified_timestamp': 1710880610,
|
||||
'episode': 'Episode 7',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
|
||||
'md5': 'ffd1b700d7de0a6616a1d08544c77294',
|
||||
'info_dict': {
|
||||
'id': 'video90088',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20221102',
|
||||
'timestamp': 1667390580,
|
||||
'duration': 197,
|
||||
'modified_timestamp': 1711093771,
|
||||
'episode_number': 8,
|
||||
'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
|
||||
'modified_date': '20240322',
|
||||
'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 8',
|
||||
'season': 'Season 1',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
doc = self._download_json(f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id)
|
||||
video_assets = self._download_json(doc['assets']['url'], video_id)
|
||||
|
||||
subtitles = {}
|
||||
if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):
|
||||
subtitles['de'] = [{
|
||||
'url': ttml_resource,
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):
|
||||
subtitles.setdefault('de', []).append({
|
||||
'url': webvtt_resource,
|
||||
'ext': 'vtt',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': list(self._extract_formats(video_assets, video_id)),
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(doc, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('date', {parse_iso8601}),
|
||||
'modified_timestamp': ('modificationDate', {parse_iso8601}),
|
||||
'duration': ((
|
||||
('durationInSeconds', {int_or_none}),
|
||||
('duration', {parse_duration})), any),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
|
||||
for media in traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url']))):
|
||||
stream_url = media['url']
|
||||
ext = determine_ext(stream_url)
|
||||
if ext == 'm3u8':
|
||||
yield from self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
else:
|
||||
yield {
|
||||
'url': stream_url,
|
||||
'format_id': ext,
|
||||
**traverse_obj(media, {
|
||||
'width': ('frameWidth', {int_or_none}),
|
||||
'height': ('frameHeight', {int_or_none}),
|
||||
# NB: filesize is 0 if unknown, bitrate is -1 if unknown
|
||||
'filesize': ('fileSize', {int_or_none}, filter),
|
||||
'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
}),
|
||||
}
|
||||
@@ -32,14 +32,14 @@ class LaracastsBaseIE(InfoExtractor):
|
||||
VimeoIE, url_transparent=True,
|
||||
**traverse_obj(episode, {
|
||||
'id': ('id', {int}, {str_or_none}),
|
||||
'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
|
||||
'webpage_url': ('path', {urljoin('https://laracasts.com')}),
|
||||
'title': ('title', {clean_html}),
|
||||
'season_number': ('chapter', {int_or_none}),
|
||||
'episode_number': ('position', {int_or_none}),
|
||||
'description': ('body', {clean_html}),
|
||||
'thumbnail': ('largeThumbnail', {url_or_none}),
|
||||
'duration': ('length', {int_or_none}),
|
||||
'date': ('dateSegments', 'published', {unified_strdate}),
|
||||
'upload_date': ('dateSegments', 'published', {unified_strdate}),
|
||||
}))
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ class LaracastsIE(LaracastsBaseIE):
|
||||
'title': 'Hello, Laravel',
|
||||
'ext': 'mp4',
|
||||
'duration': 519,
|
||||
'date': '20240312',
|
||||
'upload_date': '20240312',
|
||||
'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png',
|
||||
'description': 'md5:ddd658bb241975871d236555657e1dd1',
|
||||
'season_number': 1,
|
||||
@@ -104,7 +104,7 @@ class LaracastsPlaylistIE(LaracastsBaseIE):
|
||||
'description': ('body', {clean_html}),
|
||||
'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
|
||||
'duration': ('runTime', {parse_duration}),
|
||||
'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}),
|
||||
'categories': ('taxonomy', 'name', {str}, all, filter),
|
||||
'tags': ('topics', ..., 'name', {str}),
|
||||
'modified_date': ('lastUpdated', {unified_strdate}),
|
||||
}),
|
||||
|
||||
@@ -66,7 +66,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
'license': ('value', 'license', {str}),
|
||||
'timestamp': ('timestamp', {int_or_none}),
|
||||
'release_timestamp': ('value', 'release_time', {int_or_none}),
|
||||
'tags': ('value', 'tags', ..., {lambda x: x or None}),
|
||||
'tags': ('value', 'tags', ..., filter),
|
||||
'duration': ('value', stream_type, 'duration', {int_or_none}),
|
||||
'channel': ('signing_channel', 'value', 'title', {str}),
|
||||
'channel_id': ('signing_channel', 'claim_id', {str}),
|
||||
@@ -136,6 +136,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
|
||||
class LBRYIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry'
|
||||
IE_DESC = 'odysee.com'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
|
||||
(?:\$/(?:download|embed)/)?
|
||||
(?P<id>
|
||||
@@ -309,7 +310,13 @@ class LBRYIE(LBRYBaseIE):
|
||||
if stream_type in self._SUPPORTED_STREAM_TYPES:
|
||||
claim_id, is_live = result['claim_id'], False
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
||||
'get', claim_id, {
|
||||
'uri': uri,
|
||||
**traverse_obj(parse_qs(url), {
|
||||
'signature': ('signature', 0),
|
||||
'signature_ts': ('signature_ts', 0),
|
||||
}),
|
||||
}, 'streaming url')['streaming_url']
|
||||
|
||||
# GET request to v3 API returns original video/audio file if available
|
||||
direct_url = re.sub(r'/api/v\d+/', '/api/v3/', streaming_url)
|
||||
@@ -364,6 +371,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
|
||||
class LBRYChannelIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:channel'
|
||||
IE_DESC = 'odysee.com channels'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||
@@ -391,6 +399,7 @@ class LBRYChannelIE(LBRYBaseIE):
|
||||
|
||||
class LBRYPlaylistIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:playlist'
|
||||
IE_DESC = 'odysee.com playlists'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
|
||||
|
||||
@@ -6,13 +6,11 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class LearningOnScreenIE(InfoExtractor):
|
||||
@@ -32,28 +30,24 @@ class LearningOnScreenIE(InfoExtractor):
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
|
||||
self.raise_login_required(
|
||||
'Use --cookies for authentication. See '
|
||||
' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
|
||||
'for how to manually pass cookies', method=None)
|
||||
self.raise_login_required(method='session_cookies')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
details = traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'programme-details')}, {
|
||||
'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}),
|
||||
{find_element(id='programme-details', html=True)}, {
|
||||
'title': ({find_element(tag='h2')}, {clean_html}),
|
||||
'timestamp': (
|
||||
{functools.partial(get_element_by_class, 'broadcast-date')},
|
||||
{find_element(cls='broadcast-date')},
|
||||
{functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
|
||||
'duration': (
|
||||
{functools.partial(get_element_by_class, 'prog-running-time')},
|
||||
{clean_html}, {parse_duration}),
|
||||
{find_element(cls='prog-running-time')}, {clean_html}, {parse_duration}),
|
||||
}))
|
||||
|
||||
title = details.pop('title', None) or traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'add-to-existing-playlist')},
|
||||
{find_element(id='add-to-existing-playlist', html=True)},
|
||||
{extract_attributes}, 'data-record-title', {clean_html}))
|
||||
|
||||
entries = self._parse_html5_media_entries(
|
||||
|
||||
@@ -6,12 +6,10 @@ from ..utils import (
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
get_element_text_and_html_by_tag,
|
||||
parse_duration,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class ListenNotesIE(InfoExtractor):
|
||||
@@ -22,14 +20,14 @@ class ListenNotesIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'KrDgvNb_u1n',
|
||||
'ext': 'mp3',
|
||||
'title': 'md5:32236591a921adf17bbdbf0441b6c0e9',
|
||||
'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd',
|
||||
'duration': 2148.0,
|
||||
'channel': 'Thriving on Overload',
|
||||
'title': r're:Tim O’Reilly on noticing things other people .{113}',
|
||||
'description': r're:(?s)‘’We shape reality by what we notice and .{27459}',
|
||||
'duration': 2215.0,
|
||||
'channel': 'Amplifying Cognition',
|
||||
'channel_id': 'ed84wITivxF',
|
||||
'episode_id': 'e1312583fa7b4e24acfbb5131050be00',
|
||||
'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/',
|
||||
'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/amplifying-cognition-ross-dawson-Iemft4Gdr0k-ed84wITivxF.300x300.jpg',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/amplifying-cognition-ross-dawson-ed84wITivxF/',
|
||||
'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'],
|
||||
},
|
||||
}, {
|
||||
@@ -39,13 +37,13 @@ class ListenNotesIE(InfoExtractor):
|
||||
'id': 'lwEA3154JzG',
|
||||
'ext': 'mp3',
|
||||
'title': 'Episode 177: WireGuard with Jason Donenfeld',
|
||||
'description': 'md5:24744f36456a3e95f83c1193a3458594',
|
||||
'description': r're:(?s)Jason Donenfeld lead developer joins us this hour to discuss WireGuard, .{3169}',
|
||||
'duration': 3861.0,
|
||||
'channel': 'Ask Noah Show',
|
||||
'channel_id': '4DQTzdS5-j7',
|
||||
'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/',
|
||||
'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg',
|
||||
'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-gD7vG150cxf-4DQTzdS5-j7.300x300.jpg',
|
||||
'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'],
|
||||
},
|
||||
}]
|
||||
@@ -70,7 +68,7 @@ class ListenNotesIE(InfoExtractor):
|
||||
'id': audio_id,
|
||||
'url': data['audio'],
|
||||
'title': (data.get('data-title')
|
||||
or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
|
||||
or traverse_obj(webpage, ({find_element(tag='h1')}, {clean_html}))
|
||||
or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')),
|
||||
'description': (self._clean_description(get_element_by_class('ln-text-p', webpage))
|
||||
or strip_or_none(description)),
|
||||
|
||||
@@ -1,30 +1,32 @@
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unsmuggle_url,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class LiTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
|
||||
|
||||
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:[^/?#]+/watch/|vod/[^/?#]+/content\.do\?content_id=)(?P<id>[\w-]+)'
|
||||
_URL_TEMPLATE = 'https://www.litv.tv/%s/watch/%s'
|
||||
_GEO_COUNTRIES = ['TW']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
|
||||
'url': 'https://www.litv.tv/drama/watch/VOD00041610',
|
||||
'info_dict': {
|
||||
'id': 'VOD00041606',
|
||||
'title': '花千骨',
|
||||
},
|
||||
'playlist_count': 51, # 50 episodes + 1 trailer
|
||||
}, {
|
||||
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
|
||||
'url': 'https://www.litv.tv/drama/watch/VOD00041610',
|
||||
'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
|
||||
'info_dict': {
|
||||
'id': 'VOD00041610',
|
||||
@@ -32,16 +34,15 @@ class LiTVIE(InfoExtractor):
|
||||
'title': '花千骨第1集',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。',
|
||||
'categories': ['奇幻', '愛情', '中國', '仙俠'],
|
||||
'categories': ['奇幻', '愛情', '仙俠', '古裝'],
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
},
|
||||
'skip': 'Georestricted to Taiwan',
|
||||
}, {
|
||||
'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&',
|
||||
'url': 'https://www.litv.tv/drama/watch/VOD00044841',
|
||||
'md5': '88322ea132f848d6e3e18b32a832b918',
|
||||
'info_dict': {
|
||||
'id': 'VOD00044841',
|
||||
@@ -55,94 +56,62 @@ class LiTVIE(InfoExtractor):
|
||||
def _extract_playlist(self, playlist_data, content_type):
|
||||
all_episodes = [
|
||||
self.url_result(smuggle_url(
|
||||
self._URL_TEMPLATE % (content_type, episode['contentId']),
|
||||
self._URL_TEMPLATE % (content_type, episode['content_id']),
|
||||
{'force_noplaylist': True})) # To prevent infinite recursion
|
||||
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
|
||||
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episodes', lambda _, v: v['content_id']))]
|
||||
|
||||
return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
|
||||
return self.playlist_result(all_episodes, playlist_data['content_id'], playlist_data.get('title'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
vod_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
|
||||
|
||||
if self._search_regex(
|
||||
r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
|
||||
webpage, 'meta refresh redirect', default=False, group=0):
|
||||
raise ExtractorError('No such content found', expected=True)
|
||||
program_info = traverse_obj(vod_data, ('programInformation', {dict})) or {}
|
||||
playlist_data = traverse_obj(vod_data, ('seriesTree'))
|
||||
if playlist_data and self._yes_playlist(program_info.get('series_id'), video_id, smuggled_data):
|
||||
return self._extract_playlist(playlist_data, program_info.get('content_type'))
|
||||
|
||||
program_info = self._parse_json(self._search_regex(
|
||||
r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
|
||||
video_id)
|
||||
asset_id = traverse_obj(program_info, ('assets', 0, 'asset_id', {str}))
|
||||
if asset_id: # This is a VOD
|
||||
media_type = 'vod'
|
||||
else: # This is a live stream
|
||||
asset_id = program_info['content_id']
|
||||
media_type = program_info['content_type']
|
||||
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
|
||||
if puid:
|
||||
endpoint = 'get-urls'
|
||||
else:
|
||||
puid = str(uuid.uuid4())
|
||||
endpoint = 'get-urls-no-auth'
|
||||
video_data = self._download_json(
|
||||
f'https://www.litv.tv/api/{endpoint}', video_id,
|
||||
data=json.dumps({'AssetId': asset_id, 'MediaType': media_type, 'puid': puid}).encode(),
|
||||
headers={'Content-Type': 'application/json'})
|
||||
|
||||
# In browsers `getProgramInfo` request is always issued. Usually this
|
||||
# endpoint gives the same result as the data embedded in the webpage.
|
||||
# If, for some reason, there are no embedded data, we do an extra request.
|
||||
if 'assetId' not in program_info:
|
||||
program_info = self._download_json(
|
||||
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
|
||||
query={'contentId': video_id},
|
||||
headers={'Accept': 'application/json'})
|
||||
|
||||
series_id = program_info['seriesId']
|
||||
if self._yes_playlist(series_id, video_id, smuggled_data):
|
||||
playlist_data = self._download_json(
|
||||
'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
|
||||
query={'seriesId': series_id}, headers={'Accept': 'application/json'})
|
||||
return self._extract_playlist(playlist_data, program_info['contentType'])
|
||||
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
|
||||
webpage, 'video data', default='{}'), video_id)
|
||||
if not video_data:
|
||||
payload = {'assetId': program_info['assetId']}
|
||||
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
|
||||
if puid:
|
||||
payload.update({
|
||||
'type': 'auth',
|
||||
'puid': puid,
|
||||
})
|
||||
endpoint = 'getUrl'
|
||||
else:
|
||||
payload.update({
|
||||
'watchDevices': program_info['watchDevices'],
|
||||
'contentType': program_info['contentType'],
|
||||
})
|
||||
endpoint = 'getMainUrlNoAuth'
|
||||
video_data = self._download_json(
|
||||
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
|
||||
data=json.dumps(payload).encode(),
|
||||
headers={'Content-Type': 'application/json'})
|
||||
|
||||
if not video_data.get('fullpath'):
|
||||
error_msg = video_data.get('errorMessage')
|
||||
if error_msg == 'vod.error.outsideregionerror':
|
||||
if error := traverse_obj(video_data, ('error', {dict})):
|
||||
error_msg = traverse_obj(error, ('message', {str}))
|
||||
if error_msg and 'OutsideRegionError' in error_msg:
|
||||
self.raise_geo_restricted('This video is available in Taiwan only')
|
||||
if error_msg:
|
||||
elif error_msg:
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True)
|
||||
raise ExtractorError(f'Unexpected result from {self.IE_NAME}')
|
||||
raise ExtractorError(f'Unexpected error from {self.IE_NAME}')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['fullpath'], video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
video_data['result']['AssetURLs'][0], video_id, ext='mp4', m3u8_id='hls')
|
||||
for a_format in formats:
|
||||
# LiTV HLS segments doesn't like compressions
|
||||
a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'
|
||||
|
||||
title = program_info['title'] + program_info.get('secondaryMark', '')
|
||||
description = program_info.get('description')
|
||||
thumbnail = program_info.get('imageFile')
|
||||
categories = [item['name'] for item in program_info.get('category', [])]
|
||||
episode = int_or_none(program_info.get('episode'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'episode_number': episode,
|
||||
'title': join_nonempty('title', 'secondary_mark', delim='', from_dict=program_info),
|
||||
**traverse_obj(program_info, {
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('picture', {urljoin('https://p-cdnstatic.svc.litv.tv/')}),
|
||||
'categories': ('genres', ..., 'name', {str}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1,86 +1,11 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
|
||||
'info_dict': {
|
||||
'id': '10809',
|
||||
'ext': 'mp4',
|
||||
'title': "Put'ka: Trys Klausimai",
|
||||
'upload_date': '20161216',
|
||||
'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
|
||||
'age_limit': 18,
|
||||
'duration': 117,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1481904000,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # HLS download
|
||||
},
|
||||
}, {
|
||||
'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
|
||||
'info_dict': {
|
||||
'id': '10467',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nėrdas: Kompiuterio Valymas',
|
||||
'upload_date': '20150113',
|
||||
'description': 'md5:7352d113a242a808676ff17e69db6a69',
|
||||
'age_limit': 18,
|
||||
'duration': 346,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1421164800,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # HLS download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_AGE_LIMITS = {
|
||||
'N-7': 7,
|
||||
'N-14': 14,
|
||||
'S': 18,
|
||||
}
|
||||
_M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
video_info = self._download_json(
|
||||
'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, video_id or '0'),
|
||||
display_id)['videoConfig']['videoInfo']
|
||||
|
||||
video_id = str(video_info['id'])
|
||||
title = video_info['title']
|
||||
prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
|
||||
video_id, 'mp4', 'm3u8_native')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
|
||||
'duration': int_or_none(video_info.get('duration')),
|
||||
'description': clean_html(video_info.get('htmlDescription')),
|
||||
'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
|
||||
'timestamp': parse_iso8601(video_info.get('airDate')),
|
||||
'view_count': int_or_none(video_info.get('viewsCount')),
|
||||
}
|
||||
|
||||
|
||||
class LnkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'
|
||||
|
||||
@@ -92,9 +92,9 @@ class LoomIE(InfoExtractor):
|
||||
},
|
||||
'params': {'videopassword': 'seniorinfants2'},
|
||||
}, {
|
||||
# embed, transcoded-url endpoint sends empty JSON response
|
||||
# embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
|
||||
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'md5': '8488817242a0db1cb2ad0ea522553cf6',
|
||||
'md5': 'b321d261656848c184a94e3b93eae28d',
|
||||
'info_dict': {
|
||||
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'ext': 'mp4',
|
||||
@@ -104,6 +104,7 @@ class LoomIE(InfoExtractor):
|
||||
'timestamp': 1657216459,
|
||||
'duration': 181,
|
||||
},
|
||||
'params': {'format': 'bestvideo'}, # Test video-only fixup
|
||||
'expected_warnings': ['Failed to parse JSON'],
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -293,7 +294,11 @@ class LoomIE(InfoExtractor):
|
||||
format_url = format_url.replace('-split.m3u8', '.m3u8')
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
|
||||
# Sometimes only split video/audio formats are available, need to fixup video-only formats
|
||||
is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec'))
|
||||
for fmt in m3u8_formats:
|
||||
if is_not_premerged and fmt.get('vcodec') != 'none':
|
||||
fmt['acodec'] = 'none'
|
||||
yield {
|
||||
**fmt,
|
||||
'url': update_url(fmt['url'], query=query),
|
||||
|
||||
@@ -114,7 +114,7 @@ class LSMLREmbedIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
query = parse_qs(url)
|
||||
video_id = traverse_obj(query, (
|
||||
('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)
|
||||
('show', 'id'), 0, {int_or_none}, filter, {str_or_none}), get_all=False)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_data, media_data = self._search_regex(
|
||||
|
||||
@@ -57,6 +57,6 @@ class MagentaMusikIE(InfoExtractor):
|
||||
'duration': ('runtimeInSeconds', {int_or_none}),
|
||||
'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
|
||||
'release_year': ('yearOfProduction', {int_or_none}),
|
||||
'categories': ('mainGenre', {str}, {lambda x: x and [x]}),
|
||||
'categories': ('mainGenre', {str}, all, filter),
|
||||
})),
|
||||
}
|
||||
|
||||
@@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
|
||||
video_data = None
|
||||
|
||||
# fix meta_url if missing the host address
|
||||
if re.match(r'^\/\+\/', meta_url):
|
||||
if re.match(r'\/\+\/', meta_url):
|
||||
meta_url = urljoin('https://my.mail.ru', meta_url)
|
||||
|
||||
if meta_url:
|
||||
|
||||
@@ -13,8 +13,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
IE_DESC = 'MDR.DE'
|
||||
_VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
@@ -34,30 +34,6 @@ class MDRIE(InfoExtractor):
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
|
||||
'md5': '4930515e36b06c111213e80d1e4aad0e',
|
||||
'info_dict': {
|
||||
'id': '19636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baumhaus vom 30. Oktober 2015',
|
||||
'duration': 134,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
|
||||
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
|
||||
'info_dict': {
|
||||
'id': '8182',
|
||||
'ext': 'mp4',
|
||||
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
||||
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
||||
'timestamp': 1482541200,
|
||||
'upload_date': '20161224',
|
||||
'duration': 4628,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
}, {
|
||||
# audio with alternative playerURL pattern
|
||||
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
|
||||
@@ -68,28 +44,7 @@ class MDRIE(InfoExtractor):
|
||||
'duration': 3239,
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
}, {
|
||||
# empty bitrateVideo and bitrateAudio
|
||||
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
|
||||
'info_dict': {
|
||||
'id': '128372',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der kleine Wichtel kehrt zurück',
|
||||
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
|
||||
'duration': 4876,
|
||||
'timestamp': 1607823300,
|
||||
'upload_date': '20201213',
|
||||
'uploader': 'ZDF',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
|
||||
'only_matching': True,
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -16,6 +16,15 @@ class MediaKlikkIE(InfoExtractor):
|
||||
(?P<id>[^/#?_]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/',
|
||||
'info_dict': {
|
||||
'id': '668177',
|
||||
'title': 'Az ajtó',
|
||||
'display_id': 'az-ajto',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg',
|
||||
},
|
||||
}, {
|
||||
# (old) mediaklikk. date in html.
|
||||
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
|
||||
'info_dict': {
|
||||
@@ -37,6 +46,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230903',
|
||||
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) m4sport
|
||||
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
|
||||
@@ -59,6 +69,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230908',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# m4sport with *video/ url and no date
|
||||
'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
|
||||
@@ -69,6 +80,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) hirado
|
||||
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
|
||||
@@ -90,6 +102,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230911',
|
||||
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}, {
|
||||
# (old) petofilive
|
||||
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
|
||||
@@ -112,6 +125,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230909',
|
||||
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -143,14 +157,14 @@ class MediaKlikkIE(InfoExtractor):
|
||||
if not playlist_url:
|
||||
raise ExtractorError('Unable to extract playlist url')
|
||||
|
||||
formats = self._extract_wowza_formats(
|
||||
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ class MediaStreamBaseIE(InfoExtractor):
|
||||
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
|
||||
|
||||
def _extract_mediastream_urls(self, webpage):
|
||||
yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
|
||||
yield from traverse_obj(list(self._yield_json_ld(webpage, None, default={})), (
|
||||
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
|
||||
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
IE_NAME = 'MangoTV'
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user