mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-01-06 06:51:30 +00:00
merge 'master'
This commit is contained in:
@@ -208,6 +208,10 @@ from .bandcamp import (
|
||||
BandcampUserIE,
|
||||
BandcampWeeklyIE,
|
||||
)
|
||||
from .bandlab import (
|
||||
BandlabIE,
|
||||
BandlabPlaylistIE,
|
||||
)
|
||||
from .bannedvideo import BannedVideoIE
|
||||
from .bbc import (
|
||||
BBCIE,
|
||||
@@ -217,6 +221,7 @@ from .bbc import (
|
||||
BBCCoUkIPlayerGroupIE,
|
||||
BBCCoUkPlaylistIE,
|
||||
)
|
||||
from .beacon import BeaconTvIE
|
||||
from .beatbump import (
|
||||
BeatBumpPlaylistIE,
|
||||
BeatBumpVideoIE,
|
||||
@@ -277,6 +282,7 @@ from .bleacherreport import (
|
||||
from .blerp import BlerpIE
|
||||
from .blogger import BloggerIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bluesky import BlueskyIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .boosty import BoostyIE
|
||||
@@ -362,7 +368,10 @@ from .ccc import (
|
||||
)
|
||||
from .ccma import CCMAIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .cda import (
|
||||
CDAIE,
|
||||
CDAFolderIE,
|
||||
)
|
||||
from .cellebrite import CellebriteIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .cgtn import CGTNIE
|
||||
@@ -397,8 +406,6 @@ from .cmt import CMTIE
|
||||
from .cnbc import CNBCVideoIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNArticleIE,
|
||||
CNNBlogsIE,
|
||||
CNNIndonesiaIE,
|
||||
)
|
||||
from .comedycentral import (
|
||||
@@ -548,6 +555,7 @@ from .dropout import (
|
||||
DropoutIE,
|
||||
DropoutSeasonIE,
|
||||
)
|
||||
from .drtalks import DrTalksIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import (
|
||||
DRTVIE,
|
||||
@@ -705,6 +713,7 @@ from .gab import (
|
||||
GabTVIE,
|
||||
)
|
||||
from .gaia import GaiaIE
|
||||
from .gamedevtv import GameDevTVDashboardIE
|
||||
from .gamejolt import (
|
||||
GameJoltCommunityIE,
|
||||
GameJoltGameIE,
|
||||
@@ -729,6 +738,7 @@ from .genius import (
|
||||
GeniusIE,
|
||||
GeniusLyricsIE,
|
||||
)
|
||||
from .germanupa import GermanupaIE
|
||||
from .getcourseru import (
|
||||
GetCourseRuIE,
|
||||
GetCourseRuPlayerIE,
|
||||
@@ -822,7 +832,10 @@ from .hungama import (
|
||||
HungamaIE,
|
||||
HungamaSongIE,
|
||||
)
|
||||
from .huya import HuyaLiveIE
|
||||
from .huya import (
|
||||
HuyaLiveIE,
|
||||
HuyaVideoIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .hypergryph import MonsterSirenHypergryphMusicIE
|
||||
from .hytale import HytaleIE
|
||||
@@ -934,6 +947,10 @@ from .kaltura import KalturaIE
|
||||
from .kankanews import KankaNewsIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .kelbyone import KelbyOneIE
|
||||
from .kenh14 import (
|
||||
Kenh14PlaylistIE,
|
||||
Kenh14VideoIE,
|
||||
)
|
||||
from .khanacademy import (
|
||||
KhanAcademyIE,
|
||||
KhanAcademyUnitIE,
|
||||
@@ -1037,10 +1054,7 @@ from .livestream import (
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .livestreamfails import LivestreamfailsIE
|
||||
from .lnkgo import (
|
||||
LnkGoIE,
|
||||
LnkIE,
|
||||
)
|
||||
from .lnk import LnkIE
|
||||
from .loom import (
|
||||
LoomFolderIE,
|
||||
LoomIE,
|
||||
@@ -1126,12 +1140,6 @@ from .microsoftembed import (
|
||||
MicrosoftMediusIE,
|
||||
)
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .mildom import (
|
||||
MildomClipIE,
|
||||
MildomIE,
|
||||
MildomUserVodIE,
|
||||
MildomVodIE,
|
||||
)
|
||||
from .minds import (
|
||||
MindsChannelIE,
|
||||
MindsGroupIE,
|
||||
@@ -1151,6 +1159,7 @@ from .mitele import MiTeleIE
|
||||
from .mixch import (
|
||||
MixchArchiveIE,
|
||||
MixchIE,
|
||||
MixchMovieIE,
|
||||
)
|
||||
from .mixcloud import (
|
||||
MixcloudIE,
|
||||
@@ -1165,6 +1174,7 @@ from .mlb import (
|
||||
)
|
||||
from .mlssoccer import MLSSoccerIE
|
||||
from .mocha import MochaVideoIE
|
||||
from .mojevideo import MojevideoIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .motherless import (
|
||||
@@ -1511,8 +1521,8 @@ from .pgatour import PGATourIE
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pialive import PiaLiveIE
|
||||
from .piapro import PiaproIE
|
||||
from .piaulizaportal import PIAULIZAPortalIE
|
||||
from .picarto import (
|
||||
PicartoIE,
|
||||
PicartoVodIE,
|
||||
@@ -1542,16 +1552,13 @@ from .pluralsight import (
|
||||
PluralsightIE,
|
||||
)
|
||||
from .plutotv import PlutoTVIE
|
||||
from .plvideo import PlVideoIE
|
||||
from .podbayfm import (
|
||||
PodbayFMChannelIE,
|
||||
PodbayFMIE,
|
||||
)
|
||||
from .podchaser import PodchaserIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pokemon import (
|
||||
PokemonIE,
|
||||
PokemonWatchIE,
|
||||
)
|
||||
from .pokergo import (
|
||||
PokerGoCollectionIE,
|
||||
PokerGoIE,
|
||||
@@ -1642,6 +1649,7 @@ from .radiokapital import (
|
||||
RadioKapitalIE,
|
||||
RadioKapitalShowIE,
|
||||
)
|
||||
from .radioradicale import RadioRadicaleIE
|
||||
from .radiozet import RadioZetPodcastIE
|
||||
from .radlive import (
|
||||
RadLiveChannelIE,
|
||||
@@ -1811,6 +1819,7 @@ from .screen9 import Screen9IE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastify import ScreencastifyIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenrec import ScreenRecIE
|
||||
from .scrippsnetworks import (
|
||||
ScrippsNetworksIE,
|
||||
ScrippsNetworksWatchIE,
|
||||
@@ -1821,6 +1830,7 @@ from .scte import (
|
||||
SCTECourseIE,
|
||||
)
|
||||
from .sejmpl import SejmIE
|
||||
from .sen import SenIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import (
|
||||
SenateGovIE,
|
||||
@@ -1877,6 +1887,7 @@ from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotrim import SmotrimIE
|
||||
from .snapchat import SnapchatSpotlightIE
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import (
|
||||
SohuIE,
|
||||
@@ -1931,9 +1942,7 @@ from .spotify import (
|
||||
)
|
||||
from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerPageIE,
|
||||
SpreakerShowIE,
|
||||
SpreakerShowPageIE,
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
@@ -2244,6 +2253,10 @@ from .ufctv import (
|
||||
)
|
||||
from .ukcolumn import UkColumnIE
|
||||
from .uktvplay import UKTVPlayIE
|
||||
from .uliza import (
|
||||
UlizaPlayerIE,
|
||||
UlizaPortalIE,
|
||||
)
|
||||
from .umg import UMGDeIE
|
||||
from .unistra import UnistraIE
|
||||
from .unity import UnityIE
|
||||
@@ -2272,10 +2285,6 @@ from .utreon import UtreonIE
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veo import VeoIE
|
||||
from .veoh import (
|
||||
VeohIE,
|
||||
VeohUserIE,
|
||||
)
|
||||
from .vesti import VestiIE
|
||||
from .vevo import (
|
||||
VevoIE,
|
||||
@@ -2348,10 +2357,6 @@ from .vimm import (
|
||||
VimmIE,
|
||||
VimmRecordingIE,
|
||||
)
|
||||
from .vine import (
|
||||
VineIE,
|
||||
VineUserIE,
|
||||
)
|
||||
from .viously import ViouslyIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
|
||||
@@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
'skip': 'This program is not currently available in ABC iview',
|
||||
}, {
|
||||
'url': 'https://iview.abc.net.au/show/inbestigators',
|
||||
'info_dict': {
|
||||
'id': '175343-1',
|
||||
'title': 'Series 1',
|
||||
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
|
||||
'series': 'The Inbestigators',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
webpage_data = self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
|
||||
webpage, 'initial state')
|
||||
video_data = self._parse_json(
|
||||
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
video_data = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
|
||||
transform_source=lambda x: x.encode().decode('unicode_escape'),
|
||||
end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
|
||||
|
||||
@@ -6,7 +6,6 @@ import hmac
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
@@ -18,10 +17,8 @@ from ..networking.exceptions import TransportError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
bytes_to_intlist,
|
||||
decode_base_n,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
@@ -72,15 +69,15 @@ class AbemaLicenseRH(RequestHandler):
|
||||
})
|
||||
|
||||
res = decode_base_n(license_response['k'], table=self._STRTABLE)
|
||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||
encvideokey = list(res.to_bytes(16, 'big'))
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self._HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
enckey = list(h.digest())
|
||||
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
return bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
_TEST = {
|
||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||
|
||||
@@ -11,11 +11,9 @@ from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
@@ -198,16 +196,16 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
self._K = ''.join(random.choices('0123456789abcdef', k=16))
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
message = list(json.dumps({
|
||||
'k': self._K,
|
||||
't': token,
|
||||
}))
|
||||
}).encode())
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry with
|
||||
# a different random padding
|
||||
links_data = None
|
||||
for _ in range(3):
|
||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||
padded_message = bytes(pkcs1pad(message, 128))
|
||||
n, e = self._RSA_KEY
|
||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||
authorization = base64.b64encode(encrypted_message).decode()
|
||||
@@ -234,7 +232,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
error = self._parse_json(e.cause.response.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
raise ExtractorError(message)
|
||||
else:
|
||||
|
||||
@@ -1355,13 +1355,14 @@ MSO_INFO = {
|
||||
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update(kwargs.get('headers', {}))
|
||||
headers.update(kwargs.get('headers') or {})
|
||||
kwargs['headers'] = headers
|
||||
return super()._download_webpage_handle(
|
||||
*args, **kwargs)
|
||||
@@ -1454,7 +1455,11 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
}, headers={
|
||||
# yt-dlp's default user-agent is usually too old for Comcast_SSO
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
|
||||
'User-Agent': self._MODERN_USER_AGENT,
|
||||
} if mso_id == 'Comcast_SSO' else None)
|
||||
elif not self._cookies_passed:
|
||||
raise_mvpd_required()
|
||||
|
||||
|
||||
@@ -33,21 +33,21 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
'https://login.afreecatv.com/app/LoginAction.php', None,
|
||||
'https://login.sooplive.co.kr/app/LoginAction.php', None,
|
||||
'Logging in', data=urlencode_postdata(login_form))
|
||||
|
||||
_ERRORS = {
|
||||
-4: 'Your account has been suspended due to a violation of our terms and policies.',
|
||||
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
|
||||
-6: 'https://login.afreecatv.com/membership/changeMember.php',
|
||||
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
|
||||
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
|
||||
-12: 'https://member.afreecatv.com/app/user_security.php',
|
||||
-5: 'https://member.sooplive.co.kr/app/user_delete_progress.php',
|
||||
-6: 'https://login.sooplive.co.kr/membership/changeMember.php',
|
||||
-8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.sooplive.co.kr/app/pop_login_block.php',
|
||||
-11: 'https://login.sooplive.co.kr/afreeca/second_login.php',
|
||||
-12: 'https://member.sooplive.co.kr/app/user_security.php',
|
||||
0: 'The username does not exist or you have entered the wrong password.',
|
||||
-1: 'The username does not exist or you have entered the wrong password.',
|
||||
-3: 'You have entered your username/password incorrectly.',
|
||||
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
|
||||
-7: 'You cannot use your Global Soop account to access Korean Soop.',
|
||||
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
|
||||
-32008: 'You have failed to log in. Please contact our Help Center.',
|
||||
}
|
||||
@@ -61,76 +61,48 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
|
||||
return self._download_json(Request(
|
||||
f'https://api.m.afreecatv.com/{endpoint}',
|
||||
f'https://api.m.sooplive.co.kr/{endpoint}',
|
||||
data=data, headers=headers, query=query,
|
||||
extensions={'legacy_ssl': True}), display_id,
|
||||
'Downloading API JSON', 'Unable to download API JSON')
|
||||
|
||||
@staticmethod
def _fixup_thumb(thumb_url):
    """Wrap a thumbnail URL in a one-element list with an explicit 'jpg' extension.

    Returns None when ``url_or_none`` rejects ``thumb_url``.
    """
    if url_or_none(thumb_url):
        # Core would determine_ext as 'php' from the url, so we need to provide the real ext
        # See: https://github.com/yt-dlp/yt-dlp/issues/11537
        return [{'url': thumb_url, 'ext': 'jpg'}]
    return None
|
||||
|
||||
|
||||
class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv'
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)/?(?:$|[?#&])
|
||||
'''
|
||||
IE_NAME = 'soop'
|
||||
IE_DESC = 'sooplive.co.kr'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
'url': 'https://vod.sooplive.co.kr/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '36164052',
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'title': '데일리 에이프릴 요정들의 시상식!',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160503',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||
'info_dict': {
|
||||
'id': '36153164',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '36153164_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '36153164_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}],
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||
'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
@@ -142,12 +114,12 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/97267690',
|
||||
'url': 'https://vod.sooplive.co.kr/player/97267690',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
@@ -157,36 +129,17 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The VOD does not exist',
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vod.afreecatv.com/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/70395877',
|
||||
'url': 'https://vod.sooplive.co.kr/player/70395877',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# subscribers only
|
||||
'url': 'https://vod.afreecatv.com/player/104647403',
|
||||
'url': 'https://vod.sooplive.co.kr/player/104647403',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private
|
||||
'url': 'https://vod.afreecatv.com/player/81669846',
|
||||
'url': 'https://vod.sooplive.co.kr/player/81669846',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -209,8 +162,8 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('bj_id', {str}),
|
||||
'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
|
||||
'thumbnails': ('thumb', {self._fixup_thumb}),
|
||||
})
|
||||
|
||||
entries = []
|
||||
@@ -233,7 +186,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
|
||||
'formats': formats,
|
||||
**traverse_obj(file_element, {
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('file_start', {unified_timestamp}),
|
||||
}),
|
||||
})
|
||||
@@ -262,11 +215,11 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
|
||||
|
||||
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:catchstory'
|
||||
IE_DESC = 'afreecatv.com catch story'
|
||||
_VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
|
||||
IE_NAME = 'soop:catchstory'
|
||||
IE_DESC = 'sooplive.co.kr catch story'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory'
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.afreecatv.com/player/103247/catchstory',
|
||||
'url': 'https://vod.sooplive.co.kr/player/103247/catchstory',
|
||||
'info_dict': {
|
||||
'id': '103247',
|
||||
},
|
||||
@@ -281,29 +234,28 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
|
||||
return self.playlist_result(self._entries(data), video_id)
|
||||
|
||||
@staticmethod
|
||||
def _entries(data):
|
||||
def _entries(self, data):
|
||||
# 'files' is always a list with 1 element
|
||||
yield from traverse_obj(data, (
|
||||
'data', lambda _, v: v['story_type'] == 'catch',
|
||||
'catch_list', lambda _, v: v['files'][0]['file'], {
|
||||
'id': ('files', 0, 'file_info_key', {str}),
|
||||
'url': ('files', 0, 'file', {url_or_none}),
|
||||
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('writer_id', {str}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
'thumbnails': ('thumb', {self._fixup_thumb}),
|
||||
'timestamp': ('write_timestamp', {int_or_none}),
|
||||
}))
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:live'
|
||||
IE_DESC = 'afreecatv.com livestreams'
|
||||
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
|
||||
IE_NAME = 'soop:live'
|
||||
IE_DESC = 'sooplive.co.kr livestreams'
|
||||
_VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'info_dict': {
|
||||
'id': '237852185',
|
||||
'ext': 'mp4',
|
||||
@@ -315,30 +267,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
},
|
||||
'skip': 'Livestream has ended',
|
||||
}, {
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.afreecatv.com/pyh3646',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
|
||||
_LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php'
|
||||
_WORKING_CDNS = [
|
||||
'gcp_cdn', # live-global-cdn-v02.afreecatv.com
|
||||
'gs_cdn_pc_app', # pc-app.stream.afreecatv.com
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com
|
||||
'gs_cdn_pc_web', # pc-web.stream.afreecatv.com
|
||||
'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr
|
||||
'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr
|
||||
'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr
|
||||
]
|
||||
_BAD_CDNS = [
|
||||
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
|
||||
'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve)
|
||||
'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400)
|
||||
'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve)
|
||||
'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400)
|
||||
]
|
||||
|
||||
def _extract_formats(self, channel_info, broadcast_no, aid):
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr'
|
||||
|
||||
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
|
||||
default_cdn_ids = orderedSet([
|
||||
@@ -358,7 +310,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
try:
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
|
||||
headers={'Referer': 'https://play.afreecatv.com/'})
|
||||
headers={'Referer': 'https://play.sooplive.co.kr/'})
|
||||
except ExtractorError as e:
|
||||
if attempt == len(cdn_ids):
|
||||
raise
|
||||
@@ -374,7 +326,13 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
broadcaster_id = channel_info.get('BJID') or broadcaster_id
|
||||
broadcast_no = channel_info.get('BNO') or broadcast_no
|
||||
if not broadcast_no:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
result = channel_info.get('RESULT')
|
||||
if result == 0:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
elif result == -6:
|
||||
self.raise_login_required(
|
||||
'This channel is streaming for subscribers only', method='password')
|
||||
raise ExtractorError('Unable to extract broadcast number')
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if channel_info.get('BPWD') == 'Y' and password is None:
|
||||
@@ -403,7 +361,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
formats = self._extract_formats(channel_info, broadcast_no, aid)
|
||||
|
||||
station_info = traverse_obj(self._download_json(
|
||||
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
|
||||
'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no,
|
||||
'Downloading channel metadata', 'Unable to download channel metadata',
|
||||
query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
|
||||
|
||||
@@ -419,11 +377,11 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class AfreecaTVUserIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv:user'
|
||||
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
|
||||
class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:user'
|
||||
_VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -431,7 +389,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 218,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
|
||||
'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'parang1995',
|
||||
@@ -439,7 +397,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 997,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -447,7 +405,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 221,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -459,12 +417,12 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
|
||||
def _fetch_page(self, user_id, user_type, page):
|
||||
page += 1
|
||||
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
|
||||
info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id,
|
||||
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
|
||||
note=f'Downloading {user_type} video page {page}')
|
||||
for item in info['data']:
|
||||
yield self.url_result(
|
||||
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
|
||||
|
||||
@@ -71,7 +71,7 @@ class AllstarBaseIE(InfoExtractor):
|
||||
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
|
||||
'duration': ('clipLength', {int_or_none}),
|
||||
'filesize': ('clipSizeBytes', {int_or_none}),
|
||||
'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('createdDate', {int_or_none(scale=1000)}),
|
||||
'uploader': ('username', {str}),
|
||||
'uploader_id': ('user', '_id', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
|
||||
@@ -8,10 +8,8 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_encrypt
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
smuggle_url,
|
||||
strip_jsonp,
|
||||
@@ -33,24 +31,6 @@ class AnvatoIE(InfoExtractor):
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
||||
'info_dict': {
|
||||
'id': '899441',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'NFL',
|
||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
||||
'duration': 157,
|
||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
||||
},
|
||||
}, {
|
||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||
@@ -241,31 +221,6 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||
}
|
||||
|
||||
def _generate_nfl_token(self, anvack, mcp_id):
|
||||
reroute = self._download_json(
|
||||
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
|
||||
headers={'X-Domain-Id': 100}, note='Fetching token info')
|
||||
token_type = reroute.get('token_type') or 'Bearer'
|
||||
auth_token = f'{token_type} {reroute["access_token"]}'
|
||||
response = self._download_json(
|
||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
||||
'query': '''{
|
||||
viewer {
|
||||
mediaToken(anvack: "%s", id: %s) {
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id), # noqa: UP031
|
||||
}).encode(), headers={
|
||||
'Authorization': auth_token,
|
||||
'Content-Type': 'application/json',
|
||||
}, note='Fetching NFL API token')
|
||||
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
|
||||
|
||||
_TOKEN_GENERATORS = {
|
||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
|
||||
}
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
return int_or_none(traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
|
||||
@@ -277,8 +232,8 @@ class AnvatoIE(InfoExtractor):
|
||||
server_time = self._server_time(access_key, video_id)
|
||||
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
|
||||
|
||||
auth_secret = intlist_to_bytes(aes_encrypt(
|
||||
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
|
||||
auth_secret = bytes(aes_encrypt(
|
||||
list(input_data[:64].encode()), list(self._AUTH_KEY)))
|
||||
query = {
|
||||
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
|
||||
'rtyp': 'fp',
|
||||
@@ -290,8 +245,6 @@ class AnvatoIE(InfoExtractor):
|
||||
}
|
||||
if extracted_token is not None:
|
||||
api['anvstk2'] = extracted_token
|
||||
elif self._TOKEN_GENERATORS.get(access_key) is not None:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
|
||||
elif self._ANVACK_TABLE.get(access_key) is not None:
|
||||
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
||||
else:
|
||||
|
||||
@@ -1,27 +1,42 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
|
||||
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
|
||||
'info_dict': {
|
||||
'id': '1000665010654',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
|
||||
'upload_date': '20240812',
|
||||
'timestamp': 1723449600,
|
||||
'duration': 3596,
|
||||
'series': 'Ferreck Dawn - To The Break of Dawn',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'episode': '207 - Whitney Webb Returns',
|
||||
'episode_number': 207,
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593932400,
|
||||
'duration': 6454,
|
||||
'duration': 5369,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
@@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
server_data = self._search_json(
|
||||
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
|
||||
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
|
||||
model_data = traverse_obj(server_data, (
|
||||
'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
|
||||
'model', {dict}, any))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': episode.get('name'),
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
'description': description.get('standard') or description.get('short'),
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
**self._json_ld(
|
||||
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
|
||||
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
|
||||
**traverse_obj(model_data, {
|
||||
'title': ('title', {str}),
|
||||
'url': ('streamUrl', {clean_podcast_url}),
|
||||
'timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
||||
@@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
# The reviewbody is None for one of the reviews; just need to extract data without crashing
|
||||
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||
'info_dict': {
|
||||
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||
'ext': 'mp3',
|
||||
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||
'creators': ['Grateful Dead'],
|
||||
'duration': 338.31,
|
||||
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
|
||||
'display_id': 'gd95-04-02d1t04.shn',
|
||||
'location': 'Pyramid Arena',
|
||||
'uploader': 'jon@archive.org',
|
||||
'album': '1995-04-02 - Pyramid Arena',
|
||||
'upload_date': '20040519',
|
||||
'track_number': 4,
|
||||
'release_date': '19950402',
|
||||
'timestamp': 1084927901,
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -335,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
info['comments'].append({
|
||||
'id': review.get('review_id'),
|
||||
'author': review.get('reviewer'),
|
||||
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'),
|
||||
'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
|
||||
'timestamp': unified_timestamp(review.get('createdate')),
|
||||
'parent': 'root'})
|
||||
|
||||
|
||||
@@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARDMediathek'
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
@@ -299,7 +299,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '94834686',
|
||||
'ext': 'mp4',
|
||||
'duration': 2700,
|
||||
'duration': 2670,
|
||||
'episode': '7 Tage ... unter harten Jungs',
|
||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||
'upload_date': '20231005',
|
||||
@@ -307,10 +307,28 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'series': '7 Tage ...',
|
||||
'channel': 'HR',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
|
||||
'title': '7 Tage ... unter harten Jungs',
|
||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'info_dict': {
|
||||
'id': '13847165',
|
||||
'chapters': 'count:8',
|
||||
'ext': 'mp4',
|
||||
'channel': 'WDR',
|
||||
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'series': 'Lokalzeit aus Düsseldorf',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
|
||||
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'upload_date': '20241031',
|
||||
'timestamp': 1730399400,
|
||||
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
|
||||
'duration': 1759,
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'only_matching': True,
|
||||
@@ -455,6 +473,12 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
'age_limit': age_limit,
|
||||
**traverse_obj(media_data, {
|
||||
'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
|
||||
'start_time': ('chapterTime', {int_or_none}),
|
||||
'title': ('chapterTitle', {str}),
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(media_data, ('meta', {
|
||||
'title': 'title',
|
||||
'description': 'synopsis',
|
||||
@@ -470,7 +494,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDMediathekCollectionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/?#]+/)?
|
||||
(?P<playlist>sendung|serie|sammlung)/
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -6,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
@@ -17,6 +19,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
@@ -40,6 +43,8 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader_url': 'https://youtube-dl.bandcamp.com',
|
||||
'uploader_id': 'youtube-dl',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
||||
'artists': ['youtube-dl "\'/\\ä↭'],
|
||||
'album_artists': ['youtube-dl "\'/\\ä↭'],
|
||||
},
|
||||
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
}, {
|
||||
@@ -266,6 +271,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311756226,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
|
||||
'album_artists': ['Blazo'],
|
||||
'uploader_url': 'https://blazo.bandcamp.com',
|
||||
'release_date': '20110727',
|
||||
'release_timestamp': 1311724800.0,
|
||||
'track': 'Intro',
|
||||
'uploader_id': 'blazo',
|
||||
'track_number': 1,
|
||||
'album': 'Jazz Format Mixtape vol.1',
|
||||
'artists': ['Blazo'],
|
||||
'duration': 19.335,
|
||||
'track_id': '1353101989',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -277,6 +294,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311757238,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
'track': 'Kero One - Keep It Alive (Blazo remix)',
|
||||
'release_date': '20110727',
|
||||
'track_id': '38097443',
|
||||
'track_number': 2,
|
||||
'duration': 181.467,
|
||||
'uploader_url': 'https://blazo.bandcamp.com',
|
||||
'album': 'Jazz Format Mixtape vol.1',
|
||||
'uploader_id': 'blazo',
|
||||
'album_artists': ['Blazo'],
|
||||
'artists': ['Blazo'],
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
|
||||
'release_timestamp': 1311724800.0,
|
||||
},
|
||||
},
|
||||
],
|
||||
@@ -284,6 +313,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'title': 'Jazz Format Mixtape vol.1',
|
||||
'id': 'jazz-format-mixtape-vol-1',
|
||||
'uploader_id': 'blazo',
|
||||
'description': 'md5:38052a93217f3ffdc033cd5dbbce2989',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2,
|
||||
@@ -358,10 +388,10 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://bandcamp.com/?show=224',
|
||||
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
|
||||
'md5': '61acc9a002bed93986b91168aa3ab433',
|
||||
'info_dict': {
|
||||
'id': '224',
|
||||
'ext': 'opus',
|
||||
'ext': 'mp3',
|
||||
'title': 'BC Weekly April 4th 2017 - Magic Moments',
|
||||
'description': 'md5:5d48150916e8e02d030623a48512c874',
|
||||
'duration': 5829.77,
|
||||
@@ -371,7 +401,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'episode_id': '224',
|
||||
},
|
||||
'params': {
|
||||
'format': 'opus-lo',
|
||||
'format': 'mp3-128',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||
@@ -459,7 +489,7 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://coldworldofficial.bandcamp.com/music',
|
||||
'playlist_mincount': 10,
|
||||
'playlist_mincount': 7,
|
||||
'info_dict': {
|
||||
'id': 'coldworldofficial',
|
||||
'title': 'Discography of coldworldofficial',
|
||||
@@ -473,12 +503,19 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _yield_items(self, webpage):
|
||||
yield from (
|
||||
re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
yield from traverse_obj(webpage, (
|
||||
{find_element(id='music-grid', html=True)}, {extract_attributes},
|
||||
'data-client-items', {json.loads}, ..., 'page_url', {str}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader = self._match_id(url)
|
||||
webpage = self._download_webpage(url, uploader)
|
||||
|
||||
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
return self.playlist_from_matches(
|
||||
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
|
||||
self._yield_items(webpage), uploader, f'Discography of {uploader}',
|
||||
getter=urljoin(url))
|
||||
|
||||
437
yt_dlp/extractor/bandlab.py
Normal file
437
yt_dlp/extractor/bandlab.py
Normal file
@@ -0,0 +1,437 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
truncate_string,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
|
||||
|
||||
class BandlabBaseIE(InfoExtractor):
    """Shared BandLab API access and metadata parsing helpers."""

    def _call_api(self, endpoint, asset_id, **kwargs):
        """Download ``{endpoint}/{asset_id}`` from the BandLab v1.3 API as JSON.

        A ``headers`` keyword argument, if given, is merged over the default
        request headers; every other keyword argument is forwarded unchanged
        to ``_download_json``.
        """
        request_headers = {
            'accept': 'application/json',
            'referer': 'https://www.bandlab.com/',
            'x-client-id': 'BandLab-Web',
            'x-client-version': '10.1.124',
        }
        # Caller-supplied headers take precedence over the defaults above
        request_headers.update(kwargs.pop('headers', None) or {})
        api_url = f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}'
        return self._download_json(api_url, asset_id, headers=request_headers, **kwargs)

    def _parse_revision(self, revision_data, url=None):
        """Build an audio-only info dict from a revision API object.

        Results are always attributed to ``BandlabIE``. ``webpage_url`` is
        only emitted when ``revision_data`` has an ``id``; it appears to
        prefer ``url`` when one is passed and otherwise fall back to the
        ``/revision/<id>`` page (confirm against the traversal helpers).
        """
        fields = traverse_obj(revision_data, {
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
            'id': (('revisionId', 'id'), {str}, any),
            'title': ('song', 'name', {str}),
            'track': ('song', 'name', {str}),
            'url': ('mixdown', 'file', {url_or_none}),
            'thumbnail': ('song', 'picture', 'url', {url_or_none}),
            'description': ('description', {str}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
            'duration': ('mixdown', 'duration', {float_or_none}),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'genres': ('genres', ..., 'name', {str}),
        })
        return {
            'vcodec': 'none',
            'media_type': 'revision',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
            **fields,
        }

    def _parse_track(self, track_data, url=None):
        """Build an audio-only info dict from a post object of type ``Track``.

        The media URL and duration are read from ``track.sample``. As with
        the other parsers, ``webpage_url`` is only emitted when the post has
        an ``id``, and results are attributed to ``BandlabIE``.
        """
        fields = traverse_obj(track_data, {
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
            'id': (('revisionId', 'id'), {str}, any),
            'url': ('track', 'sample', 'audioUrl', {url_or_none}),
            'title': ('track', 'name', {str}),
            'track': ('track', 'name', {str}),
            'description': ('caption', {str}),
            'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('track', 'sample', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
        })
        return {
            'vcodec': 'none',
            'media_type': 'track',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
            **fields,
        }

    def _parse_video(self, video_data, url=None):
        """Build an info dict from a post object of type ``Video``.

        Video posts carry no name field here, so the title is derived from
        the caption: newlines become spaces and the result is passed through
        ``truncate_string(left=50)``.
        """
        fields = traverse_obj(video_data, {
            'id': ('id', {str}),
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
            'url': ('video', 'url', {url_or_none}),
            'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
            'description': ('caption', {str}),
            'thumbnail': ('video', 'picture', 'url', {url_or_none}),
            'view_count': ('video', 'counters', 'plays', {int_or_none}),
            'like_count': ('video', 'counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('video', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
        })
        return {
            'media_type': 'video',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
            **fields,
        }
|
||||
|
||||
|
||||
class BandlabIE(BandlabBaseIE):
    """Extractor for single BandLab tracks, posts, revisions and track embeds."""

    # NOTE: the dot in "bandlab.com" is escaped so that only the real host matches
    _VALID_URL = [
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    # Only the embed pattern is searched for inside third-party pages
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171963,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # Same track as above but post URL
        'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171973,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # SharedKey Example
        'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
        'md5': '15174b57c44440e2a2008be9cae00250',
        'info_dict': {
            'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Other'],
            'uploader_id': 'user8353034818103753',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
            'timestamp': 1709625771,
            'track': 'PodcastMaerchen4b',
            'duration': 468.14,
            'view_count': int,
            'description': 'Podcast: Neues aus der Märchenwelt',
            'like_count': int,
            'upload_date': '20240305',
            'uploader': 'Erna Wageneder',
            'title': 'PodcastMaerchen4b',
            'media_type': 'revision',
        },
    }, {
        # Different Revision selected
        'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
        'md5': '74e055ef9325d63f37088772fbfe4454',
        'info_dict': {
            'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
            'ext': 'm4a',
            'timestamp': 1588273294,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
            'description': 'Final Revision.',
            'title': 'Replay ( Instrumental)',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
            'view_count': int,
            'comment_count': int,
            'track': 'Replay ( Instrumental)',
            'genres': ['Rock'],
            'upload_date': '20200430',
            'like_count': int,
            'duration': 279.43,
            'media_type': 'revision',
        },
    }, {
        # Video
        'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
        'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
        'info_dict': {
            'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
            'ext': 'mp4',
            'duration': 44.705,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
            'comment_count': int,
            'title': 'backing vocals',
            'uploader_id': 'marliashya',
            'uploader': 'auraa',
            'like_count': int,
            'description': 'backing vocals',
            'media_type': 'video',
        },
    }, {
        # Embed Example
        'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
        'info_dict': {
            'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Electronic'],
            'uploader': 'Charlie Henson',
            'timestamp': 1587328674,
            'upload_date': '20200419',
            'view_count': int,
            'track': 'Positronic Meltdown',
            'duration': 318.55,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
            'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
            'uploader_id': 'microfreaks',
            'title': 'Positronic Meltdown',
            'like_count': int,
            'media_type': 'revision',
        },
    }, {
        # Track without revisions available
        'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
        'md5': 'f05d68a3769952c2d9257c473e14c15f',
        'info_dict': {
            'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
            'ext': 'm4a',
            'track': 'insame',
            'like_count': int,
            'duration': 84.03,
            'title': 'insame',
            'view_count': int,
            'comment_count': int,
            'uploader': 'Sorakime',
            'uploader_id': 'sorakime',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
            'timestamp': 1691162128,
            'upload_date': '20230804',
            'media_type': 'track',
        },
    }, {
        'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://phantomluigi.github.io/',
        'info_dict': {
            'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
            'ext': 'm4a',
            'view_count': int,
            'upload_date': '20240913',
            'uploader_id': 'phantommusicofficial',
            'timestamp': 1726194897,
            'uploader': 'Phantom',
            'comment_count': int,
            'genres': ['Progresive Rock'],
            'description': 'md5:a38cd668f7a2843295ef284114f18429',
            'duration': 225.23,
            'like_count': int,
            'title': 'Vermilion Pt. 2 (Cover)',
            'track': 'Vermilion Pt. 2 (Cover)',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
            'media_type': 'revision',
        },
    }]

    def _real_extract(self, url):
        """Resolve a track/post/revision/embed URL to a single info dict.

        When the URL does not already identify a revision, the post is
        fetched first. A post that carries a revision is parsed as one;
        otherwise it is parsed as a ``Video`` or ``Track`` post.

        Raises ExtractorError for a revision-less post of any other type.
        """
        display_id, url_type = self._match_valid_url(url).group('id', 'url_type')

        qs = parse_qs(url)
        # A revision can be named by the "revId" or "id" query parameter
        # (see the "Different Revision selected" and embed tests)
        revision_id = traverse_obj(qs, (('revId', 'id'), 0, any))
        if url_type == 'revision':
            revision_id = display_id

        revision_data = None
        if not revision_id:
            post_data = self._call_api(
                'posts', display_id, note='Downloading post data',
                query=traverse_obj(qs, {'sharedKey': ('sharedKey', 0)}))

            revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any))
            revision_data = traverse_obj(post_data, ('revision', {dict}))

            # Only reachable after the post lookup above, so post_data is bound
            if not revision_data and not revision_id:
                post_type = post_data.get('type')
                if post_type == 'Video':
                    return self._parse_video(post_data, url=url)
                if post_type == 'Track':
                    return self._parse_track(post_data, url=url)
                raise ExtractorError(f'Could not extract data for post type {post_type!r}')

        if not revision_data:
            revision_data = self._call_api(
                'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})

        return self._parse_revision(revision_data, url=url)
|
||||
|
||||
|
||||
class BandlabPlaylistIE(BandlabBaseIE):
    # Handles Bandlab album pages, collection pages and the shared
    # "embed/collection" player, which can point at either kind of playlist.
    _VALID_URL = [
        # The dot in the host name is escaped so that it only matches a literal "."
        r'https?://(?:www\.)?bandlab\.com/(?:\w+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'info_dict': {
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
            'release_date': '20221003',
            'title': 'Remnants',
            'album': 'Remnants',
            'like_count': int,
            'album_type': 'LP',
            'description': 'A collection of some feel good, rock hits.',
            'comment_count': int,
            'view_count': int,
            'id': '89b79ea6-de42-ed11-b495-00224845aac7',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
        'info_dict': {
            'id': '955102d4-1040-ef11-86c3-000d3a42581b',
            'timestamp': 1720762659,
            'view_count': int,
            'title': 'My Shit 🖤',
            'uploader_id': 'slytheband',
            'uploader': '𝓢𝓛𝓨',
            'upload_date': '20240712',
            'like_count': int,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
        },
        'playlist_count': 15,
    }, {
        # Embeds can contain both albums and collections with the same URL pattern. This is an album
        'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
        'info_dict': {
            'id': '12cc6f7f-951b-ee11-907c-00224844f303',
            'release_date': '20230706',
            'description': 'This is a collection of songs I created when I had an Amiga computer.',
            'view_count': int,
            'title': 'Mark Salud The Amiga Collection',
            'uploader_id': 'mssirmooth1962',
            'comment_count': int,
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
            'like_count': int,
            'uploader': 'Mark Salud',
            'album': 'Mark Salud The Amiga Collection',
            'album_type': 'LP',
        },
        'playlist_count': 24,
    }, {
        # Tracks without revision id
        'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
        'info_dict': {
            'like_count': int,
            'uploader_id': 'sorakime',
            'comment_count': int,
            'uploader': 'Sorakime',
            'view_count': int,
            'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
            'release_date': '20230812',
            'title': 'Art',
            'album': 'Art',
            'album_type': 'Album',
            'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'only_matching': True,
    }]

    def _entries(self, album_data):
        """Yield one parsed entry per post of the playlist.

        Posts without a truthy ``type`` are ignored; posts whose type is not
        ``Revision``, ``Track`` or ``Video`` are skipped with a warning.
        """
        for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])):
            post_type = post['type']
            if post_type == 'Revision':
                yield self._parse_revision(post.get('revision'))
            elif post_type == 'Track':
                yield self._parse_track(post)
            elif post_type == 'Video':
                yield self._parse_video(post)
            else:
                self.report_warning(f'Skipping unknown post type: "{post_type}"')

    def _real_extract(self, url):
        """Download the playlist metadata and return a playlist result.

        Embed URLs do not say whether they reference a collection or an
        album, so both API endpoints are tried in turn.

        Raises ExtractorError when no endpoint returns usable playlist data.
        """
        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')

        endpoints = {
            'albums': ['albums'],
            'collections': ['collections'],
            'embed': ['collections', 'albums'],
        }.get(playlist_type)

        playlist_data = None
        for endpoint in endpoints:
            playlist_data = self._call_api(
                endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
                fatal=False, expected_status=404)
            # NOTE(review): with fatal=False the call presumably returns a non-dict
            # value on failure, so the type is checked before reading 'errorCode'
            if isinstance(playlist_data, dict) and not playlist_data.get('errorCode'):
                playlist_type = endpoint
                break
        else:
            # No endpoint produced usable data; report the last error code if there is one
            error_code = playlist_data.get('errorCode') if isinstance(playlist_data, dict) else None
            if error_code:
                raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')
            raise ExtractorError('Could not find playlist data')

        return self.playlist_result(
            self._entries(playlist_data), playlist_id,
            **traverse_obj(playlist_data, {
                'title': ('name', {str}),
                'description': ('description', {str}),
                'uploader': ('creator', 'name', {str}),
                'uploader_id': ('creator', 'username', {str}),
                'timestamp': ('createdOn', {parse_iso8601}),
                'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
                'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
                'like_count': ('counters', 'likes', {int_or_none}),
                'comment_count': ('counters', 'comments', {int_or_none}),
                'view_count': ('counters', 'plays', {int_or_none}),
            }),
            # Album-specific fields are only filled in when the albums endpoint answered
            **(traverse_obj(playlist_data, {
                'album': ('name', {str}),
                'album_type': ('type', {str}),
            }) if playlist_type == 'albums' else {}))
|
||||
@@ -1284,9 +1284,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, filter, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1386,7 +1386,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'ext': ('format', {str}),
|
||||
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
'tbr': ('bitrate', {int_or_none(scale=1000)}),
|
||||
}))
|
||||
if formats:
|
||||
entry = {
|
||||
@@ -1398,7 +1398,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('firstPublished', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
done = True
|
||||
@@ -1428,7 +1428,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
if not entry.get('timestamp'):
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('timestamp'), 'model',
|
||||
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
'timestamp', {int_or_none(scale=1000)}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
68
yt_dlp/extractor/beacon.py
Normal file
68
yt_dlp/extractor/beacon.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BeaconTvIE(InfoExtractor):
    # Extracts the JW Player media attached to a beacon.tv content page
    _VALID_URL = r'https?://(?:www\.)?beacon\.tv/content/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://beacon.tv/content/welcome-to-beacon',
        'md5': 'b3f5932d437f288e662f10f3bfc5bd04',
        'info_dict': {
            'id': 'welcome-to-beacon',
            'ext': 'mp4',
            'upload_date': '20240509',
            'description': 'md5:ea2bd32e71acf3f9fca6937412cc3563',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/I4CkkEvN/poster.jpg?width=720',
            'title': 'Your home for Critical Role!',
            'timestamp': 1715227200,
            'duration': 105.494,
        },
    }, {
        'url': 'https://beacon.tv/content/re-slayers-take-trailer',
        'md5': 'd879b091485dbed2245094c8152afd89',
        'info_dict': {
            'id': 're-slayers-take-trailer',
            'ext': 'mp4',
            'title': 'The Re-Slayer’s Take | Official Trailer',
            'timestamp': 1715189040,
            'upload_date': '20240508',
            'duration': 53.249,
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/PW5ApIw3/poster.jpg?width=720',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the content page at ``url``.

        The page's Next.js data is searched for the ``Content:`` entry whose
        slug equals the URL id; its JSON-encoded video or podcast payload is
        then handed to the JW Player parser.

        Raises ExtractorError when the content entry or its media payload
        cannot be found, or when the content is not a video/podcast.
        """
        video_id = self._match_id(url)
        nextjs_data = self._search_nextjs_data(
            self._download_webpage(url, video_id), video_id)

        def is_requested_content(key, entry):
            return key.startswith('Content:') and entry['slug'] == video_id

        content_data = traverse_obj(nextjs_data, (
            'props', 'pageProps', '__APOLLO_STATE__', is_requested_content, any))
        if not content_data:
            raise ExtractorError('Failed to extract content data')

        # The media payload is a JSON string stored under one of these two paths
        media_paths = (
            ('contentVideo', 'video', 'videoData'),
            ('contentPodcast', 'podcast', 'audioData'),
        )
        jwplayer_data = traverse_obj(
            content_data, (media_paths, {json.loads}, {dict}, any))

        if jwplayer_data:
            media_info = self._parse_jwplayer_data(jwplayer_data, video_id)
            page_info = traverse_obj(content_data, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'timestamp': ('publishedAt', {parse_iso8601}),
            })
            # Page metadata takes precedence over what the player data provides
            return {**media_info, **page_info}

        # No media payload: work out the most helpful error to report
        if content_data.get('contentType') not in ('videoPodcast', 'video', 'podcast'):
            raise ExtractorError('Content is not a video/podcast', expected=True)
        tier_ref = traverse_obj(content_data, ('contentTier', '__ref'))
        if tier_ref != 'MemberTier:65b258d178f89be87b4dc0a4':
            self.raise_login_required('This video/podcast is for members only')
        raise ExtractorError('Failed to extract content')
|
||||
@@ -1,18 +1,33 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes
|
||||
from ..utils import ExtractorError, extract_attributes
|
||||
|
||||
|
||||
class BFMTVBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
|
||||
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>.*?</div>)'
|
||||
_VIDEO_ELEMENT_REGEX = r'(<video-js[^>]+>)'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _brightcove_url_result(self, video_id, video_block):
|
||||
account_id = video_block.get('accountid') or '876450612001'
|
||||
player_id = video_block.get('playerid') or 'I2qBTln4u'
|
||||
def _extract_video(self, video_block):
    """Build a Brightcove URL result from the HTML of one video block.

    When the block contains a ``<video-js>`` element, the ids are read from
    that element's attributes; otherwise they are read from the attributes
    of the block's own opening tag.  Missing account/player ids fall back to
    hard-coded defaults.

    Returns None when no video id is present.
    """
    video_element = self._search_regex(
        self._VIDEO_ELEMENT_REGEX, video_block, 'video element', default=None)

    # Select the attribute source, the attribute names and the fallbacks in one place
    if video_element:
        attrs = extract_attributes(video_element)
        id_key, account_key, player_key = 'data-video-id', 'data-account', 'adjustplayer'
        default_account, default_player = '876450610001', '19dszYXgm'
    else:
        attrs = extract_attributes(video_block)
        id_key, account_key, player_key = 'videoid', 'accountid', 'playerid'
        default_account, default_player = '876630703001', 'KbPwEbuHx'

    video_id = attrs.get(id_key)
    if not video_id:
        return None

    account_id = attrs.get(account_key) or default_account
    player_id = attrs.get(player_key) or default_player
    brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id)
    return self.url_result(brightcove_url, 'BrightcoveNew', video_id)
|
||||
@@ -40,23 +55,25 @@ class BFMTVIE(BFMTVBaseIE):
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
video_block = extract_attributes(self._search_regex(
|
||||
video = self._extract_video(self._search_regex(
|
||||
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||
return self._brightcove_url_result(video_block['videoid'], video_block)
|
||||
if not video:
|
||||
raise ExtractorError('Failed to extract video')
|
||||
return video
|
||||
|
||||
|
||||
class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
class BFMTVLiveIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:live'
|
||||
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bfmtv.com/en-direct/',
|
||||
'info_dict': {
|
||||
'id': '5615950982001',
|
||||
'id': '6346069778112',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'title': r're:^Le Live BFM TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'uploader_id': '876450610001',
|
||||
'upload_date': '20220926',
|
||||
'timestamp': 1664207191,
|
||||
'upload_date': '20240202',
|
||||
'timestamp': 1706887572,
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://.+/image\.jpg',
|
||||
'tags': [],
|
||||
@@ -69,6 +86,15 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract the video embedded in the page's first video block.

    Raises ExtractorError when the video block yields no video.
    """
    page_id = self._match_id(url)
    html = self._download_webpage(url, page_id)
    video_block = self._search_regex(self._VIDEO_BLOCK_REGEX, html, 'video block')

    if result := self._extract_video(video_block):
        return result
    raise ExtractorError('Failed to extract video')
|
||||
|
||||
|
||||
class BFMTVArticleIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:article'
|
||||
@@ -102,18 +128,16 @@ class BFMTVArticleIE(BFMTVBaseIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _entries(self, webpage):
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video = self._extract_video(video_block_el)
|
||||
if video:
|
||||
yield video
|
||||
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
|
||||
entries = []
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video_block = extract_attributes(video_block_el)
|
||||
video_id = video_block.get('videoid')
|
||||
if not video_id:
|
||||
continue
|
||||
entries.append(self._brightcove_url_result(video_id, video_block))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._entries(webpage), bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._html_search_meta(['og:description', 'description'], webpage))
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -50,7 +49,7 @@ class BibelTVBaseIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('schedulingStart', {parse_iso8601}),
|
||||
'season_number': 'seasonNumber',
|
||||
'episode_number': 'episodeNumber',
|
||||
|
||||
@@ -18,7 +18,6 @@ from ..utils import (
|
||||
InAdvancePagedList,
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
@@ -46,6 +45,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
|
||||
_wbi_key_cache = {}
|
||||
@@ -62,7 +62,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
|
||||
if missing_formats:
|
||||
self.to_screen(
|
||||
f'Format(s) {missing_formats} are missing; you have to login or '
|
||||
f'Format(s) {missing_formats} are missing; you have to '
|
||||
f'become a premium member to download them. {self._login_hint()}')
|
||||
|
||||
def extract_formats(self, play_info):
|
||||
@@ -108,7 +108,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('length', {float_or_none(scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}))
|
||||
if fragments:
|
||||
@@ -123,7 +123,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'quality': ('quality', {int_or_none}),
|
||||
'format_id': ('quality', {str_or_none}),
|
||||
'format_note': ('quality', {lambda x: format_names.get(x)}),
|
||||
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('timelength', {float_or_none(scale=1000)}),
|
||||
}),
|
||||
**parse_resolution(format_names.get(play_info.get('quality'))),
|
||||
})
|
||||
@@ -164,14 +164,18 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
|
||||
return params
|
||||
|
||||
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
|
||||
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
|
||||
if qn:
|
||||
params['qn'] = qn
|
||||
def _download_playinfo(self, bvid, cid, headers=None, query=None):
|
||||
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})}
|
||||
if self.is_logged_in:
|
||||
params.pop('try_look', None)
|
||||
if qn := params.get('qn'):
|
||||
note = f'Downloading video format {qn} for cid {cid}'
|
||||
else:
|
||||
note = f'Downloading video formats for cid {cid}'
|
||||
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
|
||||
query=self._sign_wbi(params, bvid), headers=headers,
|
||||
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
|
||||
query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
@@ -190,9 +194,9 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
video_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}')
|
||||
note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
|
||||
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||
self.report_warning(
|
||||
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
|
||||
@@ -206,8 +210,8 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
def _get_chapters(self, aid, cid):
|
||||
chapters = aid and cid and self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
note='Extracting chapters', fatal=False)
|
||||
'https://api.bilibili.com/x/player/wbi/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
note='Extracting chapters', fatal=False, headers=self._HEADERS)
|
||||
return traverse_obj(chapters, ('data', 'view_points', ..., {
|
||||
'title': 'content',
|
||||
'start_time': 'from',
|
||||
@@ -285,7 +289,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||
for cid, edges in cid_edges.items():
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
|
||||
yield {
|
||||
**metainfo,
|
||||
'id': f'{video_id}_{cid}',
|
||||
@@ -638,40 +642,29 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
headers['Referer'] = url
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
|
||||
self.raise_login_required()
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
|
||||
raise ExtractorError(
|
||||
'This video may be deleted or geo-restricted. '
|
||||
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
|
||||
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
else:
|
||||
play_info_obj = self._search_json(
|
||||
r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
|
||||
if not play_info_obj:
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
|
||||
self.raise_login_required()
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
|
||||
raise ExtractorError(
|
||||
'This video may be deleted or geo-restricted. '
|
||||
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
|
||||
play_info = traverse_obj(play_info_obj, ('data', {dict}))
|
||||
if not play_info:
|
||||
if traverse_obj(play_info_obj, 'code') == 87007:
|
||||
toast = get_element_by_class('tips-toast', webpage) or ''
|
||||
msg = clean_html(
|
||||
f'{get_element_by_class("belongs-to", toast) or ""},'
|
||||
+ (get_element_by_class('level', toast) or ''))
|
||||
raise ExtractorError(
|
||||
f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
|
||||
raise ExtractorError('Failed to extract play info')
|
||||
video_data = initial_state['videoData']
|
||||
|
||||
video_id, title = video_data['bvid'], video_data.get('title')
|
||||
|
||||
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
|
||||
page_list_json = not is_festival and traverse_obj(
|
||||
page_list_json = (not is_festival and traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||
note='Extracting videos in anthology', headers=headers),
|
||||
'data', expected_type=list) or []
|
||||
'data', expected_type=list)) or []
|
||||
is_anthology = len(page_list_json) > 1
|
||||
|
||||
part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
|
||||
@@ -690,8 +683,6 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
festival_info = {}
|
||||
if is_festival:
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
|
||||
festival_info = traverse_obj(initial_state, {
|
||||
'uploader': ('videoInfo', 'upName'),
|
||||
'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
|
||||
@@ -726,62 +717,79 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
|
||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
__post_extractor=self.extract_comments(aid))
|
||||
else:
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if not traverse_obj(play_info, ('dash')):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
play_info = None
|
||||
if self.is_logged_in:
|
||||
play_info = traverse_obj(
|
||||
self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None),
|
||||
('data', {dict}))
|
||||
if not play_info:
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
if video_data.get('is_upower_exclusive'):
|
||||
high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {}
|
||||
msg = f'{join_nonempty("title", "sub_title", from_dict=high_level, delim=",")}. {self._login_hint()}'
|
||||
if not formats:
|
||||
raise ExtractorError(f'This is a supporter-only video: {msg}', expected=True)
|
||||
if '试看' in traverse_obj(play_info, ('accept_description', ..., {str})):
|
||||
self.report_warning(
|
||||
f'This is a supporter-only video, only the preview will be extracted: {msg}',
|
||||
video_id=video_id)
|
||||
|
||||
if not traverse_obj(play_info, 'dash'):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, query={'qn': qn})),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
@@ -859,10 +867,16 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
headers['Referer'] = url
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
headers=headers)
|
||||
|
||||
play_info = (
|
||||
self._search_json(
|
||||
r'playurlSSRData\s*=', webpage, 'embedded page info', episode_id,
|
||||
end_pattern='\n', default=None)
|
||||
or self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
|
||||
headers=headers))
|
||||
|
||||
premium_only = play_info.get('code') == -10403
|
||||
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
|
||||
|
||||
@@ -1021,8 +1035,6 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
|
||||
|
||||
class BilibiliCheeseBaseIE(BilibiliBaseIE):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _extract_episode(self, season_info, ep_id):
|
||||
episode_info = traverse_obj(season_info, (
|
||||
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
|
||||
@@ -1586,7 +1598,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
|
||||
'timestamp': ('ctime', {int_or_none}, filter),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
@@ -1852,7 +1864,7 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.tv/'}
|
||||
|
||||
def _call_api(self, endpoint, *args, **kwargs):
|
||||
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
|
||||
|
||||
388
yt_dlp/extractor/bluesky.py
Normal file
388
yt_dlp/extractor/bluesky.py
Normal file
@@ -0,0 +1,388 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
format_field,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
truncate_string,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BlueskyIE(InfoExtractor):
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?(?:bsky\.app|main\.bsky\.dev)/profile/(?P<handle>[\w.:%-]+)/post/(?P<id>\w+)',
|
||||
r'at://(?P<handle>[\w.:%-]+)/app\.bsky\.feed\.post/(?P<id>\w+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://bsky.app/profile/blu3blue.bsky.social/post/3l4omssdl632g',
|
||||
'md5': '375539c1930ab05d15585ed772ab54fd',
|
||||
'info_dict': {
|
||||
'id': '3l4omssdl632g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Blu3Blu3Lilith',
|
||||
'uploader_id': 'blu3blue.bsky.social',
|
||||
'uploader_url': 'https://bsky.app/profile/blu3blue.bsky.social',
|
||||
'channel_id': 'did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'OMG WE HAVE VIDEOS NOW',
|
||||
'description': 'OMG WE HAVE VIDEOS NOW',
|
||||
'upload_date': '20240921',
|
||||
'timestamp': 1726940605,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/bsky.app/post/3l3vgf77uco2g',
|
||||
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||
'info_dict': {
|
||||
'id': '3l3vgf77uco2g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Bluesky',
|
||||
'uploader_id': 'bsky.app',
|
||||
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726074716,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://main.bsky.dev/profile/souris.moe/post/3l4qhp7bcs52c',
|
||||
'md5': '5f2df8c200b5633eb7fb2c984d29772f',
|
||||
'info_dict': {
|
||||
'id': '3l4qhp7bcs52c',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'souris',
|
||||
'uploader_id': 'souris.moe',
|
||||
'uploader_url': 'https://bsky.app/profile/souris.moe',
|
||||
'channel_id': 'did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l4qhp7bcs52c',
|
||||
'upload_date': '20240922',
|
||||
'timestamp': 1727003838,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
|
||||
'md5': '1af9c7fda061cf7593bbffca89e43d1c',
|
||||
'info_dict': {
|
||||
'id': '3l3w4tnezek2e',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'clean',
|
||||
'uploader_id': 'de1.pds.tentacle.expert',
|
||||
'uploader_url': 'https://bsky.app/profile/de1.pds.tentacle.expert',
|
||||
'channel_id': 'did:web:de1.tentacle.expert',
|
||||
'channel_url': 'https://bsky.app/profile/did:web:de1.tentacle.expert',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l3w4tnezek2e',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726098823,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/yunayuispink.bsky.social/post/3l7gqcfes742o',
|
||||
'info_dict': {
|
||||
'id': 'XxK3t_5V3ao',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'yunayu',
|
||||
'uploader_id': '@yunayuispink',
|
||||
'uploader_url': 'https://www.youtube.com/@yunayuispink',
|
||||
'channel': 'yunayu',
|
||||
'channel_id': 'UCPLvXnHa7lTyNoR_dGsU14w',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCPLvXnHa7lTyNoR_dGsU14w',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/XxK3t_5V3ao/maxresdefault.webp',
|
||||
'description': r're:Have a good goodx10000day',
|
||||
'title': '5min vs 5hours drawing',
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'upload_date': '20241026',
|
||||
'timestamp': 1729967784,
|
||||
'duration': 321,
|
||||
'age_limit': 0,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int,
|
||||
'categories': ['Entertainment'],
|
||||
'tags': [],
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/endshark.bsky.social/post/3jzxjkcemae2m',
|
||||
'info_dict': {
|
||||
'id': '222792849',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'LASERBAT',
|
||||
'uploader_id': 'laserbatx',
|
||||
'uploader_url': 'https://laserbatx.bandcamp.com',
|
||||
'artists': ['LASERBAT'],
|
||||
'album_artists': ['LASERBAT'],
|
||||
'album': 'Hari Nezumi [EP]',
|
||||
'track': 'Forward to the End',
|
||||
'title': 'LASERBAT - Forward to the End',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a2507705510_5.jpg',
|
||||
'duration': 228.571,
|
||||
'track_id': '222792849',
|
||||
'release_date': '20230423',
|
||||
'upload_date': '20230423',
|
||||
'timestamp': 1682276040.0,
|
||||
'release_timestamp': 1682276040.0,
|
||||
'track_number': 1,
|
||||
},
|
||||
'add_ie': ['Bandcamp'],
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/dannybhoix.bsky.social/post/3l6oe5mtr2c2j',
|
||||
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||
'info_dict': {
|
||||
'id': '3l3vgf77uco2g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Bluesky',
|
||||
'uploader_id': 'bsky.app',
|
||||
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726074716,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
|
||||
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
|
||||
'info_dict': {
|
||||
'id': '3l7rdfxhyds2f',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'cinnamon',
|
||||
'uploader_id': 'alt.bun.how',
|
||||
'uploader_url': 'https://bsky.app/profile/alt.bun.how',
|
||||
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'crazy that i look like this tbh',
|
||||
'description': 'crazy that i look like this tbh',
|
||||
'upload_date': '20241030',
|
||||
'timestamp': 1730332128,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['sexual'],
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
'url': 'at://did:plc:ia76kvnndjutgedggx2ibrem/app.bsky.feed.post/3l6zrz6zyl2dr',
|
||||
'md5': '71b0eb6d85d03145e6af6642c7fc6d78',
|
||||
'info_dict': {
|
||||
'id': '3l6zrz6zyl2dr',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'mary🐇',
|
||||
'uploader_id': 'mary.my.id',
|
||||
'uploader_url': 'https://bsky.app/profile/mary.my.id',
|
||||
'channel_id': 'did:plc:ia76kvnndjutgedggx2ibrem',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:ia76kvnndjutgedggx2ibrem',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l6zrz6zyl2dr',
|
||||
'upload_date': '20241021',
|
||||
'timestamp': 1729523172,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/purpleicetea.bsky.social/post/3l7gv55dc2o2w',
|
||||
'info_dict': {
|
||||
'id': '3l7gv55dc2o2w',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '3l7gv55dc2o2w',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20241026',
|
||||
'description': 'One of my favorite videos',
|
||||
'comment_count': int,
|
||||
'uploader_url': 'https://bsky.app/profile/purpleicetea.bsky.social',
|
||||
'uploader': 'Purple.Ice.Tea',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||
'like_count': int,
|
||||
'channel_id': 'did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||
'repost_count': int,
|
||||
'timestamp': 1729973202,
|
||||
'tags': [],
|
||||
'uploader_id': 'purpleicetea.bsky.social',
|
||||
'title': 'One of my favorite videos',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '3l77u64l7le2e',
|
||||
'ext': 'mp4',
|
||||
'title': 'hearing people on twitter say that bluesky isn\'...',
|
||||
'like_count': int,
|
||||
'uploader_id': 'thafnine.net',
|
||||
'uploader_url': 'https://bsky.app/profile/thafnine.net',
|
||||
'upload_date': '20241024',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||
'description': r're:(?s)hearing people on twitter say that bluesky .{93}',
|
||||
'tags': [],
|
||||
'alt_title': 'md5:9b1ee1937fb3d1a81e932f9ec14d560e',
|
||||
'uploader': 'T9',
|
||||
'channel_id': 'did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'timestamp': 1729731642,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}],
|
||||
}]
|
||||
_BLOB_URL_TMPL = '{}/xrpc/com.atproto.sync.getBlob'
|
||||
|
||||
def _get_service_endpoint(self, did, video_id):
    """Look up the base URL of the server that hosts blobs for `did`.

    For a `did:web:` identifier the JSON document is requested from
    `/.well-known/did.json` on the host named after the prefix; for any
    other identifier it is requested from `plc.directory`. The download
    is non-fatal (`fatal=False`).

    Returns the first `serviceEndpoint` that passes `url_or_none` among
    the `service` entries typed `AtprotoPersonalDataServer`, or
    `'https://bsky.social'` when nothing usable is found.
    """
    web_prefix = 'did:web:'
    did_doc_url = (
        f'https://{did[len(web_prefix):]}/.well-known/did.json'
        if did.startswith(web_prefix)
        else f'https://plc.directory/{did}')

    did_doc = self._download_json(
        did_doc_url, video_id, 'Fetching service endpoint', 'Falling back to bsky.social', fatal=False)

    # Only entries of the PDS service type are considered; `any` picks the first valid URL
    pds_url = traverse_obj(did_doc, (
        'service', lambda _, svc: svc['type'] == 'AtprotoPersonalDataServer',
        'serviceEndpoint', {url_or_none}, any))
    return pds_url or 'https://bsky.social'
|
||||
|
||||
def _real_extract(self, url):
    """Extract every video attached to (or quoted by) a single post.

    The post is fetched through the public `app.bsky.feed.getPostThread`
    endpoint with `depth` and `parentHeight` both set to 0. Three embed
    layouts are then probed, in this order: a direct video/external
    embed, a record-with-media embed, and an embedded (quoted) record.

    Returns the lone entry when exactly one was found, otherwise a
    playlist of all entries. Raises an expected `ExtractorError` when the
    post yields no entries at all.
    """
    handle, video_id = self._match_valid_url(url).group('handle', 'id')

    thread_data = self._download_json(
        'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
        video_id, query={
            'uri': f'at://{handle}/app.bsky.feed.post/{video_id}',
            'depth': 0,
            'parentHeight': 0,
        })
    post = thread_data['thread']['post']

    entries = [
        # app.bsky.embed.video.view/app.bsky.embed.external.view
        *self._extract_videos(post, video_id),
        # app.bsky.embed.recordWithMedia.view
        *self._extract_videos(
            post, video_id, embed_path=('embed', 'media'), record_subpath=('embed', 'media')),
    ]

    # app.bsky.embed.record.view
    quoted_post = traverse_obj(post, ('embed', 'record', ('record', None), {dict}, any))
    if quoted_post:
        entries += self._extract_videos(
            quoted_post, video_id, embed_path=('embeds', 0), record_path='value')

    if len(entries) > 1:
        return self.playlist_result(entries, video_id)
    if entries:
        return entries[0]
    raise ExtractorError('No video could be found in this post', expected=True)
|
||||
|
||||
@staticmethod
def _build_profile_url(path):
    """Format `path` (a handle or DID) into a `https://bsky.app/profile/...` URL.

    Delegates to `format_field` with `default=None`.
    """
    profile_url_template = 'https://bsky.app/profile/%s'
    return format_field(path, None, profile_url_template, default=None)
|
||||
|
||||
def _extract_videos(self, root, video_id, embed_path='embed', record_path='record', record_subpath='embed'):
    """Build the list of entries found in one post view.

    `root` is the post object to inspect. `embed_path`, `record_path` and
    `record_subpath` are traversal paths (a single key or a sequence of
    keys): `embed_path` locates the embed view inside `root`, and
    `record_path` followed by `record_subpath` locates the matching embed
    inside the post record.

    An external link embed, when present, is handed off with
    `url_result`. If the embed view has no playlist URL the entries
    gathered so far are returned as they are. Otherwise the HLS formats
    are extracted and, when both the author DID and the video CID are
    known, a direct `blob` format and caption subtitles served through
    `com.atproto.sync.getBlob` are added as well.
    """
    # A bare key (str/bytes/dict/set) is wrapped so all three are iterable paths
    scalar_types = (str, bytes, dict, set)
    embed_path = variadic(embed_path, scalar_types)
    record_path = variadic(record_path, scalar_types)
    record_subpath = variadic(record_subpath, scalar_types)
    media_path = (*record_path, *record_subpath)

    entries = []
    external_uri = traverse_obj(root, (
        (media_path, embed_path), 'external', 'uri', {url_or_none}, any))
    if external_uri:
        entries.append(self.url_result(external_uri))

    playlist_url = traverse_obj(root, (*embed_path, 'playlist', {url_or_none}))
    if not playlist_url:
        return entries
    formats, subtitles = self._extract_m3u8_formats_and_subtitles(
        playlist_url, video_id, 'mp4', m3u8_id='hls', fatal=False)

    # The CID is looked up in the embed view first, then in the record's blob reference
    video_cid = traverse_obj(
        root, (*embed_path, 'cid', {str}),
        (*media_path, 'video', 'ref', '$link', {str}))
    did = traverse_obj(root, ('author', 'did', {str}))

    if did and video_cid:
        blob_url = self._BLOB_URL_TMPL.format(self._get_service_endpoint(did, video_id))

        formats.append({
            'format_id': 'blob',
            'url': update_url_query(blob_url, {'did': did, 'cid': video_cid}),
            **traverse_obj(root, (*embed_path, 'aspectRatio', {
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            })),
            **traverse_obj(root, (*media_path, 'video', {
                'filesize': ('size', {int_or_none}),
                'ext': ('mimeType', {mimetype2ext}),
            })),
        })

        # Only captions that carry a blob reference are kept
        captions = traverse_obj(root, (
            *media_path, 'captions', lambda _, v: v['file']['ref']['$link']))
        for caption in captions:
            subtitles.setdefault(caption.get('lang') or 'und', []).append({
                'url': update_url_query(blob_url, {'did': did, 'cid': caption['file']['ref']['$link']}),
                'ext': traverse_obj(caption, ('file', 'mimeType', {mimetype2ext})),
            })

    metadata = traverse_obj(root, {
        'id': ('uri', {url_basename}),
        'thumbnail': (*embed_path, 'thumbnail', {url_or_none}),
        'alt_title': (*embed_path, 'alt', {str}, filter),
        'uploader': ('author', 'displayName', {str}),
        'uploader_id': ('author', 'handle', {str}),
        'uploader_url': ('author', 'handle', {self._build_profile_url}),
        'channel_id': ('author', 'did', {str}),
        'channel_url': ('author', 'did', {self._build_profile_url}),
        'like_count': ('likeCount', {int_or_none}),
        'repost_count': ('repostCount', {int_or_none}),
        'comment_count': ('replyCount', {int_or_none}),
        'timestamp': ('indexedAt', {parse_iso8601}),
        'tags': ('labels', ..., 'val', {str}, all, {orderedSet}),
        'age_limit': (
            'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
        'description': (*record_path, 'text', {str}, filter),
        'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
    })
    # `metadata` is unpacked last so an `id` derived from the post URI replaces `video_id`
    entries.append({
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        **metadata,
    })
    return entries
|
||||
@@ -1,35 +1,20 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_text_and_html_by_tag,
|
||||
get_elements_by_class,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def html_get_element(tag=None, cls=None):
    """Create a one-argument callable that picks an element out of HTML.

    When `cls` is given the lookup goes through `get_elements_by_class`
    (with `tag` forwarded as a keyword); otherwise it goes through
    `get_element_text_and_html_by_tag`. The returned callable passes the
    lookup result through `variadic` and returns its first item.

    At least one of `tag` / `cls` must be truthy.
    """
    assert tag or cls, 'One of tag or class is required'

    finder = (
        functools.partial(get_elements_by_class, cls, tag=tag) if cls
        else functools.partial(get_element_text_and_html_by_tag, tag))

    def html_get_element_wrapper(html):
        matches = variadic(finder(html))
        return matches[0]

    return html_get_element_wrapper
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
@@ -41,12 +26,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Kooperative Berlin',
|
||||
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
|
||||
'release_date': '20160115',
|
||||
'creators': ['Kooperative Berlin'],
|
||||
'description': r're:Joachim Gauck, .*\n\nKamera: .*',
|
||||
'release_date': '20150716',
|
||||
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
|
||||
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https?://www\.bpb\.de/cache/images/7/297_teaser_16x9_1240\.jpg.*',
|
||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -55,11 +40,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '522184',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
|
||||
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
|
||||
'release_date': '20230621',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
|
||||
'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/4/522184_teaser_16x9_1240\.png.*',
|
||||
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -68,11 +54,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '518789',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
|
||||
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
|
||||
'release_date': '20230302',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
|
||||
'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/9/518789_teaser_16x9_1240\.jpeg.*',
|
||||
'title': 'md5:3e956f264bb501f6383f10495a401da4',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -84,12 +71,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '315813',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Axel Schröder',
|
||||
'creators': ['Axel Schröder'],
|
||||
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
|
||||
'release_date': '20200921',
|
||||
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
|
||||
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/3/315813_teaser_16x9_1240\.png.*',
|
||||
'title': 'Folge 1: Eine Einführung',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -98,12 +85,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '517806',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Bundeszentrale für politische Bildung',
|
||||
'creators': ['Bundeszentrale für politische Bildung'],
|
||||
'description': 'md5:594689600e919912aade0b2871cc3fed',
|
||||
'release_date': '20230127',
|
||||
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
|
||||
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/6/517806_teaser_16x9_1240\.png.*',
|
||||
'title': 'Die Weltanschauung der "Neuen Rechten"',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -147,7 +134,7 @@ class BpbIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
title_result = traverse_obj(webpage, ({find_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
|
||||
|
||||
return {
|
||||
@@ -156,15 +143,15 @@ class BpbIE(InfoExtractor):
|
||||
# This metadata could be interpreted otherwise, but it fits "series" the most
|
||||
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
|
||||
'description': join_nonempty(*traverse_obj(webpage, [(
|
||||
{html_get_element(cls='opening-intro')},
|
||||
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
|
||||
{find_element(cls='opening-intro')},
|
||||
[{find_element(tag='bpb-accordion-item')}, {find_element(cls='text-content')}],
|
||||
), {clean_html}]), delim='\n\n') or None,
|
||||
'creator': self._html_search_meta('author', webpage),
|
||||
'creators': traverse_obj(self._html_search_meta('author', webpage), all),
|
||||
'uploader': self._html_search_meta('publisher', webpage),
|
||||
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
|
||||
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
|
||||
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
|
||||
'formats': (':sources', ..., {self._process_source}),
|
||||
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
|
||||
'thumbnail': ('poster', {urljoin(url)}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -145,10 +145,9 @@ class BravoTVIE(AdobePassIE):
|
||||
tp_metadata = self._download_json(
|
||||
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
|
||||
|
||||
seconds_or_none = lambda x: float_or_none(x, 1000)
|
||||
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||
'start_time': ('startTime', {seconds_or_none}),
|
||||
'end_time': ('endTime', {seconds_or_none}),
|
||||
'start_time': ('startTime', {float_or_none(scale=1000)}),
|
||||
'end_time': ('endTime', {float_or_none(scale=1000)}),
|
||||
}))
|
||||
# prune pointless single chapters that span the entire duration from short videos
|
||||
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
|
||||
@@ -168,8 +167,8 @@ class BravoTVIE(AdobePassIE):
|
||||
**merge_dicts(traverse_obj(tp_metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {seconds_or_none}),
|
||||
'timestamp': ('pubDate', {seconds_or_none}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'timestamp': ('pubDate', {float_or_none(scale=1000)}),
|
||||
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
|
||||
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
|
||||
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
|
||||
|
||||
@@ -31,6 +31,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BrightcoveLegacyIE(InfoExtractor):
|
||||
@@ -935,8 +936,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
|
||||
if content_type == 'playlist':
|
||||
return self.playlist_result(
|
||||
[self._parse_brightcove_metadata(vid, vid.get('id'), headers)
|
||||
for vid in json_data.get('videos', []) if vid.get('id')],
|
||||
(self._parse_brightcove_metadata(vid, vid['id'], headers)
|
||||
for vid in traverse_obj(json_data, ('videos', lambda _, v: v['id']))),
|
||||
json_data.get('id'), json_data.get('name'),
|
||||
json_data.get('description'))
|
||||
|
||||
|
||||
@@ -8,11 +8,13 @@ from ..utils import (
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
format_field,
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BundestagIE(InfoExtractor):
|
||||
@@ -115,9 +117,8 @@ class BundestagIE(InfoExtractor):
|
||||
note='Downloading metadata overlay', fatal=False,
|
||||
), {
|
||||
'title': (
|
||||
{functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0,
|
||||
{functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||
'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
|
||||
{find_element(tag='h3')}, {functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||
'description': ({find_element(tag='p')}, {clean_html}),
|
||||
}))
|
||||
|
||||
return result
|
||||
|
||||
@@ -53,7 +53,7 @@ class CaffeineTVIE(InfoExtractor):
|
||||
'like_count': ('like_count', {int_or_none}),
|
||||
'view_count': ('view_count', {int_or_none}),
|
||||
'comment_count': ('comment_count', {int_or_none}),
|
||||
'tags': ('tags', ..., {str}, {lambda x: x or None}),
|
||||
'tags': ('tags', ..., {str}, filter),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_id': (((None, 'user'), 'username'), {str}, any),
|
||||
'is_live': ('is_live', {bool}),
|
||||
@@ -62,7 +62,7 @@ class CaffeineTVIE(InfoExtractor):
|
||||
'title': ('broadcast_title', {str}),
|
||||
'duration': ('content_duration', {int_or_none}),
|
||||
'timestamp': ('broadcast_start_time', {parse_iso8601}),
|
||||
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
|
||||
'thumbnail': ('preview_image_path', {urljoin(url)}),
|
||||
}),
|
||||
'age_limit': {
|
||||
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
|
||||
|
||||
@@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
|
||||
|
||||
|
||||
class CallinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
|
||||
'info_dict': {
|
||||
|
||||
@@ -4,7 +4,6 @@ import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
@@ -12,7 +11,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
@@ -455,8 +453,8 @@ class CBCPlayerIE(InfoExtractor):
|
||||
|
||||
chapters = traverse_obj(data, (
|
||||
'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
|
||||
'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}),
|
||||
'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}),
|
||||
'start_time': ('startTime', {float_or_none(scale=1000)}),
|
||||
'end_time': ('endTime', {float_or_none(scale=1000)}),
|
||||
'title': ('name', {str}),
|
||||
}))
|
||||
# Filter out pointless single chapters with start_time==0 and no end_time
|
||||
@@ -467,8 +465,8 @@ class CBCPlayerIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str.strip}),
|
||||
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
|
||||
'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}),
|
||||
'thumbnail': ('image', 'url', {url_or_none}, {update_url(query=None)}),
|
||||
'timestamp': ('publishedAt', {float_or_none(scale=1000)}),
|
||||
'media_type': ('media', 'clipType', {str}),
|
||||
'series': ('showName', {str}),
|
||||
'season_number': ('media', 'season', {int_or_none}),
|
||||
@@ -524,14 +522,13 @@ class CBCGemIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# This is a normal, public, TV show video
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||
'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s06e01',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoke Signals',
|
||||
'description': 'md5:929868d20021c924020641769eb3e7f1',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
||||
'duration': 1314,
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
|
||||
'duration': 1324,
|
||||
'categories': ['comedy'],
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season': 'Season 6',
|
||||
@@ -539,19 +536,21 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode': 'Smoke Signals',
|
||||
'episode_number': 1,
|
||||
'episode_id': 'schitts-creek/s06e01',
|
||||
'upload_date': '20210618',
|
||||
'timestamp': 1623988800,
|
||||
'release_date': '20200107',
|
||||
'release_timestamp': 1578427200,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# This video requires an account in the browser, but works fine in yt-dlp
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
|
||||
'md5': '297a9600f554f2258aed01514226a697',
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s01e01',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Cup Runneth Over',
|
||||
'description': 'md5:9bca14ea49ab808097530eb05a29e797',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg',
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
@@ -560,9 +559,12 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode_id': 'schitts-creek/s01e01',
|
||||
'duration': 1309,
|
||||
'categories': ['comedy'],
|
||||
'upload_date': '20210617',
|
||||
'timestamp': 1623902400,
|
||||
'release_date': '20151124',
|
||||
'release_timestamp': 1448323200,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
|
||||
'only_matching': True,
|
||||
@@ -631,38 +633,6 @@ class CBCGemIE(InfoExtractor):
|
||||
return
|
||||
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
||||
|
||||
def _find_secret_formats(self, formats, video_id):
|
||||
""" Find a valid video url and convert it to the secret variant """
|
||||
base_format = next((f for f in formats if f.get('vcodec') != 'none'), None)
|
||||
if not base_format:
|
||||
return
|
||||
|
||||
base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])
|
||||
url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
|
||||
|
||||
secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
|
||||
if not isinstance(secret_xml, xml.etree.ElementTree.Element):
|
||||
return
|
||||
|
||||
for child in secret_xml:
|
||||
if child.attrib.get('Type') != 'video':
|
||||
continue
|
||||
for video_quality in child:
|
||||
bitrate = int_or_none(video_quality.attrib.get('Bitrate'))
|
||||
if not bitrate or 'Index' not in video_quality.attrib:
|
||||
continue
|
||||
height = int_or_none(video_quality.attrib.get('MaxHeight'))
|
||||
|
||||
yield {
|
||||
**base_format,
|
||||
'format_id': join_nonempty('sec', height),
|
||||
# Note: \g<1> is necessary instead of \1 since bitrate is a number
|
||||
'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
|
||||
'width': int_or_none(video_quality.attrib.get('MaxWidth')),
|
||||
'tbr': bitrate / 1000.0,
|
||||
'height': height,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = self._download_json(
|
||||
@@ -676,7 +646,6 @@ class CBCGemIE(InfoExtractor):
|
||||
else:
|
||||
headers = {}
|
||||
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
|
||||
m3u8_url = m3u8_info.get('url')
|
||||
|
||||
if m3u8_info.get('errorCode') == 1:
|
||||
self.raise_geo_restricted(countries=['CA'])
|
||||
@@ -685,9 +654,9 @@ class CBCGemIE(InfoExtractor):
|
||||
elif m3u8_info.get('errorCode') != 0:
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
|
||||
self._remove_duplicate_formats(formats)
|
||||
formats.extend(self._find_secret_formats(formats, video_id))
|
||||
|
||||
for fmt in formats:
|
||||
if fmt.get('vcodec') == 'none':
|
||||
@@ -703,20 +672,21 @@ class CBCGemIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('description'),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'series': video_info.get('series'),
|
||||
'season_number': video_info.get('season'),
|
||||
'season': f'Season {video_info.get("season")}',
|
||||
'episode_number': video_info.get('episode'),
|
||||
'episode': video_info.get('title'),
|
||||
'episode_id': video_id,
|
||||
'duration': video_info.get('duration'),
|
||||
'categories': [video_info.get('category')],
|
||||
'formats': formats,
|
||||
'release_timestamp': video_info.get('airDate'),
|
||||
'timestamp': video_info.get('availableDate'),
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'series': ('series', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'categories': ('category', {str}, all),
|
||||
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ class CBSNewsBaseIE(InfoExtractor):
|
||||
**traverse_obj(item, {
|
||||
'title': (None, ('fulltitle', 'title')),
|
||||
'description': 'dek',
|
||||
'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}),
|
||||
'timestamp': ('timestamp', {float_or_none(scale=1000)}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'subtitles': ('captions', {get_subtitles}),
|
||||
'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),
|
||||
|
||||
@@ -12,53 +12,86 @@ from ..utils import (
|
||||
|
||||
|
||||
class CCMAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
|
||||
IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
|
||||
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
'md5': '7296ca43977c8ea4469e719c609b0871',
|
||||
'info_dict': {
|
||||
'id': '5630208',
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'espot de La Marató de TV3',
|
||||
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||
'timestamp': 1478608140,
|
||||
'upload_date': '20161108',
|
||||
'age_limit': 0,
|
||||
'alt_title': 'EsportMarató2016WEB_PerPublicar',
|
||||
'duration': 79,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
|
||||
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'categories': ['Divulgació'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
'md5': 'fa3e38f269329a278271276330261425',
|
||||
'info_dict': {
|
||||
'id': '943685',
|
||||
'ext': 'mp3',
|
||||
'title': 'El Consell de Savis analitza el derbi',
|
||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||
'upload_date': '20170512',
|
||||
'timestamp': 1494622500,
|
||||
'upload_date': '20161217',
|
||||
'timestamp': 1482011700,
|
||||
'vcodec': 'none',
|
||||
'categories': ['Esports'],
|
||||
'series': 'Tot gira',
|
||||
'duration': 821,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
|
||||
'md5': '27493513d08a3e5605814aee9bb778d2',
|
||||
'info_dict': {
|
||||
'id': '6031387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||
'timestamp': 1582577700,
|
||||
'timestamp': 1582577919,
|
||||
'upload_date': '20200224',
|
||||
'subtitles': 'mincount:4',
|
||||
'age_limit': 16,
|
||||
'subtitles': 'mincount:1',
|
||||
'age_limit': 13,
|
||||
'series': 'Crims',
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
|
||||
'duration': 3203,
|
||||
'categories': ['Divulgació'],
|
||||
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'episode_number': 5,
|
||||
'episode': 'Episode 5',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
|
||||
'info_dict': {
|
||||
'id': '5759227',
|
||||
'ext': 'mp4',
|
||||
'title': 'Una mosca volava per la llum',
|
||||
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
|
||||
'description': 'md5:9ab64276944b0825336f4147f13f7854',
|
||||
'series': 'Mic',
|
||||
'upload_date': '20180411',
|
||||
'timestamp': 1523440105,
|
||||
'duration': 160,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
|
||||
'categories': ['Música'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_type, media_id = self._match_valid_url(url).groups()
|
||||
media_type, media_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
media = self._download_json(
|
||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'format': 'dm',
|
||||
|
||||
@@ -12,6 +12,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_ord
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
@@ -351,3 +352,50 @@ class CDAIE(InfoExtractor):
|
||||
extract_format(webpage, resolution)
|
||||
|
||||
return merge_dicts(info_dict, info)
|
||||
|
||||
|
||||
class CDAFolderIE(InfoExtractor):
|
||||
_MAX_PAGE_SIZE = 36
|
||||
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cda.pl/domino264/folder/31188385',
|
||||
'info_dict': {
|
||||
'id': '31188385',
|
||||
'title': 'SERIA DRUGA',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
|
||||
'info_dict': {
|
||||
'id': '2664592',
|
||||
'title': 'VideoDowcipy - wszystkie odcinki',
|
||||
},
|
||||
'playlist_mincount': 71,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
|
||||
'info_dict': {
|
||||
'id': '19129979',
|
||||
'title': 'TESTY KOSMETYKÓW',
|
||||
},
|
||||
'playlist_mincount': 139,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
folder_id, channel = self._match_valid_url(url).group('id', 'channel')
|
||||
|
||||
webpage = self._download_webpage(url, folder_id)
|
||||
|
||||
def extract_page_entries(page):
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page + 1}', folder_id,
|
||||
f'Downloading page {page + 1}', expected_status=404)
|
||||
items = re.findall(r'<a[^>]+href="/video/([0-9a-z]+)"', webpage)
|
||||
for video_id in items:
|
||||
yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id)
|
||||
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(extract_page_entries, self._MAX_PAGE_SIZE),
|
||||
folder_id, self._og_search_title(webpage))
|
||||
|
||||
@@ -5,11 +5,12 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
lowercase_escape,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class ChaturbateIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.chaturbate.com/siswet19/',
|
||||
'info_dict': {
|
||||
@@ -29,16 +30,58 @@ class ChaturbateIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://en.chaturbate.com/siswet19/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chaturbate.eu/siswet19/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chaturbate.global/siswet19/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_ROOM_OFFLINE = 'Room is currently offline'
|
||||
_ERROR_MAP = {
|
||||
'offline': 'Room is currently offline',
|
||||
'private': 'Room is currently in a private show',
|
||||
'away': 'Performer is currently away',
|
||||
'password protected': 'Room is password protected',
|
||||
'hidden': 'Hidden session in progress',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
def _extract_from_api(self, video_id, tld):
|
||||
response = self._download_json(
|
||||
f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
|
||||
data=urlencode_postdata({'room_slug': video_id}),
|
||||
headers={
|
||||
**self.geo_verification_headers(),
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Accept': 'application/json',
|
||||
}, fatal=False, impersonate=True) or {}
|
||||
|
||||
m3u8_url = response.get('url')
|
||||
if not m3u8_url:
|
||||
status = response.get('room_status')
|
||||
if error := self._ERROR_MAP.get(status):
|
||||
raise ExtractorError(error, expected=True)
|
||||
if status == 'public':
|
||||
self.raise_geo_restricted()
|
||||
self.report_warning(f'Got status "{status}" from API; falling back to webpage extraction')
|
||||
return None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
|
||||
'is_live': True,
|
||||
'age_limit': 18,
|
||||
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
|
||||
}
|
||||
|
||||
def _extract_from_html(self, video_id, tld):
|
||||
webpage = self._download_webpage(
|
||||
f'https://chaturbate.com/{video_id}/', video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
f'https://chaturbate.{tld}/{video_id}/', video_id,
|
||||
headers=self.geo_verification_headers(), impersonate=True)
|
||||
|
||||
found_m3u8_urls = []
|
||||
|
||||
@@ -76,8 +119,8 @@ class ChaturbateIE(InfoExtractor):
|
||||
webpage, 'error', group='error', default=None)
|
||||
if not error:
|
||||
if any(p in webpage for p in (
|
||||
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
|
||||
error = self._ROOM_OFFLINE
|
||||
self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
|
||||
error = self._ERROR_MAP['offline']
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise ExtractorError('Unable to find stream URL')
|
||||
@@ -104,3 +147,7 @@ class ChaturbateIE(InfoExtractor):
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, tld = self._match_valid_url(url).group('id', 'tld')
|
||||
return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
UserNotLive,
|
||||
@@ -77,7 +75,7 @@ class CHZZKLiveIE(InfoExtractor):
|
||||
'thumbnails': thumbnails,
|
||||
**traverse_obj(live_detail, {
|
||||
'title': ('liveTitle', {str}),
|
||||
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
|
||||
'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
|
||||
'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
|
||||
'view_count': ('accumulateCount', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
@@ -146,23 +144,37 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
video_meta = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
|
||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
||||
query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
}, note='Downloading video playback', errnote='Unable to download video playback')
|
||||
|
||||
live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
|
||||
video_status = video_meta.get('vodStatus')
|
||||
if video_status == 'UPLOAD':
|
||||
playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
|
||||
elif video_status == 'ABR_HLS':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
|
||||
video_id, query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
})
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
|
||||
formats, subtitles = [], {}
|
||||
live_status = 'post_live' if live_status == 'was_live' else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'live_status': live_status,
|
||||
**traverse_obj(video_meta, {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
|
||||
'timestamp': ('publishDateAt', {float_or_none(scale=1000)}),
|
||||
'view_count': ('readCount', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
|
||||
@@ -3,6 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
@@ -85,7 +86,7 @@ class CineverseIE(CineverseBaseIE):
|
||||
'title': 'title',
|
||||
'id': ('details', 'item_id'),
|
||||
'description': ('details', 'description'),
|
||||
'duration': ('duration', {lambda x: x / 1000}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'cast': ('details', 'cast', {lambda x: x.split(', ')}),
|
||||
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
|
||||
'season_number': ('details', 'season', {int_or_none}),
|
||||
|
||||
@@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
|
||||
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P<domain>{_DOMAIN_RE})/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_EMBED_REGEX = [
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
|
||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||
@@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'id': '31c9291ab41fac05471db4e73aa11717',
|
||||
'ext': 'mp4',
|
||||
'title': '31c9291ab41fac05471db4e73aa11717',
|
||||
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||
'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
@@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'id': '0e8e040aec776862e1d632a699edf59e',
|
||||
'ext': 'mp4',
|
||||
'title': '0e8e040aec776862e1d632a699edf59e',
|
||||
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
|
||||
'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||
@@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'id': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'ext': 'mp4',
|
||||
'title': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||
'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
@@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
|
||||
video_id, domain = self._match_valid_url(url).group('id', 'domain')
|
||||
if domain != 'bytehighway.net':
|
||||
domain = 'cloudflarestream.com'
|
||||
base_url = f'https://{domain}/{video_id}/'
|
||||
if '.' in video_id:
|
||||
video_id = self._parse_json(base64.urlsafe_b64decode(
|
||||
|
||||
@@ -1,146 +1,225 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import merge_dicts, try_call, url_basename
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
try_call,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_elements, traverse_obj
|
||||
|
||||
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'md5': '3e6121ea48df7e2259fe73a0628605c4',
|
||||
'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'info_dict': {
|
||||
'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
|
||||
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nadal wins 8th French Open title',
|
||||
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
'upload_date': '20240531',
|
||||
'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
|
||||
'duration': 373.0,
|
||||
'timestamp': 1717148586,
|
||||
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
|
||||
'modified_date': '20240531',
|
||||
'modified_timestamp': 1717150140,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'info_dict': {
|
||||
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
|
||||
'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
|
||||
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'ext': 'mp4',
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."',
|
||||
'upload_date': '20130821',
|
||||
'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
|
||||
'title': 'Here’s how some inmates in closely divided state are now able to vote from jail',
|
||||
'timestamp': 1718158269,
|
||||
'upload_date': '20240612',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
|
||||
'duration': 202.0,
|
||||
'modified_date': '20240612',
|
||||
'modified_timestamp': 1718158509,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
|
||||
'info_dict': {
|
||||
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
|
||||
'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
|
||||
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
|
||||
'description': 'md5:19f78338ccec533db0fa8a4511012dae',
|
||||
'title': 'Video shows King Charles\' portrait being vandalized by activists',
|
||||
'timestamp': 1718113852,
|
||||
'upload_date': '20240611',
|
||||
'duration': 51.0,
|
||||
'modified_timestamp': 1718116193,
|
||||
'modified_date': '20240611',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'info_dict': {
|
||||
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
|
||||
'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
|
||||
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'ext': 'mp4',
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
|
||||
'duration': 158.0,
|
||||
'title': 'Robin Meade signs off after HLN\'s last broadcast',
|
||||
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
|
||||
'upload_date': '20221205',
|
||||
'timestamp': 1670284296,
|
||||
'modified_timestamp': 1670332404,
|
||||
'modified_date': '20221206',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'info_dict': {
|
||||
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'timestamp': 1729501452,
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
|
||||
'description': 'md5:256ee7137d161f776cda429654135e52',
|
||||
'upload_date': '20241021',
|
||||
'duration': 31.0,
|
||||
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
|
||||
'modified_date': '20241021',
|
||||
'modified_timestamp': 1729501530,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
|
||||
'only_matching': True,
|
||||
'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
|
||||
'info_dict': {
|
||||
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
|
||||
'info_dict': {
|
||||
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
|
||||
'ext': 'mp4',
|
||||
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
|
||||
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
|
||||
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
|
||||
'duration': 173.0,
|
||||
'timestamp': 1729122182,
|
||||
'upload_date': '20241016',
|
||||
'modified_timestamp': 1729194706,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'md5': '11604ab4af83b650826753f1ccb8ecff',
|
||||
'info_dict': {
|
||||
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
|
||||
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
|
||||
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
|
||||
'duration': 145.0,
|
||||
'timestamp': 1729137765,
|
||||
'upload_date': '20241017',
|
||||
'modified_timestamp': 1729138184,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}],
|
||||
}]
|
||||
|
||||
_CONFIG = {
|
||||
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
|
||||
'edition': {
|
||||
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
|
||||
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
|
||||
},
|
||||
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
|
||||
'money': {
|
||||
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
|
||||
'media_src': 'http://ht3.cdn.turner.com/money/big',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
|
||||
# TODO: fix timestamp extraction
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, path, page_title = self._match_valid_url(url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
display_id = self._match_valid_url(url).group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
app_id = traverse_obj(
|
||||
self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
|
||||
('TOP_AUTH_SERVICE_APP_ID', {str}))
|
||||
|
||||
entries = []
|
||||
for player_data in traverse_obj(webpage, (
|
||||
{find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
|
||||
..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
|
||||
media_id = player_data['data-media-id']
|
||||
parent_uri = player_data.get('data-video-resource-parent-uri')
|
||||
formats, subtitles = [], {}
|
||||
|
||||
video_data = {}
|
||||
if parent_uri:
|
||||
video_data = self._download_json(
|
||||
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
|
||||
query={
|
||||
'id': media_id,
|
||||
'stellarUri': parent_uri,
|
||||
})
|
||||
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
|
||||
resolution, bitrate = None, None
|
||||
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
|
||||
resolution, bitrate = mobj.group('res', 'tbr')
|
||||
formats.append({
|
||||
'url': direct_url,
|
||||
'format_id': 'direct',
|
||||
'quality': 1,
|
||||
'tbr': int_or_none(bitrate),
|
||||
**parse_resolution(resolution),
|
||||
})
|
||||
for sub_data in traverse_obj(video_data, (
|
||||
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
|
||||
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
|
||||
'url': sub_data['url'],
|
||||
'name': sub_data.get('label'),
|
||||
})
|
||||
|
||||
if app_id:
|
||||
media_data = self._download_json(
|
||||
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
|
||||
query={'appId': app_id})
|
||||
m3u8_url = traverse_obj(media_data, (
|
||||
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
|
||||
if m3u8_url:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
entries.append({
|
||||
**traverse_obj(player_data, {
|
||||
'title': ('data-headline', {clean_html}),
|
||||
'description': ('data-description', {clean_html}),
|
||||
'duration': ('data-duration', {parse_duration}),
|
||||
'timestamp': ('data-publish-date', {parse_iso8601}),
|
||||
'thumbnail': (
|
||||
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
|
||||
{update_url(query='c=original')}),
|
||||
'display_id': 'data-video-slug',
|
||||
}),
|
||||
**traverse_obj(video_data, {
|
||||
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
|
||||
'description': ('description', {clean_html}),
|
||||
'title': ('headline', {str}),
|
||||
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
|
||||
'duration': ('trt', {int_or_none}),
|
||||
}),
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**entries[0],
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
|
||||
_TEST = {
|
||||
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
|
||||
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Criminalizing journalism?',
|
||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||
'upload_date': '20140209',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
||||
return self.url_result(cnn_url, CNNIE.ie_key())
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: Cyberattack not an act of war',
|
||||
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
|
||||
class CNNIndonesiaIE(InfoExtractor):
|
||||
|
||||
@@ -25,7 +25,6 @@ import xml.etree.ElementTree
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_os_name,
|
||||
urllib_req_to_req,
|
||||
)
|
||||
from ..cookies import LenientSimpleCookie
|
||||
@@ -35,6 +34,7 @@ from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
TransportError,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
@@ -46,6 +46,7 @@ from ..utils import (
|
||||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
ISO639Utils,
|
||||
LenientJSONDecoder,
|
||||
Popen,
|
||||
RegexNotFoundError,
|
||||
@@ -277,6 +278,7 @@ class InfoExtractor:
|
||||
thumbnails: A list of dictionaries, with the following entries:
|
||||
* "id" (optional, string) - Thumbnail format ID
|
||||
* "url"
|
||||
* "ext" (optional, string) - actual image extension if not given in URL
|
||||
* "preference" (optional, int) - quality of the image
|
||||
* "width" (optional, int)
|
||||
* "height" (optional, int)
|
||||
@@ -332,7 +334,7 @@ class InfoExtractor:
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
repost_count: Number of reposts of the video
|
||||
average_rating: Average rating give by users, the scale used depends on the webpage
|
||||
average_rating: Average rating given by users, the scale used depends on the webpage
|
||||
comment_count: Number of comments on the video
|
||||
comments: A list of comments, each with one or more of the following
|
||||
properties (all but one of text or html optional):
|
||||
@@ -519,7 +521,7 @@ class InfoExtractor:
|
||||
or _extract_from_webpage as necessary. While these are normally classmethods,
|
||||
_extract_from_webpage is allowed to be an instance method.
|
||||
|
||||
_extract_from_webpage may raise self.StopExtraction() to stop further
|
||||
_extract_from_webpage may raise self.StopExtraction to stop further
|
||||
processing of the webpage and obtain exclusive rights to it. This is useful
|
||||
when the extractor cannot reliably be matched using just the URL,
|
||||
e.g. invidious/peertube instances
|
||||
@@ -572,13 +574,13 @@ class InfoExtractor:
|
||||
|
||||
def _login_hint(self, method=NO_DEFAULT, netrc=None):
|
||||
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
|
||||
cookies_hint = 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'
|
||||
return {
|
||||
None: '',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}. {cookies_hint}',
|
||||
'password': f'Use {password_hint}',
|
||||
'cookies': (
|
||||
'Use --cookies-from-browser or --cookies for the authentication. '
|
||||
'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
|
||||
'cookies': f'Use --cookies-from-browser or --cookies for the authentication. {cookies_hint}',
|
||||
'session_cookies': f'Use --cookies for the authentication (--cookies-from-browser might not work). {cookies_hint}',
|
||||
}[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
@@ -965,6 +967,9 @@ class InfoExtractor:
|
||||
return False
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data)
|
||||
if content is False:
|
||||
assert not fatal
|
||||
return False
|
||||
return (content, urlh)
|
||||
|
||||
@staticmethod
|
||||
@@ -1023,7 +1028,7 @@ class InfoExtractor:
|
||||
filename = sanitize_filename(f'{basen}.dump', restricted=True)
|
||||
# Working around MAX_PATH limitation on Windows (see
|
||||
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
|
||||
if compat_os_name == 'nt':
|
||||
if os.name == 'nt':
|
||||
absfilepath = os.path.abspath(filename)
|
||||
if len(absfilepath) > 259:
|
||||
filename = fR'\\?\{absfilepath}'
|
||||
@@ -1039,7 +1044,15 @@ class InfoExtractor:
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||
prefix=None, encoding=None, data=None):
|
||||
webpage_bytes = urlh.read()
|
||||
try:
|
||||
webpage_bytes = urlh.read()
|
||||
except TransportError as err:
|
||||
errmsg = f'{video_id}: Error reading response: {err.msg}'
|
||||
if fatal:
|
||||
raise ExtractorError(errmsg, cause=err)
|
||||
self.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
@@ -1396,6 +1409,13 @@ class InfoExtractor:
|
||||
return None, None
|
||||
|
||||
self.write_debug(f'Using netrc for {netrc_machine} authentication')
|
||||
|
||||
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
|
||||
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
|
||||
if sys.version_info < (3, 11):
|
||||
return tuple(x if x != '""' else '' for x in info[::2])
|
||||
|
||||
return info[0], info[2]
|
||||
|
||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||
@@ -1558,7 +1578,9 @@ class InfoExtractor:
|
||||
if default is not NO_DEFAULT:
|
||||
fatal = False
|
||||
for mobj in re.finditer(JSON_LD_RE, html):
|
||||
json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal)
|
||||
json_ld_item = self._parse_json(
|
||||
mobj.group('json_ld'), video_id, fatal=fatal,
|
||||
errnote=False if default is not NO_DEFAULT else None)
|
||||
for json_ld in variadic(json_ld_item):
|
||||
if isinstance(json_ld, dict):
|
||||
yield json_ld
|
||||
@@ -1698,7 +1720,7 @@ class InfoExtractor:
|
||||
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
|
||||
if rating is not None:
|
||||
info['average_rating'] = rating
|
||||
if is_type(e, 'TVEpisode', 'Episode'):
|
||||
if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
|
||||
episode_name = unescapeHTML(e.get('name'))
|
||||
info.update({
|
||||
'episode': episode_name,
|
||||
@@ -1832,12 +1854,26 @@ class InfoExtractor:
|
||||
|
||||
@staticmethod
|
||||
def _remove_duplicate_formats(formats):
|
||||
format_urls = set()
|
||||
seen_urls = set()
|
||||
seen_fragment_urls = set()
|
||||
unique_formats = []
|
||||
for f in formats:
|
||||
if f['url'] not in format_urls:
|
||||
format_urls.add(f['url'])
|
||||
fragments = f.get('fragments')
|
||||
if callable(fragments):
|
||||
unique_formats.append(f)
|
||||
|
||||
elif fragments:
|
||||
fragment_urls = frozenset(
|
||||
fragment.get('url') or urljoin(f['fragment_base_url'], fragment['path'])
|
||||
for fragment in fragments)
|
||||
if fragment_urls not in seen_fragment_urls:
|
||||
seen_fragment_urls.add(fragment_urls)
|
||||
unique_formats.append(f)
|
||||
|
||||
elif f['url'] not in seen_urls:
|
||||
seen_urls.add(f['url'])
|
||||
unique_formats.append(f)
|
||||
|
||||
formats[:] = unique_formats
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video', headers={}):
|
||||
@@ -2065,7 +2101,7 @@ class InfoExtractor:
|
||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
|
||||
if self.get_param('hls_split_discontinuity', False):
|
||||
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||
@@ -2800,11 +2836,11 @@ class InfoExtractor:
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if try_call(lambda: base_url_e.text) is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
if re.match(r'https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and base_url.startswith('/'):
|
||||
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
|
||||
elif mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
elif mpd_base_url and not re.match(r'https?://', base_url):
|
||||
if not mpd_base_url.endswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
@@ -2894,7 +2930,7 @@ class InfoExtractor:
|
||||
}
|
||||
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
return 'url' if re.match(r'https?://', location) else 'path'
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
@@ -3059,7 +3095,11 @@ class InfoExtractor:
|
||||
url_pattern = stream.attrib['Url']
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
stream_language = stream.get('Language', 'und')
|
||||
# IsmFD expects ISO 639 Set 2 language codes (3-character length)
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
|
||||
stream_language = stream.get('Language') or 'und'
|
||||
if len(stream_language) != 3:
|
||||
stream_language = ISO639Utils.short2long(stream_language) or 'und'
|
||||
for track in stream.findall('QualityLevel'):
|
||||
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
|
||||
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
|
||||
@@ -3489,7 +3529,7 @@ class InfoExtractor:
|
||||
continue
|
||||
urls.add(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
ext = determine_ext(source_url, default_ext=mimetype2ext(source_type))
|
||||
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@@ -3741,7 +3781,7 @@ class InfoExtractor:
|
||||
""" Merge subtitle dictionaries, language by language. """
|
||||
if target is None:
|
||||
target = {}
|
||||
for d in dicts:
|
||||
for d in filter(None, dicts):
|
||||
for lang, subs in d.items():
|
||||
target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
|
||||
return target
|
||||
@@ -3763,7 +3803,7 @@ class InfoExtractor:
|
||||
def mark_watched(self, *args, **kwargs):
|
||||
if not self.get_param('mark_watched', False):
|
||||
return
|
||||
if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed:
|
||||
if (self.supports_login() and self._get_login_info()[0] is not None) or self._cookies_passed:
|
||||
self._mark_watched(*args, **kwargs)
|
||||
|
||||
def _mark_watched(self, *args, **kwargs):
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -112,8 +113,7 @@ class CondeNastIE(InfoExtractor):
|
||||
m_paths = re.finditer(
|
||||
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
|
||||
paths = orderedSet(m.group(1) for m in m_paths)
|
||||
build_url = lambda path: urllib.parse.urljoin(base_url, path)
|
||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||
entries = [self.url_result(urljoin(base_url, path), 'CondeNast') for path in paths]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
def _extract_video_params(self, webpage, display_id):
|
||||
|
||||
@@ -456,7 +456,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
|
||||
'duration': ('duration_ms', {float_or_none(scale=1000)}),
|
||||
'timestamp': ('upload_date', {parse_iso8601}),
|
||||
'series': ('series_title', {str}),
|
||||
'series_id': ('series_id', {str}),
|
||||
@@ -484,7 +484,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
|
||||
'duration': ('duration_ms', {float_or_none(scale=1000)}),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1,14 +1,27 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import orderedSet
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from ..utils import extract_attributes, orderedSet
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class CTVNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||
_BASE_REGEX = r'https?://(?:[^.]+\.)?ctvnews\.ca/'
|
||||
_VIDEO_ID_RE = r'(?P<id>\d{5,})'
|
||||
_PLAYLIST_ID_RE = r'(?P<id>\d\.\d{5,})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
|
||||
rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
|
||||
rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
|
||||
rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}/?(?:$|[?#])',
|
||||
rf'{_BASE_REGEX}(?!video/)[^?#]+\?binId={_PLAYLIST_ID_RE}',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||
'md5': '9b8624ba66351a23e0b6e1391971f9af',
|
||||
'md5': 'b608f466c7fa24b9666c6439d766ab7e',
|
||||
'info_dict': {
|
||||
'id': '901995',
|
||||
'ext': 'flv',
|
||||
@@ -16,6 +29,33 @@ class CTVNewsIE(InfoExtractor):
|
||||
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
|
||||
'timestamp': 1467286284,
|
||||
'upload_date': '20160630',
|
||||
'categories': [],
|
||||
'season_number': 0,
|
||||
'season': 'Season 0',
|
||||
'tags': [],
|
||||
'series': 'CTV News National | Archive | Stories 2',
|
||||
'season_id': '57981',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'duration': 764.631,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://barrie.ctvnews.ca/video/c3030933-here_s-what_s-making-news-for-nov--15?binId=1272429',
|
||||
'md5': '8b8c2b33c5c1803e3c26bc74ff8694d5',
|
||||
'info_dict': {
|
||||
'id': '3030933',
|
||||
'ext': 'flv',
|
||||
'title': 'Here’s what’s making news for Nov. 15',
|
||||
'description': 'Here are the top stories we’re working on for CTV News at 11 for Nov. 15',
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2021_2_22_a602e68e-1514-410e-a67a-e1f7cccbacab_png_2000x1125.jpg',
|
||||
'season_id': '58104',
|
||||
'season_number': 0,
|
||||
'tags': [],
|
||||
'season': 'Season 0',
|
||||
'categories': [],
|
||||
'series': 'CTV News Barrie',
|
||||
'upload_date': '20241116',
|
||||
'duration': 42.943,
|
||||
'timestamp': 1731722452,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
|
||||
@@ -31,6 +71,72 @@ class CTVNewsIE(InfoExtractor):
|
||||
'id': '1.2876780',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '1.5736957',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'https://www.ctvnews.ca/business/respondents-to-bank-of-canada-questionnaire-largely-oppose-creating-a-digital-loonie-1.6665797',
|
||||
'md5': '24bc4b88cdc17d8c3fc01dfc228ab72c',
|
||||
'info_dict': {
|
||||
'id': '2695026',
|
||||
'ext': 'flv',
|
||||
'season_id': '89852',
|
||||
'series': 'From CTV News Channel',
|
||||
'description': 'md5:796a985a23cacc7e1e2fafefd94afd0a',
|
||||
'season': '2023',
|
||||
'title': 'Bank of Canada asks public about digital currency',
|
||||
'categories': [],
|
||||
'tags': [],
|
||||
'upload_date': '20230526',
|
||||
'season_number': 2023,
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
|
||||
'timestamp': 1685105157,
|
||||
'duration': 253.553,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://stox.ctvnews.ca/video-gallery?clipId=582589',
|
||||
'md5': '135cc592df607d29dddc931f1b756ae2',
|
||||
'info_dict': {
|
||||
'id': '582589',
|
||||
'ext': 'flv',
|
||||
'categories': [],
|
||||
'timestamp': 1427906183,
|
||||
'season_number': 0,
|
||||
'duration': 125.559,
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
|
||||
'series': 'CTV News Stox',
|
||||
'description': 'CTV original footage of the rise and fall of the Berlin Wall.',
|
||||
'title': 'Berlin Wall',
|
||||
'season_id': '63817',
|
||||
'season': 'Season 0',
|
||||
'tags': [],
|
||||
'upload_date': '20150401',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ottawa.ctvnews.ca/features/regional-contact/regional-contact-archive?binId=1.1164587#3023759',
|
||||
'md5': 'a14c0603557decc6531260791c23cc5e',
|
||||
'info_dict': {
|
||||
'id': '3023759',
|
||||
'ext': 'flv',
|
||||
'season_number': 2024,
|
||||
'timestamp': 1731798000,
|
||||
'season': '2024',
|
||||
'episode': 'Episode 125',
|
||||
'description': 'CTV News Ottawa at Six',
|
||||
'duration': 2712.076,
|
||||
'episode_number': 125,
|
||||
'upload_date': '20241116',
|
||||
'title': 'CTV News Ottawa at Six for Saturday, November 16, 2024',
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
|
||||
'categories': [],
|
||||
'tags': [],
|
||||
'series': 'CTV News Ottawa at Six',
|
||||
'season_id': '92667',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/1.810401',
|
||||
'only_matching': True,
|
||||
@@ -42,29 +148,35 @@ class CTVNewsIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _ninecninemedia_url_result(self, clip_id):
|
||||
return self.url_result(f'9c9media:ctvnews_web:{clip_id}', NineCNineMediaIE, clip_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
def ninecninemedia_url_result(clip_id):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': clip_id,
|
||||
'url': f'9c9media:ctvnews_web:{clip_id}',
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
if mobj := re.fullmatch(self._VIDEO_ID_RE, urllib.parse.urlparse(url).fragment):
|
||||
page_id = mobj.group('id')
|
||||
|
||||
if page_id.isdigit():
|
||||
return ninecninemedia_url_result(page_id)
|
||||
else:
|
||||
webpage = self._download_webpage(f'http://www.ctvnews.ca/{page_id}', page_id, query={
|
||||
'ot': 'example.AjaxPageLayout.ot',
|
||||
'maxItemsPerPage': 1000000,
|
||||
})
|
||||
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
|
||||
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
|
||||
if not entries:
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
if 'getAuthStates("' in webpage:
|
||||
entries = [ninecninemedia_url_result(clip_id) for clip_id in
|
||||
self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
|
||||
return self.playlist_result(entries, page_id)
|
||||
if re.fullmatch(self._VIDEO_ID_RE, page_id):
|
||||
return self._ninecninemedia_url_result(page_id)
|
||||
|
||||
webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
|
||||
'ot': 'example.AjaxPageLayout.ot',
|
||||
'maxItemsPerPage': 1000000,
|
||||
})
|
||||
entries = [self._ninecninemedia_url_result(clip_id)
|
||||
for clip_id in orderedSet(re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
|
||||
if not entries:
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
if 'getAuthStates("' in webpage:
|
||||
entries = [self._ninecninemedia_url_result(clip_id) for clip_id in
|
||||
self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
|
||||
else:
|
||||
entries = [
|
||||
self._ninecninemedia_url_result(clip_id) for clip_id in
|
||||
traverse_obj(webpage, (
|
||||
{find_element(tag='jasper-player-container', html=True)},
|
||||
{extract_attributes}, 'axis-ids', {json.loads}, ..., 'axisId', {str}))
|
||||
]
|
||||
|
||||
return self.playlist_result(entries, page_id)
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
@@ -31,9 +28,6 @@ class CultureUnpluggedIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
# request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request
|
||||
self._request_webpage(HEADRequest(
|
||||
'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id)
|
||||
movie_data = self._download_json(
|
||||
f'http://www.cultureunplugged.com/movie-data/cu-{video_id}.json', display_id)
|
||||
|
||||
|
||||
@@ -6,12 +6,37 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cwtv.com/shows/all-american-homecoming/ready-or-not/?play=d848488f-f62a-40fd-af1f-6440b1821aab',
|
||||
'info_dict': {
|
||||
'id': 'd848488f-f62a-40fd-af1f-6440b1821aab',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ready Or Not',
|
||||
'description': 'Simone is concerned about changes taking place at Bringston; JR makes a decision about his future.',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 2547,
|
||||
'timestamp': 1720519200,
|
||||
'uploader': 'CWTV',
|
||||
'chapters': 'count:6',
|
||||
'series': 'All American: Homecoming',
|
||||
'season_number': 3,
|
||||
'episode_number': 1,
|
||||
'age_limit': 0,
|
||||
'upload_date': '20240709',
|
||||
'season': 'Season 3',
|
||||
'episode': 'Episode 1',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'info_dict': {
|
||||
'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
@@ -69,13 +94,14 @@ class CWTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)
|
||||
f'https://images.cwtv.com/feed/mobileapp/video-meta/apiversion_12/guid_{video_id}', video_id)
|
||||
if data.get('result') != 'ok':
|
||||
raise ExtractorError(data['msg'], expected=True)
|
||||
video_data = data['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or f'http://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}?formats=M3U'
|
||||
mpx_url = update_url_query(
|
||||
video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}',
|
||||
{'formats': 'M3U+none'})
|
||||
|
||||
season = str_or_none(video_data.get('season'))
|
||||
episode = str_or_none(video_data.get('episode'))
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import functools
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
@@ -51,6 +52,15 @@ class DacastVODIE(DacastBaseIE):
|
||||
'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, { # /uspaes/ in hls_url
|
||||
'url': 'https://iframe.dacast.com/vod/f9823fc6-faba-b98f-0d00-4a7b50a58c5b/348c5c84-b6af-4859-bb9d-1d01009c795b',
|
||||
'info_dict': {
|
||||
'id': '348c5c84-b6af-4859-bb9d-1d01009c795b',
|
||||
'ext': 'mp4',
|
||||
'title': 'pl1-edyta-rubas-211124.mp4',
|
||||
'uploader_id': 'f9823fc6-faba-b98f-0d00-4a7b50a58c5b',
|
||||
'thumbnail': 'https://universe-files.dacast.com/4d0bd042-a536-752d-fc34-ad2fa44bbcbb.png',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/',
|
||||
@@ -74,6 +84,15 @@ class DacastVODIE(DacastBaseIE):
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
@functools.cached_property
|
||||
def _usp_signing_secret(self):
|
||||
player_js = self._download_webpage(
|
||||
'https://player.dacast.com/js/player.js', None, 'Downloading player JS')
|
||||
# Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
|
||||
return self._search_regex(
|
||||
r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js,
|
||||
'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP'
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
|
||||
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
|
||||
@@ -94,10 +113,10 @@ class DacastVODIE(DacastBaseIE):
|
||||
if 'DRM_EXT' in hls_url:
|
||||
self.report_drm(video_id)
|
||||
elif '/uspaes/' in hls_url:
|
||||
# From https://player.dacast.com/js/player.js
|
||||
# Ref: https://player.dacast.com/js/player.js
|
||||
ts = int(time.time())
|
||||
signature = hashlib.sha1(
|
||||
f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw').digest().hex()
|
||||
f'{10413792000 - ts}{ts}{self._usp_signing_secret}'.encode()).digest().hex()
|
||||
hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}'
|
||||
|
||||
for retry in self.RetryManager():
|
||||
|
||||
@@ -10,11 +10,14 @@ from ..utils import (
|
||||
OnDemandPagedList,
|
||||
age_restricted,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -98,12 +101,20 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'''(?ix)
|
||||
https?://
|
||||
(?:
|
||||
dai\.ly/|
|
||||
(?:
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
|
||||
(?:www\.)?lequipe\.fr/video
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
|
||||
(?:www\.)?lequipe\.fr
|
||||
)/
|
||||
(?:
|
||||
swf/(?!video)|
|
||||
(?:(?:crawler|embed|swf)/)?video/|
|
||||
player(?:/[\da-z]+)?\.html\?(?:video|(?P<is_playlist>playlist))=
|
||||
)
|
||||
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
)
|
||||
(?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
IE_NAME = 'dailymotion'
|
||||
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
||||
_TESTS = [{
|
||||
@@ -123,7 +134,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
|
||||
@@ -142,7 +153,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['en_quete_d_esprit'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
@@ -217,6 +228,86 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
||||
'only_matching': True,
|
||||
}, { # playlist-only
|
||||
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/crawler/video/x8u4owg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/embed/video/x8u4owg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://dai.ly/x94cnnk',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
|
||||
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
|
||||
'info_dict': {
|
||||
'id': 'x93blhi',
|
||||
'ext': 'mp4',
|
||||
'title': 'OnAir - 01/08/24',
|
||||
'description': '',
|
||||
'duration': 217,
|
||||
'timestamp': 1722505658,
|
||||
'upload_date': '20240801',
|
||||
'uploader': 'Financialounge',
|
||||
'uploader_id': 'x2vtgmm',
|
||||
'age_limit': 0,
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https://\w+.dmcdn.net/v/WnEY61cmvMxt2Fi6d/x1080',
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
|
||||
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
|
||||
'info_dict': {
|
||||
'id': 'x7wdsj',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
# https://www.dailymotion.com/crawler/video/x8u4owg
|
||||
'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php',
|
||||
'info_dict': {
|
||||
'id': 'x8u4owg',
|
||||
'ext': 'mp4',
|
||||
'like_count': int,
|
||||
'uploader': 'Le Parisien',
|
||||
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
|
||||
'upload_date': '20240309',
|
||||
'view_count': int,
|
||||
'timestamp': 1709997866,
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'x32f7b',
|
||||
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
|
||||
'duration': 428.0,
|
||||
'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne',
|
||||
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xry80.html?video=x8vu47w
|
||||
'url': 'https://www.metatube.com/en/videos/546765/This-frogs-decorates-Christmas-tree/',
|
||||
'info_dict': {
|
||||
'id': 'x8vu47w',
|
||||
'ext': 'mp4',
|
||||
'like_count': int,
|
||||
'uploader': 'Metatube',
|
||||
'thumbnail': r're:https://\w+.dmcdn.net/v/W1G_S1coGSFTfkTeR/x1080',
|
||||
'upload_date': '20240326',
|
||||
'view_count': int,
|
||||
'timestamp': 1711496732,
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'x2xpy74',
|
||||
'title': 'Está lindas ranitas ponen su arbolito',
|
||||
'duration': 28,
|
||||
'description': 'Que lindura',
|
||||
'tags': [],
|
||||
},
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_COMMON_MEDIA_FIELDS = '''description
|
||||
@@ -231,17 +322,36 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for mobj in re.finditer(
|
||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
yield 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
player_url = url_or_none(attrs.get('src'))
|
||||
if not player_url:
|
||||
continue
|
||||
player_url = player_url.replace('.js', '.html')
|
||||
if player_url.startswith('//'):
|
||||
player_url = f'https:{player_url}'
|
||||
if video_id := attrs.get('data-video'):
|
||||
query_string = f'video={video_id}'
|
||||
elif playlist_id := attrs.get('data-playlist'):
|
||||
query_string = f'playlist={playlist_id}'
|
||||
else:
|
||||
continue
|
||||
yield update_url(player_url, query=query_string)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
video_id, playlist_id = self._match_valid_url(url).groups()
|
||||
video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
|
||||
|
||||
if playlist_id:
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self.url_result(
|
||||
'http://www.dailymotion.com/playlist/' + playlist_id,
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
if is_playlist: # We matched the playlist query param as video_id
|
||||
playlist_id = video_id
|
||||
video_id = None
|
||||
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self.url_result(
|
||||
f'http://www.dailymotion.com/playlist/{playlist_id}',
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
media = self._call_api(
|
||||
@@ -282,6 +392,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
title = metadata['title']
|
||||
is_live = media.get('isOnAir')
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
for quality, media_list in metadata['qualities'].items():
|
||||
for m in media_list:
|
||||
media_url = m.get('url')
|
||||
@@ -289,8 +401,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
if media_type == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
|
||||
fmt, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmt)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
@@ -310,20 +424,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if not f.get('fps') and f['format_id'].endswith('@60'):
|
||||
f['fps'] = 60
|
||||
|
||||
subtitles = {}
|
||||
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
|
||||
for subtitle_lang, subtitle in subtitles_data.items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'url': subtitle_url,
|
||||
} for subtitle_url in subtitle.get('urls', [])]
|
||||
|
||||
thumbnails = []
|
||||
for height, poster_url in metadata.get('posters', {}).items():
|
||||
thumbnails.append({
|
||||
'height': int_or_none(height),
|
||||
'id': height,
|
||||
'url': poster_url,
|
||||
})
|
||||
thumbnails = traverse_obj(metadata, (
|
||||
('posters', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]), {
|
||||
'height': (0, {int_or_none}),
|
||||
'id': (0, {str}),
|
||||
'url': 1,
|
||||
}))
|
||||
|
||||
owner = metadata.get('owner') or {}
|
||||
stats = media.get('stats') or {}
|
||||
@@ -447,7 +559,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search|crawler)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
|
||||
@@ -40,7 +40,7 @@ class DangalPlayBaseIE(InfoExtractor):
|
||||
'id': ('content_id', {str}),
|
||||
'title': ('display_title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'series': ('show_name', {str}, {lambda x: x or None}),
|
||||
'series': ('show_name', {str}, filter),
|
||||
'series_id': ('catalog_id', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'release_timestamp': ('release_date_uts', {int_or_none}),
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
jwt_decode_hs256,
|
||||
parse_codecs,
|
||||
try_get,
|
||||
url_or_none,
|
||||
@@ -13,9 +16,6 @@ from ..utils.traversal import traverse_obj
|
||||
class DigitalConcertHallIE(InfoExtractor):
|
||||
IE_DESC = 'DigitalConcertHall extractor'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
|
||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
|
||||
_ACCESS_TOKEN = None
|
||||
_NETRC_MACHINE = 'digitalconcerthall'
|
||||
_TESTS = [{
|
||||
'note': 'Playlist with only one video',
|
||||
@@ -69,59 +69,157 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
_LOGIN_HINT = ('Use --username token --password ACCESS_TOKEN where ACCESS_TOKEN '
|
||||
'is the "access_token_production" from your browser local storage')
|
||||
_REFRESH_HINT = 'or else use a "refresh_token" with --username refresh --password REFRESH_TOKEN'
|
||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||
_CLIENT_ID = 'dch.webapp'
|
||||
_CLIENT_SECRET = '2ySLN+2Fwb'
|
||||
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
|
||||
_OAUTH_HEADERS = {
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'Origin': 'https://www.digitalconcerthall.com',
|
||||
'Referer': 'https://www.digitalconcerthall.com/',
|
||||
'User-Agent': _USER_AGENT,
|
||||
}
|
||||
_access_token = None
|
||||
_access_token_expiry = 0
|
||||
_refresh_token = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_token = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
|
||||
@property
|
||||
def _access_token_is_expired(self):
|
||||
return self._access_token_expiry - 30 <= int(time.time())
|
||||
|
||||
def _set_access_token(self, value):
    """Store *value* as the access token and record its expiry timestamp.

    The expiry is the integer `exp` claim of the decoded token; when no
    such claim is obtained, the expiry is set to 0.
    """
    self._access_token = value
    expiry = traverse_obj(value, ({jwt_decode_hs256}, 'exp', {int}))
    self._access_token_expiry = expiry or 0
|
||||
|
||||
def _cache_tokens(self, /):
    """Write the current access and refresh tokens to the cache under the 'tokens' key."""
    tokens = {
        'access_token': self._access_token,
        'refresh_token': self._refresh_token,
    }
    self.cache.store(self._NETRC_MACHINE, 'tokens', tokens)
|
||||
|
||||
def _fetch_new_tokens(self, invalidate=False):
|
||||
if invalidate:
|
||||
self.report_warning('Access token has been invalidated')
|
||||
self._set_access_token(None)
|
||||
|
||||
if not self._access_token_is_expired:
|
||||
return
|
||||
|
||||
if not self._refresh_token:
|
||||
self._set_access_token(None)
|
||||
self._cache_tokens()
|
||||
raise ExtractorError(
|
||||
'Access token has expired or been invalidated. '
|
||||
'Get a new "access_token_production" value from your browser '
|
||||
f'and try again, {self._REFRESH_HINT}', expected=True)
|
||||
|
||||
# If we only have a refresh token, we need a temporary "initial token" for the refresh flow
|
||||
bearer_token = self._access_token or self._download_json(
|
||||
self._OAUTH_URL, None, 'Obtaining initial token', 'Unable to obtain initial token',
|
||||
data=urlencode_postdata({
|
||||
'affiliate': 'none',
|
||||
'grant_type': 'device',
|
||||
'device_vendor': 'unknown',
|
||||
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio
|
||||
'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari',
|
||||
'app_id': 'dch.webapp',
|
||||
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio,
|
||||
# but this is no longer effective since actual login is not possible anymore
|
||||
'device_model': 'unknown',
|
||||
'app_id': self._CLIENT_ID,
|
||||
'app_distributor': 'berlinphil',
|
||||
'app_version': '1.84.0',
|
||||
'client_secret': '2ySLN+2Fwb',
|
||||
}), headers={
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})['access_token']
|
||||
'app_version': '1.95.0',
|
||||
'client_secret': self._CLIENT_SECRET,
|
||||
}), headers=self._OAUTH_HEADERS)['access_token']
|
||||
|
||||
try:
|
||||
login_response = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
|
||||
'grant_type': 'password',
|
||||
'username': username,
|
||||
'password': password,
|
||||
response = self._download_json(
|
||||
self._OAUTH_URL, None, 'Refreshing token', 'Unable to refresh token',
|
||||
data=urlencode_postdata({
|
||||
'grant_type': 'refresh_token',
|
||||
'refresh_token': self._refresh_token,
|
||||
'client_id': self._CLIENT_ID,
|
||||
'client_secret': self._CLIENT_SECRET,
|
||||
}), headers={
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'Referer': 'https://www.digitalconcerthall.com',
|
||||
'Authorization': f'Bearer {login_token}',
|
||||
'User-Agent': self._USER_AGENT,
|
||||
**self._OAUTH_HEADERS,
|
||||
'Authorization': f'Bearer {bearer_token}',
|
||||
})
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
self._set_access_token(None)
|
||||
self._refresh_token = None
|
||||
self._cache_tokens()
|
||||
raise ExtractorError('Your tokens have been invalidated', expected=True)
|
||||
raise
|
||||
self._ACCESS_TOKEN = login_response['access_token']
|
||||
|
||||
self._set_access_token(response['access_token'])
|
||||
if refresh_token := traverse_obj(response, ('refresh_token', {str})):
|
||||
self.write_debug('New refresh token granted')
|
||||
self._refresh_token = refresh_token
|
||||
self._cache_tokens()
|
||||
|
||||
def _perform_login(self, username, password):
    """Set up tokens from the special usernames 'refresh', 'token' or 'cache'.

    - 'refresh': *password* is used as the refresh token and new tokens are fetched.
    - 'token': *password* must decode as a JWT; it is stored as the access token and cached.
    - 'cache': previously cached tokens are loaded, refreshing them if the access token is expired.

    Any other username raises ExtractorError, since password login is not supported.
    """
    self.report_login()

    if username in ('refresh', 'token'):
        if username == 'refresh':
            self._refresh_token = password
            self._fetch_new_tokens()
        else:
            if not traverse_obj(password, {jwt_decode_hs256}):
                raise ExtractorError(
                    f'The access token passed to yt-dlp is not valid. {self._LOGIN_HINT}', expected=True)
            self._set_access_token(password)
            self._cache_tokens()

        # Only point the user to --username cache when a cache directory is in use
        if self.get_param('cachedir') is not False:
            token_type = 'refresh' if username == 'refresh' else 'access'
            self.to_screen(f'Your {token_type} token has been cached to disk. To use the cached '
                           'token next time, pass --username cache along with any password')
        return

    if username != 'cache':
        raise ExtractorError(
            'Login with username and password is no longer supported '
            f'for this site. {self._LOGIN_HINT}, {self._REFRESH_HINT}', expected=True)

    # Try cached access_token
    stored_tokens = self.cache.load(self._NETRC_MACHINE, 'tokens', default={})
    self._set_access_token(stored_tokens.get('access_token'))
    self._refresh_token = stored_tokens.get('refresh_token')

    # Try cached refresh_token
    if self._access_token_is_expired:
        self._fetch_new_tokens(invalidate=True)
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._ACCESS_TOKEN:
|
||||
self.raise_login_required(method='password')
|
||||
if not self._access_token:
|
||||
self.raise_login_required(
|
||||
'All content on this site is only available for registered users. '
|
||||
f'{self._LOGIN_HINT}, {self._REFRESH_HINT}', method=None)
|
||||
|
||||
def _entries(self, items, language, type_, **kwargs):
|
||||
for item in items:
|
||||
video_id = item['id']
|
||||
stream_info = self._download_json(
|
||||
self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})
|
||||
|
||||
for should_retry in (True, False):
|
||||
self._fetch_new_tokens(invalidate=not should_retry)
|
||||
try:
|
||||
stream_info = self._download_json(
|
||||
self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._access_token}',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})
|
||||
break
|
||||
except ExtractorError as error:
|
||||
if should_retry and isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
continue
|
||||
raise
|
||||
|
||||
formats = []
|
||||
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
|
||||
@@ -157,7 +255,6 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'Accept': 'application/json',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
})
|
||||
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
|
||||
|
||||
|
||||
@@ -319,32 +319,6 @@ class DPlayIE(DPlayBaseIE):
|
||||
url, display_id, host, 'dplay' + country, country, domain)
|
||||
|
||||
|
||||
class HGTVDeIE(DPlayBaseIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
|
||||
'info_dict': {
|
||||
'id': '151205',
|
||||
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wer braucht schon eine Toilette',
|
||||
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
|
||||
'duration': 1177.024,
|
||||
'timestamp': 1595705400,
|
||||
'upload_date': '20200725',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Tiny House - klein, aber oho',
|
||||
'season_number': 3,
|
||||
'episode_number': 3,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||
|
||||
|
||||
class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
|
||||
|
||||
@@ -373,6 +347,45 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
|
||||
|
||||
|
||||
class HGTVDeIE(DiscoveryPlusBaseIE):
    # Extractor for de.hgtv.com "sendungen" pages; the API parameters below
    # are the ones consumed by the DiscoveryPlusBaseIE machinery.
    _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX

    _PRODUCT = 'hgtv'
    _DISCO_API_PARAMS = {
        'disco_host': 'eu1-prod.disco-api.com',
        'realm': 'hgtv',
        'country': 'de',
    }

    _TESTS = [{
        'url': 'https://de.hgtv.com/sendungen/mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
        'info_dict': {
            'id': '7332936',
            'ext': 'mp4',
            'display_id': 'mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
            'title': 'Vom Landleben ins Loft',
            'description': 'md5:e5f72c02c853970796dd3818f2e25745',
            'episode': 'Episode 7',
            'episode_number': 7,
            'season': 'Season 7',
            'season_number': 7,
            'series': 'Mein Kleinstadt-Traumhaus',
            'duration': 2645.0,
            'timestamp': 1725998100,
            'upload_date': '20240910',
            'creators': ['HGTV'],
            'tags': [],
            'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/08/09/82a386b9-c688-32c7-b9ff-0b13865f0bae.jpeg',
        },
    }]

    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        """Add the realm, client identifier and Authorization headers to *headers* in place."""
        extra_headers = {
            'x-disco-params': f'realm={realm}',
            'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
            'Authorization': self._get_auth(disco_base, display_id, realm),
        }
        headers.update(extra_headers)
|
||||
|
||||
|
||||
class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
|
||||
@@ -6,8 +6,10 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,43 +38,56 @@ class DropboxIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
def _yield_decoded_parts(self, webpage):
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
yield base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
fn = urllib.parse.unquote(url_basename(url))
|
||||
title = os.path.splitext(fn)[0]
|
||||
content_id = None
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if (self._og_search_title(webpage) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if '/sm/password' in part:
|
||||
content_id = self._search_regex(r'content_id=([\w.+=/-]+)', part, 'content ID')
|
||||
break
|
||||
|
||||
if password:
|
||||
content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
|
||||
payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(),
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Authentication failed!', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
elif self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
else:
|
||||
if content_id:
|
||||
password = self.get_param('videopassword')
|
||||
if not password:
|
||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||
|
||||
formats, subtitles, has_anonymous_download = [], {}, False
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
|
||||
data=urlencode_postdata({
|
||||
'is_xhr': 'true',
|
||||
't': self._get_cookies('https://www.dropbox.com')['t'].value,
|
||||
'content_id': content_id,
|
||||
'password': password,
|
||||
'url': update_url(url, scheme='', netloc=''),
|
||||
}))
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Invalid password', expected=True)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
has_anonymous_download = False
|
||||
thumbnail = None
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if not has_anonymous_download:
|
||||
has_anonymous_download = self._search_regex(
|
||||
r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
|
||||
r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
|
||||
transcode_url = self._search_regex(
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
|
||||
if not transcode_url:
|
||||
continue
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
|
||||
thumbnail = self._search_regex(
|
||||
r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
|
||||
break
|
||||
|
||||
# downloads enabled we can get the original file
|
||||
@@ -89,4 +104,5 @@ class DropboxIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
51
yt_dlp/extractor/drtalks.py
Normal file
51
yt_dlp/extractor/drtalks.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DrTalksIE(InfoExtractor):
    # Resolves drtalks.com video pages to the Brightcove video they embed
    _VALID_URL = r'https?://(?:www\.)?drtalks\.com/videos/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://drtalks.com/videos/six-pillars-of-resilience-tools-for-managing-stress-and-flourishing/',
        'info_dict': {
            'id': '6366193757112',
            'ext': 'mp4',
            'uploader_id': '6314452011001',
            'tags': ['resilience'],
            'description': 'md5:9c6805aee237ee6de8052461855b9dda',
            'timestamp': 1734546659,
            'thumbnail': 'https://drtalks.com/wp-content/uploads/2024/12/Episode-82-Eva-Selhub-DrTalks-Thumbs.jpg',
            'title': 'Six Pillars of Resilience: Tools for Managing Stress and Flourishing',
            'duration': 2800.682,
            'upload_date': '20241218',
        },
    }, {
        'url': 'https://drtalks.com/videos/the-pcos-puzzle-mastering-metabolic-health-with-marcelle-pick/',
        'info_dict': {
            'id': '6364699891112',
            'ext': 'mp4',
            'title': 'The PCOS Puzzle: Mastering Metabolic Health with Marcelle Pick',
            'description': 'md5:e87cbe00ca50135d5702787fc4043aaa',
            'thumbnail': 'https://drtalks.com/wp-content/uploads/2024/11/Episode-34-Marcelle-Pick-OBGYN-NP-DrTalks.jpg',
            'duration': 3515.2,
            'tags': ['pcos'],
            'upload_date': '20241114',
            'timestamp': 1731592119,
            'uploader_id': '6314452011001',
        },
    }]

    def _real_extract(self, url):
        """Read the page's Next.js data and delegate to BrightcoveNewIE with page metadata."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        video_data = self._search_nextjs_data(page, display_id)['props']['pageProps']['data']['video']

        brightcove_url = video_data['videos']['brightcoveVideoLink']
        # NOTE(review): 'summury' looks like a misspelling of 'summary'; presumably it
        # mirrors the key used in the page data - confirm before changing it
        metadata = traverse_obj(video_data, {
            'title': ('title', {str}),
            'description': ('videos', 'summury', {str}),
            'thumbnail': ('featuredImage', 'node', 'sourceUrl', {url_or_none}),
        })

        return self.url_result(
            brightcove_url, BrightcoveNewIE, display_id,
            url_transparent=True, **metadata)
|
||||
@@ -139,12 +139,11 @@ class DRTVIE(InfoExtractor):
|
||||
return
|
||||
|
||||
token_response = self._download_json(
|
||||
'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||
'https://isl.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||
note='Downloading anonymous token', headers={
|
||||
'content-type': 'application/json',
|
||||
}, query={
|
||||
'device': 'web_browser',
|
||||
'ff': 'idp,ldp,rpt',
|
||||
'device': 'phone_android',
|
||||
'lang': 'da',
|
||||
'supportFallbackToken': 'true',
|
||||
}, data=json.dumps({
|
||||
|
||||
@@ -5,15 +5,16 @@ from ..utils import (
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
|
||||
|
||||
class DuoplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?'
|
||||
_VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)(?:[/?#]|$)'
|
||||
_TESTS = [{
|
||||
'note': 'Siberi võmm S02E12',
|
||||
'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
|
||||
@@ -34,15 +35,16 @@ class DuoplayIE(InfoExtractor):
|
||||
'episode_number': 12,
|
||||
'episode_id': '24',
|
||||
},
|
||||
'skip': 'No video found',
|
||||
}, {
|
||||
'note': 'Empty title',
|
||||
'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
|
||||
'md5': '6aca68be71112314738dd17cced7f8bf',
|
||||
'md5': 'cba9f5dabf2582b224d80ac44fb80e47',
|
||||
'info_dict': {
|
||||
'id': '17_14',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ühikarotid',
|
||||
'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
|
||||
'title': 'Episode 14',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'description': 'md5:4719b418e058c209def41d48b601276e',
|
||||
'upload_date': '20100916',
|
||||
'timestamp': 1284661800,
|
||||
@@ -52,6 +54,8 @@ class DuoplayIE(InfoExtractor):
|
||||
'season_number': 2,
|
||||
'episode_id': '14',
|
||||
'release_year': 2010,
|
||||
'episode': 'Episode 14',
|
||||
'episode_number': 14,
|
||||
},
|
||||
}, {
|
||||
'note': 'Movie without expiry',
|
||||
@@ -68,10 +72,32 @@ class DuoplayIE(InfoExtractor):
|
||||
'timestamp': 1671054000,
|
||||
'release_year': 2018,
|
||||
},
|
||||
'skip': 'No video found',
|
||||
}, {
|
||||
'note': 'Episode url without show name',
|
||||
'url': 'https://duoplay.ee/9644?ep=185',
|
||||
'md5': '63f324b4fe2dbd8194dca16a6d52184a',
|
||||
'info_dict': {
|
||||
'id': '9644_185',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 185',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'description': 'md5:ed25ba4e9e5d54bc291a4a0cdd241467',
|
||||
'upload_date': '20241120',
|
||||
'timestamp': 1732077000,
|
||||
'episode': 'Episode 63',
|
||||
'episode_id': '185',
|
||||
'episode_number': 63,
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'series': 'Telehommik',
|
||||
'series_id': '9644',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
telecast_id, episode = self._match_valid_url(url).group('id', 'ep')
|
||||
telecast_id = self._match_id(url)
|
||||
episode = traverse_obj(parse_qs(url), ('ep', 0, {int_or_none}, {str_or_none}))
|
||||
video_id = join_nonempty(telecast_id, episode, delim='_')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_player = try_call(lambda: extract_attributes(
|
||||
@@ -79,25 +105,33 @@ class DuoplayIE(InfoExtractor):
|
||||
if not video_player or not video_player.get('manifest-url'):
|
||||
raise ExtractorError('No video found', expected=True)
|
||||
|
||||
manifest_url = video_player['manifest-url']
|
||||
session_token = self._download_json(
|
||||
'https://sts.postimees.ee/session/register', video_id, 'Registering session',
|
||||
'Unable to register session', headers={
|
||||
'Accept': 'application/json',
|
||||
'X-Original-URI': manifest_url,
|
||||
})['session']
|
||||
|
||||
episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(video_player['manifest-url'], video_id, 'mp4'),
|
||||
'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4', query={'s': session_token}),
|
||||
**traverse_obj(episode_attr, {
|
||||
'title': 'title',
|
||||
'description': 'synopsis',
|
||||
'title': ('title', {str}),
|
||||
'description': ('synopsis', {str}),
|
||||
'thumbnail': ('images', 'original'),
|
||||
'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}),
|
||||
'cast': ('cast', {lambda x: x.split(', ')}),
|
||||
'cast': ('cast', filter, {lambda x: x.split(', ')}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
}),
|
||||
**(traverse_obj(episode_attr, {
|
||||
'title': (None, ('subtitle', ('episode_nr', {lambda x: f'Episode {x}' if x else None}))),
|
||||
'series': 'title',
|
||||
'title': (None, (('subtitle', {str}, filter), {value(f'Episode {episode}' if episode else None)})),
|
||||
'series': ('title', {str}),
|
||||
'series_id': ('telecast_id', {str_or_none}),
|
||||
'season_number': ('season_id', {int_or_none}),
|
||||
'episode': 'subtitle',
|
||||
'episode': ('subtitle', {str}, filter),
|
||||
'episode_number': ('episode_nr', {int_or_none}),
|
||||
'episode_id': ('episode_id', {str_or_none}),
|
||||
}, get_all=False) if episode_attr.get('category') != 'movies' else {}),
|
||||
|
||||
@@ -162,7 +162,7 @@ class DVTVIE(InfoExtractor):
|
||||
items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
|
||||
if items:
|
||||
return self.playlist_result(
|
||||
[self._parse_video_metadata(i, video_id, timestamp) for i in items],
|
||||
(self._parse_video_metadata(i, video_id, timestamp) for i in items),
|
||||
video_id, self._html_search_meta('twitter:title', webpage))
|
||||
|
||||
item = self._search_regex(
|
||||
|
||||
@@ -207,7 +207,7 @@ class ERRJupiterIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': ('heading', {str}),
|
||||
'alt_title': ('subHeading', {str}),
|
||||
'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}),
|
||||
'description': (('lead', 'body'), {clean_html}, filter),
|
||||
'timestamp': ('created', {int_or_none}),
|
||||
'modified_timestamp': ('updated', {int_or_none}),
|
||||
'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
|
||||
|
||||
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ERTFlixBaseIE(InfoExtractor):
|
||||
@@ -74,29 +75,28 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
|
||||
|
||||
def _extract_formats_and_subs(self, video_id):
|
||||
media_info = self._call_api(video_id, codename=video_id)
|
||||
formats, subs = [], {}
|
||||
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
|
||||
for media in try_get(media_file, lambda x: x['Formats'], list) or []:
|
||||
fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
|
||||
if not fmt_url:
|
||||
continue
|
||||
ext = determine_ext(fmt_url)
|
||||
if ext == 'm3u8':
|
||||
formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
|
||||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
formats_, subs_ = self._extract_mpd_formats_and_subtitles(
|
||||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': str_or_none(media.get('Id')),
|
||||
})
|
||||
continue
|
||||
formats.extend(formats_)
|
||||
self._merge_subtitles(subs_, target=subs)
|
||||
formats, subtitles = [], {}
|
||||
for media in traverse_obj(media_info, (
|
||||
'MediaFiles', lambda _, v: v['RoleCodename'] == 'main',
|
||||
'Formats', lambda _, v: url_or_none(v['Url']))):
|
||||
fmt_url = media['Url']
|
||||
ext = determine_ext(fmt_url)
|
||||
if ext == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': str_or_none(media.get('Id')),
|
||||
})
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return formats, subs
|
||||
return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -294,37 +294,37 @@ class ESPNCricInfoIE(InfoExtractor):
|
||||
class WatchESPNIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'info_dict': {
|
||||
'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'id': '11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'ext': 'mp4',
|
||||
'title': 'Huddersfield vs. Burnley',
|
||||
'duration': 7500,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
'title': 'Abilene Chrstn vs. Texas Tech',
|
||||
'duration': 14166,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/11ce417a-6ac9-42b6-8a15-46aeb9ad5710/16x9.jpg?timestamp=202407252343&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'info_dict': {
|
||||
'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'id': '90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
|
||||
'duration': 8335,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'UC Davis vs. California',
|
||||
'duration': 9547,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'info_dict': {
|
||||
'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'id': 'c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Wheel - Episode 10',
|
||||
'duration': 3352,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'The College Football Show',
|
||||
'duration': 3639,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/c4313bbe-95b5-4bb8-b251-ac143ea0fc54/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -353,6 +353,13 @@ class WatchESPNIE(AdobePassIE):
|
||||
if not cookie:
|
||||
self.raise_login_required(method='cookies')
|
||||
|
||||
jwt = self._search_regex(r'=([^|]+)\|', cookie.value, 'cookie jwt')
|
||||
id_token = self._download_json(
|
||||
'https://registerdisney.go.com/jgc/v6/client/ESPN-ONESITE.WEB-PROD/guest/refresh-auth',
|
||||
None, 'Refreshing token', headers={'Content-Type': 'application/json'}, data=json.dumps({
|
||||
'refreshToken': json.loads(base64.urlsafe_b64decode(f'{jwt}==='))['refresh_token'],
|
||||
}).encode())['data']['token']['id_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'devices', video_id,
|
||||
headers={'Content-Type': 'application/json; charset=UTF-8'},
|
||||
@@ -371,7 +378,7 @@ class WatchESPNIE(AdobePassIE):
|
||||
})['access_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
|
||||
'accounts/grant', video_id, payload={'id_token': id_token},
|
||||
headers={
|
||||
'Authorization': token,
|
||||
'Content-Type': 'application/json; charset=UTF-8',
|
||||
|
||||
@@ -50,7 +50,7 @@ class FacebookIE(InfoExtractor):
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
events/(?:[^/]+/)?|
|
||||
groups/[^/]+/(?:permalink|posts)/|
|
||||
groups/[^/]+/(?:permalink|posts)/(?:[\da-f]+/)?|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
@@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1692346159,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': '100063551323670',
|
||||
'duration': 3132.184,
|
||||
'duration': 3133.583,
|
||||
'view_count': int,
|
||||
'concurrent_view_count': 0,
|
||||
},
|
||||
@@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
|
||||
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
|
||||
'duration': 131.03,
|
||||
'concurrent_view_count': int,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
@@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': 'ca63897a90c9452efee5f8c40d080e25',
|
||||
'md5': '1659aa21fb3dd1585874f668e81a72c8',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'uploader_id': '100059479812265',
|
||||
'concurrent_view_count': int,
|
||||
'duration': 44.478,
|
||||
'duration': 44.181,
|
||||
},
|
||||
}, {
|
||||
# FIXME: unable to extract uploader, no formats found
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||
@@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1511548260,
|
||||
'upload_date': '20171124',
|
||||
'uploader': 'Vickie Gentry',
|
||||
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
|
||||
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 148.435,
|
||||
'duration': 148.224,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
@@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
|
||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Lela Evans',
|
||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
||||
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
|
||||
'upload_date': '20231228',
|
||||
'timestamp': 1703804085,
|
||||
'duration': 394.347,
|
||||
@@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20180523',
|
||||
'uploader': 'ESL One Dota 2',
|
||||
'uploader_id': '100066514874195',
|
||||
'duration': 4524.212,
|
||||
'duration': 4524.001,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
@@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
|
||||
'title': 'Josef',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
|
||||
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
|
||||
'timestamp': 1549275572,
|
||||
'duration': 3.413,
|
||||
'duration': 3.283,
|
||||
'uploader': 'Josef Novak',
|
||||
'description': '',
|
||||
'upload_date': '20190204',
|
||||
@@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# FIXME: Cannot parse data error
|
||||
# data.event.cover_media_renderer.cover_video
|
||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||
'info_dict': {
|
||||
@@ -407,6 +410,9 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'Comitato Liberi Pensatori',
|
||||
'uploader_id': '100065709540881',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
@@ -498,7 +504,8 @@ class FacebookIE(InfoExtractor):
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||
or get_first(post, ('event', 'event_creator', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
|
||||
uploader = uploader_data.get('name') or (
|
||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
or self._search_regex(
|
||||
@@ -524,6 +531,11 @@ class FacebookIE(InfoExtractor):
|
||||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
|
||||
'like_count': ('likers', 'count', {int}),
|
||||
'comment_count': ('total_comment_count', {int}),
|
||||
'repost_count': ('share_count_reduced', {parse_count}),
|
||||
}), get_all=False),
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
@@ -554,12 +566,13 @@ class FacebookIE(InfoExtractor):
|
||||
return extract_video_data(try_get(
|
||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
|
||||
def extract_dash_manifest(vid_data, formats, mpd_url=None):
|
||||
dash_manifest = traverse_obj(
|
||||
vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
mpd_url=video.get('dash_manifest_url')))
|
||||
mpd_url=url_or_none(vid_data.get('dash_manifest_url')) or mpd_url))
|
||||
|
||||
def process_formats(info):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
@@ -609,16 +622,20 @@ class FacebookIE(InfoExtractor):
|
||||
video = video['creation_story']
|
||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||
video.update(reel_info)
|
||||
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
|
||||
# Legacy formats extraction
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||
('browser_native_sd_url', 'sd')):
|
||||
playable_url = video.get(key)
|
||||
playable_url = fmt_data.get(key)
|
||||
if not playable_url:
|
||||
continue
|
||||
if determine_ext(playable_url) == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(playable_url, video_id))
|
||||
formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@@ -626,7 +643,29 @@ class FacebookIE(InfoExtractor):
|
||||
'quality': q(format_id) - 3,
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
extract_dash_manifest(fmt_data, formats)
|
||||
|
||||
# New videoDeliveryResponse formats extraction
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
|
||||
mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
|
||||
dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
|
||||
for idx, dash_manifest in enumerate(dash_manifests):
|
||||
extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
|
||||
if not dash_manifests:
|
||||
# Only extract from MPD URLs if the manifests are not already provided
|
||||
for mpd_url in mpd_urls:
|
||||
formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
|
||||
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
|
||||
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
# sd, hd formats w/o resolution info should be deprioritized below DASH
|
||||
'quality': q(format_id) - 3,
|
||||
'url': prog_fmt['progressive_url'],
|
||||
})
|
||||
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
|
||||
|
||||
if not formats:
|
||||
# Do not append false positive entry w/o any formats
|
||||
return
|
||||
@@ -932,18 +971,21 @@ class FacebookReelIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||
'md5': 'f13dd37f2633595982db5ed8765474d3',
|
||||
'md5': 'a53256d10fc2105441fe0c4212ed8cea',
|
||||
'info_dict': {
|
||||
'id': '1195289147628387',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
|
||||
'description': 'md5:22f03309b216ac84720183961441d8db',
|
||||
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
|
||||
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'uploader': 'Beast Camp Training',
|
||||
'uploader_id': '100040874179269',
|
||||
'duration': 9.579,
|
||||
'timestamp': 1637502609,
|
||||
'upload_date': '20211121',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -193,9 +193,9 @@ class FunimationIE(FunimationBaseIE):
|
||||
|
||||
for lang, version, fmt in self._get_experiences(episode):
|
||||
experience_id = str(fmt['experienceId'])
|
||||
if (only_initial_experience and experience_id != initial_experience_id
|
||||
or requested_languages and lang.lower() not in requested_languages
|
||||
or requested_versions and version.lower() not in requested_versions):
|
||||
if ((only_initial_experience and experience_id != initial_experience_id)
|
||||
or (requested_languages and lang.lower() not in requested_languages)
|
||||
or (requested_versions and version.lower() not in requested_versions)):
|
||||
continue
|
||||
thumbnails.append({'url': fmt.get('poster')})
|
||||
duration = max(duration, fmt.get('duration', 0))
|
||||
|
||||
@@ -3,7 +3,7 @@ from .nexx import NexxIE
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
|
||||
'md5': '8610449476156f338761a75391b0017d',
|
||||
@@ -27,6 +27,9 @@ class FunkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
141
yt_dlp/extractor/gamedevtv.py
Normal file
141
yt_dlp/extractor/gamedevtv.py
Normal file
@@ -0,0 +1,141 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GameDevTVDashboardIE(InfoExtractor):
    # Handles gamedev.tv dashboard URLs for a whole course (returned as a playlist)
    # or for a single lecture inside a course (returned as one video).
    _VALID_URL = r'https?://(?:www\.)?gamedev\.tv/dashboard/courses/(?P<course_id>\d+)(?:/(?P<lecture_id>\d+))?'
    _NETRC_MACHINE = 'gamedevtv'
    _TESTS = [{
        'url': 'https://www.gamedev.tv/dashboard/courses/25',
        'info_dict': {
            'id': '25',
            'title': 'Complete Blender Creator 3: Learn 3D Modelling for Beginners',
            'tags': ['blender', 'course', 'all', 'box modelling', 'sculpting'],
            'categories': ['Blender', '3D Art'],
            'thumbnail': 'https://gamedev-files.b-cdn.net/courses/qisc9pmu1jdc.jpg',
            'upload_date': '20220516',
            'timestamp': 1652694420,
            'modified_date': '20241027',
            'modified_timestamp': 1730049658,
        },
        'playlist_count': 100,
    }, {
        'url': 'https://www.gamedev.tv/dashboard/courses/63/2279',
        'info_dict': {
            'id': 'df04f4d8-68a4-4756-a71b-9ca9446c3a01',
            'ext': 'mp4',
            'modified_timestamp': 1701695752,
            'upload_date': '20230504',
            'episode': 'MagicaVoxel Community Course Introduction',
            'series_id': '63',
            'title': 'MagicaVoxel Community Course Introduction',
            'timestamp': 1683195397,
            'modified_date': '20231204',
            'categories': ['3D Art', 'MagicaVoxel'],
            'season': 'MagicaVoxel Community Course',
            'tags': ['MagicaVoxel', 'all', 'course'],
            'series': 'MagicaVoxel 3D Art Mini Course',
            'duration': 1405,
            'episode_number': 1,
            'season_number': 1,
            'season_id': '219',
            'description': 'md5:a378738c5bbec1c785d76c067652d650',
            'display_id': '63-219-2279',
            'alt_title': '1_CC_MVX MagicaVoxel Community Course Introduction.mp4',
            'thumbnail': 'https://vz-23691c65-6fa.b-cdn.net/df04f4d8-68a4-4756-a71b-9ca9446c3a01/thumbnail.jpg',
        },
    }]
    # Class-level default only; a logged-in instance rebinds its own copy
    # (see _perform_login) so credentials are never shared between instances.
    _API_HEADERS = {}

    def _perform_login(self, username, password):
        """Log in with email/password and remember the resulting Authorization header.

        Raises an expected ExtractorError when the API answers HTTP 401;
        any other download failure is re-raised unchanged.
        """
        try:
            response = self._download_json(
                'https://api.gamedev.tv/api/students/login', None, 'Logging in',
                headers={'Content-Type': 'application/json'},
                data=json.dumps({
                    'email': username,
                    'password': password,
                    'cart_items': [],
                }).encode())
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                raise ExtractorError('Invalid username/password', expected=True)
            raise

        # Rebind a fresh dict on the instance instead of mutating the class-level
        # dict in place: in-place mutation would leak this account's token into
        # every other instance of the extractor living in the same process.
        self._API_HEADERS = {
            **self._API_HEADERS,
            'Authorization': f'{response["token_type"]} {response["access_token"]}',
        }

    def _real_initialize(self):
        """Refuse to continue unless a login has provided an Authorization header."""
        if not self._API_HEADERS.get('Authorization'):
            self.raise_login_required(
                'This content is only available with purchase', method='password')

    def _entries(self, data, course_id, course_info, selected_lecture):
        """Yield one info dict per lecture that has a valid playlist URL.

        data is the course payload, course_info the already-extracted course
        metadata merged into every entry, and selected_lecture an optional
        lecture id (as a string); when given, all other lectures are skipped
        before any formats are requested.
        """
        for section in traverse_obj(data, ('sections', ..., {dict})):
            section_info = traverse_obj(section, {
                'season_id': ('id', {str_or_none}),
                'season': ('title', {str}),
                'season_number': ('order', {int_or_none}),
            })
            for lecture in traverse_obj(section, ('lectures', lambda _, v: url_or_none(v['video']['playListUrl']))):
                if selected_lecture and str(lecture.get('id')) != selected_lecture:
                    continue
                display_id = join_nonempty(course_id, section_info.get('season_id'), lecture.get('id'))
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                    lecture['video']['playListUrl'], display_id, 'mp4', m3u8_id='hls')
                yield {
                    **course_info,
                    **section_info,
                    'id': display_id,  # fallback
                    'display_id': display_id,
                    'formats': formats,
                    'subtitles': subtitles,
                    'series': course_info.get('title'),
                    'series_id': course_id,
                    # Lecture-level fields come last so they override the
                    # course-level and fallback values set above
                    **traverse_obj(lecture, {
                        'id': ('video', 'guid', {str}),
                        'title': ('title', {str}),
                        'alt_title': ('video', 'title', {str}),
                        'description': ('description', {clean_html}),
                        'episode': ('title', {str}),
                        'episode_number': ('order', {int_or_none}),
                        'duration': ('video', 'duration_in_sec', {int_or_none}),
                        'timestamp': ('video', 'created_at', {parse_iso8601}),
                        'modified_timestamp': ('video', 'updated_at', {parse_iso8601}),
                        'thumbnail': ('video', 'thumbnailUrl', {url_or_none}),
                    }),
                }

    def _real_extract(self, url):
        """Return a single lecture if the URL names one, otherwise the course playlist.

        Raises ExtractorError('Lecture not found') when a lecture id is given
        but no matching lecture is yielded for the course.
        """
        course_id, lecture_id = self._match_valid_url(url).group('course_id', 'lecture_id')
        data = self._download_json(
            f'https://api.gamedev.tv/api/courses/my/{course_id}', course_id,
            headers=self._API_HEADERS)['data']

        course_info = traverse_obj(data, {
            'title': ('title', {str}),
            'tags': ('tags', ..., 'name', {str}),
            'categories': ('categories', ..., 'title', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
            'thumbnail': ('image', {url_or_none}),
        })

        entries = self._entries(data, course_id, course_info, lecture_id)
        if lecture_id:
            # _entries is lazy, so only the matching lecture's formats are fetched
            lecture = next(entries, None)
            if not lecture:
                raise ExtractorError('Lecture not found')
            return lecture
        return self.playlist_result(entries, course_id, **course_info)
|
||||
@@ -8,6 +8,9 @@ from .common import InfoExtractor
|
||||
from .commonprotocols import RtmpIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
MEDIA_EXTENSIONS,
|
||||
@@ -2340,7 +2343,7 @@ class GenericIE(InfoExtractor):
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
if re.match(r'[^\s/]+\.[^\s/]+/', url):
|
||||
self.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
@@ -2373,6 +2376,11 @@ class GenericIE(InfoExtractor):
|
||||
else:
|
||||
video_id = self._generic_id(url)
|
||||
|
||||
# Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
|
||||
impersonate = self._configuration_arg('impersonate', ['false'])
|
||||
if 'false' in impersonate:
|
||||
impersonate = None
|
||||
|
||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
||||
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
|
||||
@@ -2381,10 +2389,29 @@ class GenericIE(InfoExtractor):
|
||||
# to accept raw bytes and being able to download only a chunk.
|
||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||
# after a HEAD request, but not sure if we can rely on this.
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}))
|
||||
try:
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}), impersonate=impersonate)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
|
||||
and e.cause.response.get_header('cf-mitigated') == 'challenge'
|
||||
and e.cause.response.extensions.get('impersonate') is None):
|
||||
raise
|
||||
cf_cookie_domain = traverse_obj(
|
||||
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
|
||||
('__cf_bm', 'domain'))
|
||||
if cf_cookie_domain:
|
||||
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
|
||||
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
|
||||
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
|
||||
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
|
||||
'how to install the required impersonation dependency, and ')
|
||||
raise ExtractorError(
|
||||
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
|
||||
|
||||
new_url = full_response.url
|
||||
if new_url != extract_basic_auth(url)[0]:
|
||||
self.report_following_redirect(new_url)
|
||||
@@ -2400,7 +2427,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = full_response.headers.get('Content-Type', '').lower()
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
self.report_detected('direct video link')
|
||||
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
||||
|
||||
91
yt_dlp/extractor/germanupa.py
Normal file
91
yt_dlp/extractor/germanupa.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GermanupaIE(InfoExtractor):
    # Handles germanupa.de "mediathek" pages. When the page carries the site's
    # oEmbed iframe, the embedded URL is rewritten to the Vimeo player form and
    # delegated to VimeoIE; otherwise the page is handed to the generic
    # extractor, unless a login wrapper is found on the page.
    IE_DESC = 'germanupa.de'
    _VALID_URL = r'https?://germanupa\.de/mediathek/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://germanupa.de/mediathek/4-figma-beratung-deine-sprechstunde-fuer-figma-fragen',
        'info_dict': {
            'id': '909179246',
            'title': 'Tutorial: #4 Figma Beratung - Deine Sprechstunde für Figma-Fragen',
            'ext': 'mp4',
            'uploader': 'German UPA',
            'uploader_id': 'germanupa',
            'thumbnail': 'https://i.vimeocdn.com/video/1792564420-7415283ccef8bf8702dab8c6b7515555ceeb7a1c11371ffcc133b8e887dbf70e-d_1280',
            'uploader_url': 'https://vimeo.com/germanupa',
            'duration': 3987,
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'audio, uses GenericIE',
        'url': 'https://germanupa.de/mediathek/live-vom-ux-festival-neuigkeiten-von-figma-jobmarkt-agenturszene-interview-zu-sustainable',
        'info_dict': {
            'id': '1867346676',
            'title': 'Live vom UX Festival: Neuigkeiten von Figma, Jobmarkt, Agenturszene & Interview zu Sustainable UX',
            'ext': 'opus',
            'timestamp': 1720545088,
            'upload_date': '20240709',
            'duration': 3910.557,
            'like_count': int,
            'description': 'md5:db2aed5ff131e177a7b33901e9a8db05',
            'uploader': 'German UPA',
            'repost_count': int,
            'genres': ['Science'],
            'license': 'all-rights-reserved',
            'uploader_url': 'https://soundcloud.com/user-80097677',
            'uploader_id': '471579486',
            'view_count': int,
            'comment_count': int,
            'thumbnail': 'https://i1.sndcdn.com/artworks-oCti2e9GhaZFWBqY-48ybGw-original.jpg',
        },
    }, {
        'note': 'Nur für Mitglieder/Just for members',
        'url': 'https://germanupa.de/mediathek/ux-festival-2024-usability-tests-und-ai',
        'info_dict': {
            'id': '986994430',
            'title': 'UX Festival 2024 "Usability Tests und AI" von Lennart Weber',
            'ext': 'mp4',
            'release_date': '20240719',
            'uploader_url': 'https://vimeo.com/germanupa',
            'timestamp': 1721373980,
            'license': 'by-sa',
            'like_count': int,
            'thumbnail': 'https://i.vimeocdn.com/video/1904187064-2a672630c30f9ad787bd390bff3f51d7506a3e8416763ba6dbf465732b165c5c-d_1280',
            'duration': 2146,
            'release_timestamp': 1721373980,
            'uploader': 'German UPA',
            'uploader_id': 'germanupa',
            'upload_date': '20240719',
            'comment_count': int,
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The oEmbed iframe URL carries the actual media URL in its `url` query parameter
        oembed_url = self._search_regex(
            r'<iframe[^>]+data-src\s*?=\s*?([\'"])(?P<url>https://germanupa\.de/media/oembed\?url=(?:(?!\1).)+)\1',
            webpage, 'embedded video', default=None, group='url')
        param_url = traverse_obj(oembed_url, ({parse_qs}, 'url', 0, {url_or_none}))

        if param_url:
            real_url = param_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/')
            return self.url_result(VimeoIE._smuggle_referrer(real_url, url), VimeoIE, video_id)

        # No embed found: a login wrapper on the page means the content is members-only
        login_wrapper = self._search_regex(
            r'<div[^>]+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1',
            webpage, 'login wrapper', default=None)
        if login_wrapper:
            self.raise_login_required('This video is only available for members')

        return self.url_result(url, 'Generic')  # Fall back to generic to extract audio
|
||||
@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
|
||||
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
|
||||
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://academymel.online/3video_1',
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GolemIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_TEST = {
|
||||
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||
|
||||
@@ -5,56 +5,63 @@ import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class GoPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)'
|
||||
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
|
||||
|
||||
_NETRC_MACHINE = 'goplay'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay',
|
||||
'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
|
||||
'info_dict': {
|
||||
'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811',
|
||||
'id': '2baa4560-87a0-421b-bffc-359914e3c387',
|
||||
'ext': 'mp4',
|
||||
'title': 'S3 - Aflevering 2',
|
||||
'series': 'De Container Cup',
|
||||
'season': 'Season 3',
|
||||
'season_number': 3,
|
||||
'episode': 'Episode 2',
|
||||
'episode_number': 2,
|
||||
'title': 'S22 - Aflevering 1',
|
||||
'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
|
||||
'series': 'De Slimste Mens ter Wereld',
|
||||
'episode': 'Episode 1',
|
||||
'season_number': 22,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 22',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay',
|
||||
'url': 'https://www.goplay.be/video/1917',
|
||||
'info_dict': {
|
||||
'id': '74e3ed07-748c-49e4-85a0-393a93337dbf',
|
||||
'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Family for the Holidays',
|
||||
'title': '1917',
|
||||
'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
|
||||
'info_dict': {
|
||||
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
|
||||
'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
|
||||
'ext': 'mp4',
|
||||
'title': 'S11 - Aflevering 1',
|
||||
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
|
||||
'episode': 'Episode 1',
|
||||
'series': 'De Mol',
|
||||
'season_number': 11,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 11',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}]
|
||||
|
||||
@@ -69,27 +76,42 @@ class GoPlayIE(InfoExtractor):
|
||||
if not self._id_token:
|
||||
raise self.raise_login_required(method='password')
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, display_id = self._match_valid_url(url).group(0, 'display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
|
||||
video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
|
||||
def _find_json(self, s):
|
||||
return self._search_json(
|
||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
||||
|
||||
movie = video_data.get('movie')
|
||||
if movie:
|
||||
video_id = movie['videoUuid']
|
||||
info_dict = {
|
||||
'title': movie.get('title'),
|
||||
}
|
||||
else:
|
||||
episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False)
|
||||
video_id = episode['videoUuid']
|
||||
info_dict = {
|
||||
'title': episode.get('episodeTitle'),
|
||||
'series': traverse_obj(episode, ('program', 'title')),
|
||||
'season_number': episode.get('seasonNumber'),
|
||||
'episode_number': episode.get('episodeNumber'),
|
||||
}
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nextjs_data = traverse_obj(
|
||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
||||
(..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
|
||||
meta = traverse_obj(nextjs_data, (
|
||||
..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))
|
||||
|
||||
video_id = meta['uuid']
|
||||
info_dict = traverse_obj(meta, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str.strip}),
|
||||
})
|
||||
|
||||
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
|
||||
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
|
||||
episode_data = traverse_obj(
|
||||
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
|
||||
if not episode_data:
|
||||
continue
|
||||
|
||||
episode_title = traverse_obj(
|
||||
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
|
||||
info_dict.update({
|
||||
'title': episode_title or info_dict.get('title'),
|
||||
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
|
||||
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
|
||||
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
|
||||
})
|
||||
break
|
||||
|
||||
api = self._download_json(
|
||||
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class HRFernsehenIE(InfoExtractor):
|
||||
IE_NAME = 'hrfernsehen'
|
||||
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
|
||||
'md5': '5c4e0ba94677c516a2f65a84110fc536',
|
||||
|
||||
@@ -8,15 +8,19 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class HuyaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
IE_NAME = 'huya:live'
|
||||
IE_DESC = 'huya.com'
|
||||
TESTS = [{
|
||||
@@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '572329',
|
||||
'title': str,
|
||||
'ext': 'flv',
|
||||
'description': str,
|
||||
'is_live': True,
|
||||
'view_count': int,
|
||||
@@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor):
|
||||
fm = base64.b64decode(params['fm']).decode().split('_', 1)[0]
|
||||
ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
|
||||
return fm, ss
|
||||
|
||||
|
||||
class HuyaVideoIE(InfoExtractor):
    # Handles huya.com/video/play/<id>.html pages by querying the
    # liveapi.huya.com "getMomentContent" endpoint for the video info.
    _VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P<id>\d+)\.html'
    IE_NAME = 'huya:video'
    IE_DESC = '虎牙视频'

    _TESTS = [{
        'url': 'https://www.huya.com/video/play/1002412640.html',
        'info_dict': {
            'id': '1002412640',
            'ext': 'mp4',
            'title': '8月3日',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 14,
            'uploader': '虎牙-ATS欧卡车队青木',
            'uploader_id': '1564376151',
            'upload_date': '20240803',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }, {
        'url': 'https://www.huya.com/video/play/556054543.html',
        'info_dict': {
            'id': '556054543',
            'ext': 'mp4',
            'title': '我不挑事 也不怕事',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 1864,
            'uploader': '卡尔',
            'uploader_id': '367138632',
            'upload_date': '20210811',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }]

    def _real_extract(self, url: str):
        video_id = self._match_id(url)
        api_response = self._download_json(
            'https://liveapi.huya.com/moment/getMomentContent', video_id,
            query={'videoId': video_id})
        video_info = api_response['data']['moment']['videoInfo']

        # Only definitions carrying a valid URL become formats
        usable_definitions = traverse_obj(
            video_info, ('definitions', lambda _, v: url_or_none(v['url'])))
        formats = [{
            'url': entry['url'],
            **traverse_obj(entry, {
                'format_id': ('defName', {str}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': ('size', {int_or_none}),
            }),
        } for entry in usable_definitions]

        metadata = traverse_obj(video_info, {
            'title': ('videoTitle', {str}),
            'thumbnail': ('videoCover', {url_or_none}),
            'duration': ('videoDuration', {parse_duration}),
            'uploader': ('nickName', {str}),
            'uploader_id': ('uid', {str_or_none}),
            'upload_date': ('videoUploadTime', {unified_strdate}),
            'view_count': ('videoPlayNum', {int_or_none}),
            'comment_count': ('videoCommentNum', {int_or_none}),
            'like_count': ('favorCount', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -63,7 +62,7 @@ class IlPostIE(InfoExtractor):
|
||||
'url': ('podcast_raw_url', {url_or_none}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'timestamp': ('timestamp', {int_or_none}),
|
||||
'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('milliseconds', {float_or_none(scale=1000)}),
|
||||
'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ class ImgurBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ImgurIE(ImgurBaseIE):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://imgur.com/A61SaA1',
|
||||
@@ -54,6 +54,22 @@ class ImgurIE(ImgurBaseIE):
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||
},
|
||||
}, {
|
||||
# Test with URL slug
|
||||
'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1',
|
||||
'info_dict': {
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 'MRW gifv is up and running without any bugs',
|
||||
'timestamp': 1416446068,
|
||||
'upload_date': '20141120',
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1416446068,
|
||||
'release_date': '20141120',
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
'only_matching': True,
|
||||
@@ -92,6 +108,7 @@ class ImgurIE(ImgurBaseIE):
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1710491255,
|
||||
'release_date': '20240315',
|
||||
'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -208,7 +225,10 @@ class ImgurIE(ImgurBaseIE):
|
||||
}), get_all=False),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'thumbnail': url_or_none(search('thumbnailUrl')),
|
||||
'thumbnails': [{
|
||||
'url': thumbnail_url,
|
||||
'http_headers': {'Accept': '*/*'},
|
||||
}] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None,
|
||||
'http_headers': {'Accept': '*/*'},
|
||||
}
|
||||
|
||||
@@ -252,17 +272,9 @@ class ImgurGalleryBaseIE(ImgurBaseIE):
|
||||
|
||||
class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
IE_NAME = 'imgur:gallery'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://imgur.com/gallery/Q95ko',
|
||||
'info_dict': {
|
||||
'id': 'Q95ko',
|
||||
'title': 'Adding faces make every GIF better',
|
||||
},
|
||||
'playlist_count': 25,
|
||||
'skip': 'Zoinks! You\'ve taken a wrong turn.',
|
||||
}, {
|
||||
# TODO: static images - replace with animated/video gallery
|
||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||
'only_matching': True,
|
||||
@@ -280,7 +292,27 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
'release_timestamp': 1358554297,
|
||||
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||
'release_date': '20130119',
|
||||
'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
|
||||
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# Test with slug
|
||||
'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx',
|
||||
'add_ies': ['Imgur'],
|
||||
'info_dict': {
|
||||
'id': 'YcAQlkx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||
'timestamp': 1358554297,
|
||||
'upload_date': '20130119',
|
||||
'uploader_id': '1648642',
|
||||
'uploader': 'wittyusernamehere',
|
||||
'release_timestamp': 1358554297,
|
||||
'release_date': '20130119',
|
||||
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
@@ -317,6 +349,13 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
'title': 'Penguins !',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ',
|
||||
'info_dict': {
|
||||
'id': '6lAn9VQ',
|
||||
'title': 'Penguins !',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
|
||||
'add_ies': ['Imgur'],
|
||||
@@ -357,7 +396,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
|
||||
class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||
IE_NAME = 'imgur:album'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
_GALLERY = False
|
||||
_TESTS = [{
|
||||
# TODO: only static images - replace with animated/video gallery
|
||||
@@ -372,6 +411,14 @@ class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||
'title': 'enen-no-shouboutai',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# Test with URL slug
|
||||
'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX',
|
||||
'info_dict': {
|
||||
'id': 'iX265HX',
|
||||
'title': 'enen-no-shouboutai',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://imgur.com/a/8pih2Ed',
|
||||
'info_dict': {
|
||||
|
||||
@@ -48,7 +48,6 @@ class InstagramBaseIE(InfoExtractor):
|
||||
'X-IG-WWW-Claim': '0',
|
||||
'Origin': 'https://www.instagram.com',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@@ -255,7 +254,7 @@ class InstagramIOSIE(InfoExtractor):
|
||||
|
||||
|
||||
class InstagramIE(InstagramBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/(?!share/)[^/?#]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
@@ -435,10 +434,10 @@ class InstagramIE(InstagramBaseIE):
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'query_hash': '9f8827793ef34641b2fb195d4d41151c',
|
||||
'doc_id': '8845758582119845',
|
||||
'variables': json.dumps(variables, separators=(',', ':')),
|
||||
})
|
||||
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
|
||||
media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
|
||||
|
||||
if not general_info:
|
||||
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
||||
|
||||
@@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor):
|
||||
'id': 'p51388',
|
||||
'ext': 'mp4',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
|
||||
'upload_date': '20201103',
|
||||
'timestamp': 1604437480,
|
||||
'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
|
||||
'episode': 'Partička (92)',
|
||||
'season': 'Partička',
|
||||
'series': 'Prima Partička',
|
||||
'episode_number': 92,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
|
||||
'info_dict': {
|
||||
'id': 'p1412199',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 3,
|
||||
'episode': 'Tenerife: V říši ohně',
|
||||
'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
|
||||
'duration': 3111.0,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
|
||||
'title': 'Tenerife: V říši ohně',
|
||||
'timestamp': 1711825800,
|
||||
'upload_date': '20240330',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
@@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor):
|
||||
video_id = self._search_regex((
|
||||
r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'let\s+videos\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
), webpage, 'real id', group='id', default=None)
|
||||
|
||||
if not video_id:
|
||||
@@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor):
|
||||
final_result = self._search_json_ld(webpage, video_id, default={})
|
||||
final_result.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': final_result.get('title') or title,
|
||||
'thumbnail': self._html_search_meta(
|
||||
['thumbnail', 'og:image', 'twitter:image'],
|
||||
webpage, 'thumbnail', default=None),
|
||||
|
||||
@@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
|
||||
|
||||
|
||||
class SangiinInstructionIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
_VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
IE_DESC = False # this shouldn't be listed as a supported site
|
||||
|
||||
def _real_extract(self, url):
|
||||
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
|
||||
raise ExtractorError(
|
||||
'Copy the link from the button below the video description/player '
|
||||
'and use that link to download. If there is no button in the frame, '
|
||||
'get the URL of the frame showing the video.', expected=True)
|
||||
|
||||
|
||||
class SangiinIE(InfoExtractor):
|
||||
|
||||
@@ -326,11 +326,11 @@ class JioCinemaIE(JioCinemaBaseIE):
|
||||
# fallback metadata
|
||||
'title': ('name', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('show', 'name', {str}, {lambda x: x or None}),
|
||||
'series': ('show', 'name', {str}, filter),
|
||||
'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
|
||||
'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}),
|
||||
'season_number': ('episode', 'season', {int_or_none}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}),
|
||||
'episode_number': ('episode', 'episodeNo', {int_or_none}, filter),
|
||||
'age_limit': ('ageNemonic', {parse_age_limit}),
|
||||
'duration': ('totalDuration', {float_or_none}),
|
||||
'thumbnail': ('images', {url_or_none}),
|
||||
@@ -338,10 +338,10 @@ class JioCinemaIE(JioCinemaBaseIE):
|
||||
**traverse_obj(metadata, ('result', 0, {
|
||||
'title': ('fullTitle', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('showName', {str}, {lambda x: x or None}),
|
||||
'season': ('seasonName', {str}, {lambda x: x or None}),
|
||||
'series': ('showName', {str}, filter),
|
||||
'season': ('seasonName', {str}, filter),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'season_id': ('seasonId', {str}, {lambda x: x or None}),
|
||||
'season_id': ('seasonId', {str}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'timestamp': ('uploadTime', {int_or_none}),
|
||||
|
||||
@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
|
||||
https?://
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
|
||||
160
yt_dlp/extractor/kenh14.py
Normal file
160
yt_dlp/extractor/kenh14.py
Normal file
@@ -0,0 +1,160 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_attribute,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Kenh14VideoIE(InfoExtractor):
    # Handles single video pages on video.kenh14.vn. The direct media location is
    # read from the `data-vid` attribute of the page's VideoStream element;
    # metadata and HLS/DASH manifests are fetched as optional extras.
    _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
        'md5': '1ed67f9c3a1e74acf15db69590cf6210',
        'info_dict': {
            'id': '316173',
            'ext': 'mp4',
            'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'uploader': 'Unbox Therapy',
            'upload_date': '20220517',
            'view_count': int,
            'duration': 722.86,
            'timestamp': 1652764468,
        },
    }, {
        'url': 'https://video.kenh14.vn/video-316174.chn',
        'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
        'info_dict': {
            'id': '316174',
            'ext': 'mp4',
            'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
            'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'upload_date': '20220517',
            'view_count': int,
            'duration': 70.04,
            'timestamp': 1652766021,
        },
    }, {
        'url': 'https://video.kenh14.vn/0-344740.chn',
        'md5': 'b843495d5e728142c8870c09b46df2a9',
        'info_dict': {
            'id': '344740',
            'ext': 'mov',
            'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
            'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
            'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
            'uploader': 'Quang Vũ',
            'upload_date': '20241024',
            'view_count': int,
            'duration': 198.88,
            'timestamp': 1729741590,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        stream_element = get_element_html_by_attribute('type', 'VideoStream', webpage)
        attrs = extract_attributes(stream_element or '')
        direct_url = attrs['data-vid']

        # Metadata lookup is keyed by the file name without the CDN host prefix
        file_name = remove_start(direct_url, 'kenh14cdn.com/')
        metadata = self._download_json(
            'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format(file_name),
            video_id, fatal=False)

        formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
        subtitles = {}
        video_data = self._download_json(
            f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)

        # HLS first, then DASH; each manifest is optional and non-fatal
        manifest_extractors = (
            ('hls', self._extract_m3u8_formats_and_subtitles, {'m3u8_id': 'hls'}),
            ('mpd', self._extract_mpd_formats_and_subtitles, {'mpd_id': 'dash'}),
        )
        for data_key, extract_manifest, id_kwargs in manifest_extractors:
            manifest_url = traverse_obj(video_data, (data_key, {url_or_none}))
            if not manifest_url:
                continue
            fmts, subs = extract_manifest(manifest_url, video_id, fatal=False, **id_kwargs)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            **traverse_obj(metadata, {
                'duration': ('duration', {parse_duration}),
                'uploader': ('author', {strip_or_none}),
                'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
                'view_count': ('views', {int_or_none}),
            }),
            'id': video_id,
            # Prefer the API title, then OpenGraph, then the on-page title element
            'title': (
                traverse_obj(metadata, ('title', {strip_or_none}))
                or clean_html(self._og_search_title(webpage))
                or clean_html(get_element_by_class('vdbw-title', webpage))),
            'formats': formats,
            'subtitles': subtitles,
            'description': (
                clean_html(self._og_search_description(webpage))
                or clean_html(get_element_by_class('vdbw-sapo', webpage))),
            'thumbnail': (self._og_search_thumbnail(webpage) or attrs.get('data-thumb')),
            # Keywords meta is split on ';' and empty entries are dropped
            'tags': traverse_obj(self._html_search_meta('keywords', webpage), (
                {lambda x: x.split(';')}, ..., filter)),
        }
|
||||
|
||||
|
||||
class Kenh14PlaylistIE(InfoExtractor):
    # Playlist pages on video.kenh14.vn; the numeric suffix of the slug is the playlist id
    _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
        'info_dict': {
            'id': '71',
            'title': 'Trần Tình (Naked love) mùa 2',
            'description': 'md5:e9522339304956dea931722dd72eddb2',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://video.kenh14.vn/playlist/0-72.chn',
        'info_dict': {
            'id': '72',
            'title': 'Lau Lại Đầu Từ',
            'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Markup of the "category-detail" element, used as the primary source for title/description
        category_detail = get_element_by_class('category-detail', webpage) or ''
        # First JSON-LD object carrying both `name` and `alternateName`, used as a fallback
        embed_info = traverse_obj(
            self._yield_json_ld(webpage, playlist_id),
            (lambda _, v: v['name'] and v['alternateName'], any)) or {}

        def build_video_url(item_html):
            # Each "video-item" element exposes the video id in its `data-id` attribute
            return 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(item_html)['data-id'])

        video_items = get_elements_html_by_class('video-item', webpage)
        playlist_title = (
            clean_html(get_element_by_class('name', category_detail))
            or unescapeHTML(embed_info.get('name')))
        playlist_description = (
            clean_html(get_element_by_class('description', category_detail))
            or unescapeHTML(embed_info.get('alternateName')))
        # og:image URL with its query string dropped
        thumbnail = traverse_obj(
            self._og_search_thumbnail(webpage),
            ({url_or_none}, {update_url(query=None)}))

        return self.playlist_from_matches(
            video_items, playlist_id, playlist_title,
            getter=build_video_url, ie=Kenh14VideoIE,
            playlist_description=playlist_description, thumbnail=thumbnail)
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
@@ -67,7 +66,7 @@ class KickIE(KickBaseIE):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if KickClipIE.suitable(url) else super().suitable(url)
|
||||
return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel = self._match_id(url)
|
||||
@@ -98,25 +97,25 @@ class KickIE(KickBaseIE):
|
||||
|
||||
class KickVODIE(KickBaseIE):
|
||||
IE_NAME = 'kick:vod'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/video/e74614f4-5270-4319-90ad-32179f19a45c',
|
||||
'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'md5': '3870f94153e40e7121a6e46c068b70cb',
|
||||
'info_dict': {
|
||||
'id': 'e74614f4-5270-4319-90ad-32179f19a45c',
|
||||
'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'ext': 'mp4',
|
||||
'title': r're:❎ MEGA DRAMA ❎ LIVE ❎ CLICK ❎ ULTIMATE SKILLS .+',
|
||||
'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
|
||||
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
|
||||
'channel': 'xqc',
|
||||
'channel_id': '668',
|
||||
'uploader': 'xQc',
|
||||
'uploader_id': '676',
|
||||
'upload_date': '20240724',
|
||||
'timestamp': 1721796562,
|
||||
'duration': 18566.0,
|
||||
'upload_date': '20240909',
|
||||
'timestamp': 1725919141,
|
||||
'duration': 10155.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'view_count': int,
|
||||
'categories': ['VALORANT'],
|
||||
'categories': ['Just Chatting'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
@@ -137,7 +136,7 @@ class KickVODIE(KickBaseIE):
|
||||
'uploader': ('livestream', 'channel', 'user', 'username', {str}),
|
||||
'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('livestream', 'duration', {float_or_none(scale=1000)}),
|
||||
'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
|
||||
'categories': ('livestream', 'categories', ..., 'name', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
@@ -148,7 +147,7 @@ class KickVODIE(KickBaseIE):
|
||||
|
||||
class KickClipIE(KickBaseIE):
|
||||
IE_NAME = 'kick:clips'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/?\?(?:[^#]+&)?clip=(?P<id>clip_[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?P<id>clip_[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
|
||||
'info_dict': {
|
||||
@@ -189,6 +188,26 @@ class KickClipIE(KickBaseIE):
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5',
|
||||
'info_dict': {
|
||||
'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5',
|
||||
'ext': 'mp4',
|
||||
'title': 'KLJASLDJKLJKASDLJKDAS',
|
||||
'channel': 'spreen',
|
||||
'channel_id': '5312671',
|
||||
'uploader': 'AnormalBarraBaja',
|
||||
'uploader_id': '26518262',
|
||||
'duration': 43.0,
|
||||
'upload_date': '20240927',
|
||||
'timestamp': 1727399987,
|
||||
'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': ['Minecraft'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -119,7 +119,7 @@ class KikaIE(InfoExtractor):
|
||||
'width': ('frameWidth', {int_or_none}),
|
||||
'height': ('frameHeight', {int_or_none}),
|
||||
# NB: filesize is 0 if unknown, bitrate is -1 if unknown
|
||||
'filesize': ('fileSize', {int_or_none}, {lambda x: x or None}),
|
||||
'filesize': ('fileSize', {int_or_none}, filter),
|
||||
'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
}),
|
||||
|
||||
@@ -32,7 +32,7 @@ class LaracastsBaseIE(InfoExtractor):
|
||||
VimeoIE, url_transparent=True,
|
||||
**traverse_obj(episode, {
|
||||
'id': ('id', {int}, {str_or_none}),
|
||||
'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
|
||||
'webpage_url': ('path', {urljoin('https://laracasts.com')}),
|
||||
'title': ('title', {clean_html}),
|
||||
'season_number': ('chapter', {int_or_none}),
|
||||
'episode_number': ('position', {int_or_none}),
|
||||
@@ -104,7 +104,7 @@ class LaracastsPlaylistIE(LaracastsBaseIE):
|
||||
'description': ('body', {clean_html}),
|
||||
'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
|
||||
'duration': ('runTime', {parse_duration}),
|
||||
'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}),
|
||||
'categories': ('taxonomy', 'name', {str}, all, filter),
|
||||
'tags': ('topics', ..., 'name', {str}),
|
||||
'modified_date': ('lastUpdated', {unified_strdate}),
|
||||
}),
|
||||
|
||||
@@ -66,7 +66,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
'license': ('value', 'license', {str}),
|
||||
'timestamp': ('timestamp', {int_or_none}),
|
||||
'release_timestamp': ('value', 'release_time', {int_or_none}),
|
||||
'tags': ('value', 'tags', ..., {lambda x: x or None}),
|
||||
'tags': ('value', 'tags', ..., filter),
|
||||
'duration': ('value', stream_type, 'duration', {int_or_none}),
|
||||
'channel': ('signing_channel', 'value', 'title', {str}),
|
||||
'channel_id': ('signing_channel', 'claim_id', {str}),
|
||||
@@ -136,6 +136,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
|
||||
class LBRYIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry'
|
||||
IE_DESC = 'odysee.com'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
|
||||
(?:\$/(?:download|embed)/)?
|
||||
(?P<id>
|
||||
@@ -364,6 +365,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
|
||||
class LBRYChannelIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:channel'
|
||||
IE_DESC = 'odysee.com channels'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||
@@ -391,6 +393,7 @@ class LBRYChannelIE(LBRYBaseIE):
|
||||
|
||||
class LBRYPlaylistIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:playlist'
|
||||
IE_DESC = 'odysee.com playlists'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
|
||||
|
||||
@@ -6,13 +6,11 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class LearningOnScreenIE(InfoExtractor):
|
||||
@@ -32,28 +30,24 @@ class LearningOnScreenIE(InfoExtractor):
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
|
||||
self.raise_login_required(
|
||||
'Use --cookies for authentication. See '
|
||||
' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
|
||||
'for how to manually pass cookies', method=None)
|
||||
self.raise_login_required(method='session_cookies')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
details = traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'programme-details')}, {
|
||||
'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}),
|
||||
{find_element(id='programme-details', html=True)}, {
|
||||
'title': ({find_element(tag='h2')}, {clean_html}),
|
||||
'timestamp': (
|
||||
{functools.partial(get_element_by_class, 'broadcast-date')},
|
||||
{find_element(cls='broadcast-date')},
|
||||
{functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
|
||||
'duration': (
|
||||
{functools.partial(get_element_by_class, 'prog-running-time')},
|
||||
{clean_html}, {parse_duration}),
|
||||
{find_element(cls='prog-running-time')}, {clean_html}, {parse_duration}),
|
||||
}))
|
||||
|
||||
title = details.pop('title', None) or traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'add-to-existing-playlist')},
|
||||
{find_element(id='add-to-existing-playlist', html=True)},
|
||||
{extract_attributes}, 'data-record-title', {clean_html}))
|
||||
|
||||
entries = self._parse_html5_media_entries(
|
||||
|
||||
@@ -6,12 +6,10 @@ from ..utils import (
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
get_element_text_and_html_by_tag,
|
||||
parse_duration,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class ListenNotesIE(InfoExtractor):
|
||||
@@ -22,14 +20,14 @@ class ListenNotesIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'KrDgvNb_u1n',
|
||||
'ext': 'mp3',
|
||||
'title': 'md5:32236591a921adf17bbdbf0441b6c0e9',
|
||||
'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd',
|
||||
'duration': 2148.0,
|
||||
'channel': 'Thriving on Overload',
|
||||
'title': r're:Tim O’Reilly on noticing things other people .{113}',
|
||||
'description': r're:(?s)‘’We shape reality by what we notice and .{27459}',
|
||||
'duration': 2215.0,
|
||||
'channel': 'Amplifying Cognition',
|
||||
'channel_id': 'ed84wITivxF',
|
||||
'episode_id': 'e1312583fa7b4e24acfbb5131050be00',
|
||||
'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/',
|
||||
'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/amplifying-cognition-ross-dawson-Iemft4Gdr0k-ed84wITivxF.300x300.jpg',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/amplifying-cognition-ross-dawson-ed84wITivxF/',
|
||||
'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'],
|
||||
},
|
||||
}, {
|
||||
@@ -39,13 +37,13 @@ class ListenNotesIE(InfoExtractor):
|
||||
'id': 'lwEA3154JzG',
|
||||
'ext': 'mp3',
|
||||
'title': 'Episode 177: WireGuard with Jason Donenfeld',
|
||||
'description': 'md5:24744f36456a3e95f83c1193a3458594',
|
||||
'description': r're:(?s)Jason Donenfeld lead developer joins us this hour to discuss WireGuard, .{3169}',
|
||||
'duration': 3861.0,
|
||||
'channel': 'Ask Noah Show',
|
||||
'channel_id': '4DQTzdS5-j7',
|
||||
'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/',
|
||||
'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg',
|
||||
'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-gD7vG150cxf-4DQTzdS5-j7.300x300.jpg',
|
||||
'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'],
|
||||
},
|
||||
}]
|
||||
@@ -70,7 +68,7 @@ class ListenNotesIE(InfoExtractor):
|
||||
'id': audio_id,
|
||||
'url': data['audio'],
|
||||
'title': (data.get('data-title')
|
||||
or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
|
||||
or traverse_obj(webpage, ({find_element(tag='h1')}, {clean_html}))
|
||||
or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')),
|
||||
'description': (self._clean_description(get_element_by_class('ln-text-p', webpage))
|
||||
or strip_or_none(description)),
|
||||
|
||||
@@ -1,30 +1,32 @@
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unsmuggle_url,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class LiTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
|
||||
|
||||
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:[^/?#]+/watch/|vod/[^/?#]+/content\.do\?content_id=)(?P<id>[\w-]+)'
|
||||
_URL_TEMPLATE = 'https://www.litv.tv/%s/watch/%s'
|
||||
_GEO_COUNTRIES = ['TW']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
|
||||
'url': 'https://www.litv.tv/drama/watch/VOD00041610',
|
||||
'info_dict': {
|
||||
'id': 'VOD00041606',
|
||||
'title': '花千骨',
|
||||
},
|
||||
'playlist_count': 51, # 50 episodes + 1 trailer
|
||||
}, {
|
||||
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
|
||||
'url': 'https://www.litv.tv/drama/watch/VOD00041610',
|
||||
'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
|
||||
'info_dict': {
|
||||
'id': 'VOD00041610',
|
||||
@@ -32,16 +34,15 @@ class LiTVIE(InfoExtractor):
|
||||
'title': '花千骨第1集',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。',
|
||||
'categories': ['奇幻', '愛情', '中國', '仙俠'],
|
||||
'categories': ['奇幻', '愛情', '仙俠', '古裝'],
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
},
|
||||
'skip': 'Georestricted to Taiwan',
|
||||
}, {
|
||||
'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&',
|
||||
'url': 'https://www.litv.tv/drama/watch/VOD00044841',
|
||||
'md5': '88322ea132f848d6e3e18b32a832b918',
|
||||
'info_dict': {
|
||||
'id': 'VOD00044841',
|
||||
@@ -55,94 +56,62 @@ class LiTVIE(InfoExtractor):
|
||||
def _extract_playlist(self, playlist_data, content_type):
|
||||
all_episodes = [
|
||||
self.url_result(smuggle_url(
|
||||
self._URL_TEMPLATE % (content_type, episode['contentId']),
|
||||
self._URL_TEMPLATE % (content_type, episode['content_id']),
|
||||
{'force_noplaylist': True})) # To prevent infinite recursion
|
||||
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
|
||||
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episodes', lambda _, v: v['content_id']))]
|
||||
|
||||
return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
|
||||
return self.playlist_result(all_episodes, playlist_data['content_id'], playlist_data.get('title'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
vod_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
|
||||
|
||||
if self._search_regex(
|
||||
r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
|
||||
webpage, 'meta refresh redirect', default=False, group=0):
|
||||
raise ExtractorError('No such content found', expected=True)
|
||||
program_info = traverse_obj(vod_data, ('programInformation', {dict})) or {}
|
||||
playlist_data = traverse_obj(vod_data, ('seriesTree'))
|
||||
if playlist_data and self._yes_playlist(program_info.get('series_id'), video_id, smuggled_data):
|
||||
return self._extract_playlist(playlist_data, program_info.get('content_type'))
|
||||
|
||||
program_info = self._parse_json(self._search_regex(
|
||||
r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
|
||||
video_id)
|
||||
asset_id = traverse_obj(program_info, ('assets', 0, 'asset_id', {str}))
|
||||
if asset_id: # This is a VOD
|
||||
media_type = 'vod'
|
||||
else: # This is a live stream
|
||||
asset_id = program_info['content_id']
|
||||
media_type = program_info['content_type']
|
||||
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
|
||||
if puid:
|
||||
endpoint = 'get-urls'
|
||||
else:
|
||||
puid = str(uuid.uuid4())
|
||||
endpoint = 'get-urls-no-auth'
|
||||
video_data = self._download_json(
|
||||
f'https://www.litv.tv/api/{endpoint}', video_id,
|
||||
data=json.dumps({'AssetId': asset_id, 'MediaType': media_type, 'puid': puid}).encode(),
|
||||
headers={'Content-Type': 'application/json'})
|
||||
|
||||
# In browsers `getProgramInfo` request is always issued. Usually this
|
||||
# endpoint gives the same result as the data embedded in the webpage.
|
||||
# If, for some reason, there are no embedded data, we do an extra request.
|
||||
if 'assetId' not in program_info:
|
||||
program_info = self._download_json(
|
||||
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
|
||||
query={'contentId': video_id},
|
||||
headers={'Accept': 'application/json'})
|
||||
|
||||
series_id = program_info['seriesId']
|
||||
if self._yes_playlist(series_id, video_id, smuggled_data):
|
||||
playlist_data = self._download_json(
|
||||
'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
|
||||
query={'seriesId': series_id}, headers={'Accept': 'application/json'})
|
||||
return self._extract_playlist(playlist_data, program_info['contentType'])
|
||||
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
|
||||
webpage, 'video data', default='{}'), video_id)
|
||||
if not video_data:
|
||||
payload = {'assetId': program_info['assetId']}
|
||||
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
|
||||
if puid:
|
||||
payload.update({
|
||||
'type': 'auth',
|
||||
'puid': puid,
|
||||
})
|
||||
endpoint = 'getUrl'
|
||||
else:
|
||||
payload.update({
|
||||
'watchDevices': program_info['watchDevices'],
|
||||
'contentType': program_info['contentType'],
|
||||
})
|
||||
endpoint = 'getMainUrlNoAuth'
|
||||
video_data = self._download_json(
|
||||
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
|
||||
data=json.dumps(payload).encode(),
|
||||
headers={'Content-Type': 'application/json'})
|
||||
|
||||
if not video_data.get('fullpath'):
|
||||
error_msg = video_data.get('errorMessage')
|
||||
if error_msg == 'vod.error.outsideregionerror':
|
||||
if error := traverse_obj(video_data, ('error', {dict})):
|
||||
error_msg = traverse_obj(error, ('message', {str}))
|
||||
if error_msg and 'OutsideRegionError' in error_msg:
|
||||
self.raise_geo_restricted('This video is available in Taiwan only')
|
||||
if error_msg:
|
||||
elif error_msg:
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True)
|
||||
raise ExtractorError(f'Unexpected result from {self.IE_NAME}')
|
||||
raise ExtractorError(f'Unexpected error from {self.IE_NAME}')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['fullpath'], video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
video_data['result']['AssetURLs'][0], video_id, ext='mp4', m3u8_id='hls')
|
||||
for a_format in formats:
|
||||
# LiTV HLS segments doesn't like compressions
|
||||
a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'
|
||||
|
||||
title = program_info['title'] + program_info.get('secondaryMark', '')
|
||||
description = program_info.get('description')
|
||||
thumbnail = program_info.get('imageFile')
|
||||
categories = [item['name'] for item in program_info.get('category', [])]
|
||||
episode = int_or_none(program_info.get('episode'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'episode_number': episode,
|
||||
'title': join_nonempty('title', 'secondary_mark', delim='', from_dict=program_info),
|
||||
**traverse_obj(program_info, {
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('picture', {urljoin('https://p-cdnstatic.svc.litv.tv/')}),
|
||||
'categories': ('genres', ..., 'name', {str}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1,86 +1,11 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
    # Matches lnkgo.lt / lnk.lt / lnkgo.alfa.lt video pages; the trailing numeric episode id is optional
    _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
    _TESTS = [{
        'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
        'info_dict': {
            'id': '10809',
            'ext': 'mp4',
            'title': "Put'ka: Trys Klausimai",
            'upload_date': '20161216',
            'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
            'age_limit': 18,
            'duration': 117,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1481904000,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }, {
        'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
        'info_dict': {
            'id': '10467',
            'ext': 'mp4',
            'title': 'Nėrdas: Kompiuterio Valymas',
            'upload_date': '20150113',
            'description': 'md5:7352d113a242a808676ff17e69db6a69',
            'age_limit': 18,
            'duration': 346,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1421164800,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }, {
        'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
        'only_matching': True,
    }]
    # Site rating label -> minimum viewer age; labels not listed here fall back to 0
    _AGE_LIMITS = {
        'N-7': 7,
        'N-14': 14,
        'S': 18,
    }
    # Filled with (prefix, videoUrl, secureTokenParams)
    _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'

    def _real_extract(self, url):
        display_id, episode_id = self._match_valid_url(url).groups()

        # The API path takes '0' when the URL carries no episode id
        api_url = 'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, episode_id or '0')
        video_info = self._download_json(api_url, display_id)['videoConfig']['videoInfo']

        # The id reported by the API replaces whatever was in the URL
        video_id = str(video_info['id'])
        title = video_info['title']

        if video_info.get('isQualityChangeAvailable'):
            prefix = 'smil'
        else:
            prefix = 'mp4'
        token_params = video_info.get('secureTokenParams') or ''
        m3u8_url = self._M3U8_TEMPL % (prefix, video_info['videoUrl'], token_params)
        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
            'duration': int_or_none(video_info.get('duration')),
            'description': clean_html(video_info.get('htmlDescription')),
            'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
            'timestamp': parse_iso8601(video_info.get('airDate')),
            'view_count': int_or_none(video_info.get('viewsCount')),
        }
|
||||
|
||||
|
||||
class LnkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'
|
||||
|
||||
@@ -92,9 +92,9 @@ class LoomIE(InfoExtractor):
|
||||
},
|
||||
'params': {'videopassword': 'seniorinfants2'},
|
||||
}, {
|
||||
# embed, transcoded-url endpoint sends empty JSON response
|
||||
# embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
|
||||
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'md5': '8488817242a0db1cb2ad0ea522553cf6',
|
||||
'md5': 'b321d261656848c184a94e3b93eae28d',
|
||||
'info_dict': {
|
||||
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'ext': 'mp4',
|
||||
@@ -104,6 +104,7 @@ class LoomIE(InfoExtractor):
|
||||
'timestamp': 1657216459,
|
||||
'duration': 181,
|
||||
},
|
||||
'params': {'format': 'bestvideo'}, # Test video-only fixup
|
||||
'expected_warnings': ['Failed to parse JSON'],
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -293,7 +294,11 @@ class LoomIE(InfoExtractor):
|
||||
format_url = format_url.replace('-split.m3u8', '.m3u8')
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
|
||||
# Sometimes only split video/audio formats are available, need to fixup video-only formats
|
||||
is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec'))
|
||||
for fmt in m3u8_formats:
|
||||
if is_not_premerged and fmt.get('vcodec') != 'none':
|
||||
fmt['acodec'] = 'none'
|
||||
yield {
|
||||
**fmt,
|
||||
'url': update_url(fmt['url'], query=query),
|
||||
|
||||
@@ -114,7 +114,7 @@ class LSMLREmbedIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
query = parse_qs(url)
|
||||
video_id = traverse_obj(query, (
|
||||
('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)
|
||||
('show', 'id'), 0, {int_or_none}, filter, {str_or_none}), get_all=False)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_data, media_data = self._search_regex(
|
||||
|
||||
@@ -57,6 +57,6 @@ class MagentaMusikIE(InfoExtractor):
|
||||
'duration': ('runtimeInSeconds', {int_or_none}),
|
||||
'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
|
||||
'release_year': ('yearOfProduction', {int_or_none}),
|
||||
'categories': ('mainGenre', {str}, {lambda x: x and [x]}),
|
||||
'categories': ('mainGenre', {str}, all, filter),
|
||||
})),
|
||||
}
|
||||
|
||||
@@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
|
||||
video_data = None
|
||||
|
||||
# fix meta_url if missing the host address
|
||||
if re.match(r'^\/\+\/', meta_url):
|
||||
if re.match(r'\/\+\/', meta_url):
|
||||
meta_url = urljoin('https://my.mail.ru', meta_url)
|
||||
|
||||
if meta_url:
|
||||
|
||||
@@ -16,6 +16,15 @@ class MediaKlikkIE(InfoExtractor):
|
||||
(?P<id>[^/#?_]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/',
|
||||
'info_dict': {
|
||||
'id': '668177',
|
||||
'title': 'Az ajtó',
|
||||
'display_id': 'az-ajto',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg',
|
||||
},
|
||||
}, {
|
||||
# (old) mediaklikk. date in html.
|
||||
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
|
||||
'info_dict': {
|
||||
@@ -37,6 +46,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230903',
|
||||
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) m4sport
|
||||
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
|
||||
@@ -59,6 +69,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230908',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# m4sport with *video/ url and no date
|
||||
'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
|
||||
@@ -69,6 +80,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) hirado
|
||||
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
|
||||
@@ -90,6 +102,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230911',
|
||||
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}, {
|
||||
# (old) petofilive
|
||||
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
|
||||
@@ -112,6 +125,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230909',
|
||||
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -143,14 +157,14 @@ class MediaKlikkIE(InfoExtractor):
|
||||
if not playlist_url:
|
||||
raise ExtractorError('Unable to extract playlist url')
|
||||
|
||||
formats = self._extract_wowza_formats(
|
||||
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ class MediaStreamBaseIE(InfoExtractor):
|
||||
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
|
||||
|
||||
def _extract_mediastream_urls(self, webpage):
|
||||
yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
|
||||
yield from traverse_obj(list(self._yield_json_ld(webpage, None, default={})), (
|
||||
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
|
||||
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
IE_NAME = 'MangoTV'
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ class MicrosoftEmbedIE(InfoExtractor):
|
||||
'timestamp': 1631658316,
|
||||
'upload_date': '20210914',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: syntax error: line 1, column 0'],
|
||||
}]
|
||||
_API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/'
|
||||
|
||||
@@ -36,11 +37,11 @@ class MicrosoftEmbedIE(InfoExtractor):
|
||||
formats = []
|
||||
for source_type, source in metadata['streams'].items():
|
||||
if source_type == 'smooth_Streaming':
|
||||
formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss'))
|
||||
formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss', fatal=False))
|
||||
elif source_type == 'apple_HTTP_Live_Streaming':
|
||||
formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4'))
|
||||
formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4', fatal=False))
|
||||
elif source_type == 'mPEG_DASH':
|
||||
formats.extend(self._extract_mpd_formats(source['url'], video_id))
|
||||
formats.extend(self._extract_mpd_formats(source['url'], video_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': source_type,
|
||||
|
||||
@@ -1,291 +0,0 @@
|
||||
import functools
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
determine_ext,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class MildomBaseIE(InfoExtractor):
    """Common base for the Mildom extractors; provides the shared API helper."""

    # Guest identifier sent with every API call; filled in lazily by _call_api.
    _GUEST_ID = None

    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
        """Request a Mildom JSON endpoint and return the ``body`` field of the reply.

        When ``body`` is truthy it is JSON-encoded and sent as the request
        payload together with a JSON content-type header. Entries of ``query``
        are merged over the guest-id/platform parameters.

        Raises an expected ExtractorError when the reply's ``code`` is not 0.
        """
        if not self._GUEST_ID:
            self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'

        payload, request_headers = None, {}
        if body:
            payload = json.dumps(body).encode()
            request_headers = {'Content-Type': 'application/json'}

        params = {
            '__guest_id': self._GUEST_ID,
            '__platform': 'web',
        }
        # Caller-supplied parameters take precedence over the defaults above.
        params.update(query or {})

        reply = self._download_json(
            url, video_id, note=note, data=payload,
            headers=request_headers, query=params)

        if reply['code'] == 0:
            return reply['body']
        raise ExtractorError(
            f'Mildom says: {reply["message"]} (code {reply["code"]})',
            expected=True)
|
||||
|
||||
|
||||
class MildomIE(MildomBaseIE):
    """Extractor for a Mildom user's ongoing live stream."""

    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'

    def _real_extract(self, url):
        """Build the info dict for the live stream of the user id found in ``url``."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id)

        enterstudio = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
            note='Downloading live metadata', query={'user_id': video_id})
        # Use the API's log_id as the resulting id when present, else the user id.
        result_video_id = enterstudio.get('log_id', video_id)

        servers = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
            note='Downloading live server list', query={
                'user_id': video_id,
                'live_server_type': 'hls',
            })

        token_reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id,
            note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
        playback_token = traverse_obj(token_reply, ('data', ..., 'token'), get_all=False)
        if not playback_token:
            raise ExtractorError('Failed to obtain live playback token')

        referer = 'https://www.mildom.com/'
        master_url = f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}'
        formats = self._extract_m3u8_formats(
            master_url, result_video_id, 'mp4', headers={
                'Referer': referer,
                'Origin': 'https://www.mildom.com',
            })

        # Attach the Referer to every format's own request headers as well.
        for fmt in formats:
            fmt.setdefault('http_headers', {})['Referer'] = referer

        title = (
            self._html_search_meta('twitter:description', webpage, default=None)
            or traverse_obj(enterstudio, 'anchor_intro'))
        description = traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str)
        timestamp = float_or_none(enterstudio.get('live_start_ms'), scale=1000)
        uploader = (
            self._html_search_meta('twitter:title', webpage, default=None)
            or traverse_obj(enterstudio, 'loginname'))

        return {
            'id': result_video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': video_id,
            'formats': formats,
            'is_live': True,
        }
|
||||
|
||||
|
||||
class MildomVodIE(MildomBaseIE):
    """Extractor for a single Mildom VOD (playback) page."""

    IE_NAME = 'mildom:vod'
    IE_DESC = 'VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
    _TESTS = [{
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
        'info_dict': {
            'id': '10882672-1597662269',
            'ext': 'mp4',
            'title': '始めてのミルダム配信じゃぃ!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'upload_date': '20200817',
            'duration': 4138.37,
            'description': 'ゲームをしたくて!',
            'timestamp': 1597662269.0,
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
        'info_dict': {
            'id': '10882672-1597758589870-477',
            'ext': 'mp4',
            'title': '【kson】感染メイズ!麻酔銃で無双する',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'timestamp': 1597759093.0,
            'uploader': 'kson組長(けいそん)',
            'duration': 4302.58,
            'uploader_id': '10882672',
            'description': 'このステージ絶対乗り越えたい',
            'upload_date': '20200818',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
        'info_dict': {
            'id': '10882672-buha9td2lrn97fk2jme0',
            'ext': 'mp4',
            'title': '【kson組長】CART RACER!!!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
            'upload_date': '20201104',
            'timestamp': 1604494797.0,
            'duration': 4657.25,
            'description': 'WTF',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the VOD addressed by ``url``."""
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)

        detail = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
            note='Downloading playback metadata', query={
                'v_id': video_id,
            })
        autoplay = detail['playback']

        # The audio-only rendition comes first, followed by one entry per video link.
        audio_format = {
            'url': autoplay['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
            'ext': 'm4a',
        }
        formats = [audio_format] + [{
            'format_id': f'video-{link["name"]}',
            'url': link['url'],
            'protocol': 'm3u8_native',
            # Width is derived from the link's level and the reported width/height ratio.
            'width': link['level'] * autoplay['video_width'] // autoplay['video_height'],
            'height': link['level'],
            'vcodec': 'h264',
            'acodec': 'aac',
            'ext': 'mp4',
        } for link in autoplay['video_link']]

        title = (
            self._html_search_meta(('og:description', 'description'), webpage, default=None)
            or autoplay.get('title'))

        return {
            'id': video_id,
            'title': title,
            'description': traverse_obj(autoplay, 'video_intro'),
            'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
            'duration': float_or_none(autoplay.get('video_length'), scale=1000),
            'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
            'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
            'uploader_id': user_id,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class MildomClipIE(MildomBaseIE):
    """Extractor for a single Mildom clip."""

    IE_NAME = 'mildom:clip'
    IE_DESC = 'Clip in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
        'info_dict': {
            'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
            'title': '全然違ったよ',
            'timestamp': 1619181890,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': 'ざきんぽ',
            'uploader_id': '10042245',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
        'info_dict': {
            'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
            'title': 'かっこいい',
            'timestamp': 1621094003,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': '(ルーキー',
            'uploader_id': '10111524',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
        'info_dict': {
            'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
            'title': 'あ',
            'timestamp': 1614769431,
            'duration': 31,
            'thumbnail': r're:https?://.+',
            'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
            'uploader_id': '10660174',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the clip addressed by ``url``."""
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)

        clip_detail = self._call_api(
            'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
            note='Downloading playback metadata', query={
                'clip_id': video_id,
            })

        # Prefer the page's meta description as title, else the API's title.
        page_title = self._html_search_meta(
            ('og:description', 'description'), webpage, default=None)
        title = page_title or clip_detail.get('title')

        return {
            'id': video_id,
            'title': title,
            'timestamp': float_or_none(clip_detail.get('create_time')),
            'duration': float_or_none(clip_detail.get('length')),
            'thumbnail': clip_detail.get('cover'),
            'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
            'uploader_id': user_id,

            # The clip is a single direct media URL; extension falls back to mp4.
            'url': clip_detail['url'],
            'ext': determine_ext(clip_detail.get('url'), 'mp4'),
        }
|
||||
|
||||
|
||||
class MildomUserVodIE(MildomBaseIE):
    """Playlist extractor listing the VODs of one Mildom user profile."""

    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 732,
    }, {
        'url': 'https://www.mildom.com/profile/10882672',
        'info_dict': {
            'id': '10882672',
            'title': 'Uploads from kson組長(けいそん)',
        },
        'playlist_mincount': 201,
    }]

    def _fetch_page(self, user_id, page):
        """Yield url results for one page of the user's playback list.

        ``page`` is incremented by one before it is sent to the API.
        Entries without a ``v_id`` are skipped.
        """
        page_number = page + 1
        reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
            user_id, note=f'Downloading page {page_number}', query={
                'user_id': user_id,
                'page': page_number,
                'limit': '30',
            })
        if not reply:
            return
        for item in reply:
            v_id = item.get('v_id')
            if v_id:
                yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')

    def _real_extract(self, url):
        """Return a lazily paged playlist of all VODs of the user in ``url``."""
        user_id = self._match_id(url)
        self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')

        profile_reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')
        profile = profile_reply['user_info']

        # Page size of 30 matches the 'limit' requested in _fetch_page.
        entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30)
        return self.playlist_result(
            entries, user_id, f'Uploads from {profile["loginname"]}')
|
||||
@@ -65,7 +65,7 @@ class TechTVMITIE(InfoExtractor):
|
||||
|
||||
class OCWMITIE(InfoExtractor):
|
||||
IE_NAME = 'ocw.mit.edu'
|
||||
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_BASE_URL = 'http://ocw.mit.edu/'
|
||||
|
||||
_TESTS = [
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
from .telecinco import TelecincoIE
|
||||
from .telecinco import TelecincoBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
|
||||
class MiTeleIE(TelecincoBaseIE):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
|
||||
'info_dict': {
|
||||
@@ -27,6 +26,7 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1471209401,
|
||||
'upload_date': '20160814',
|
||||
},
|
||||
'skip': 'HTTP Error 404 Not Found',
|
||||
}, {
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
@@ -49,6 +49,26 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404 Not Found',
|
||||
}, {
|
||||
'url': 'https://www.mitele.es/programas-tv/horizonte/temporada-5/programa-171-40_013480051/player/',
|
||||
'info_dict': {
|
||||
'id': '7adbe22e-cd41-4787-afa4-36f3da7c2c6f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Horizonte Temporada 5 Programa 171',
|
||||
'description': 'md5:97f1fb712c5ac27e5693a8b3c5c0c6e3',
|
||||
'episode': 'Las Zonas de Bajas Emisiones, a debate',
|
||||
'episode_number': 171,
|
||||
'season': 'Season 5',
|
||||
'season_number': 5,
|
||||
'series': 'Horizonte',
|
||||
'duration': 7012,
|
||||
'upload_date': '20240927',
|
||||
'timestamp': 1727416450,
|
||||
'thumbnail': 'https://album.mediaset.es/eimg/2024/09/27/horizonte-171_9f02.jpg',
|
||||
'age_limit': 12,
|
||||
},
|
||||
'params': {'geo_bypass_country': 'ES'},
|
||||
}, {
|
||||
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
|
||||
'only_matching': True,
|
||||
@@ -60,9 +80,9 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
pre_player = self._parse_json(self._search_regex(
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
|
||||
webpage, 'Pre Player'), display_id)['prePlayer']
|
||||
pre_player = self._search_json(
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
||||
webpage, 'Pre Player', display_id)['prePlayer']
|
||||
title = pre_player['title']
|
||||
video_info = self._parse_content(pre_player['video'], url)
|
||||
content = pre_player.get('content') or {}
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
|
||||
|
||||
class MixchIE(InfoExtractor):
|
||||
IE_NAME = 'mixch'
|
||||
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://mixch\.tv/u/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mixch.tv/u/16943797/live',
|
||||
@@ -66,7 +66,7 @@ class MixchIE(InfoExtractor):
|
||||
note='Downloading comments', errnote='Failed to download comments'), (..., {
|
||||
'author': ('name', {str}),
|
||||
'author_id': ('user_id', {str_or_none}),
|
||||
'id': ('message_id', {str}, {lambda x: x or None}),
|
||||
'id': ('message_id', {str}, filter),
|
||||
'text': ('body', {str}),
|
||||
'timestamp': ('created', {int}),
|
||||
}))
|
||||
@@ -74,7 +74,7 @@ class MixchIE(InfoExtractor):
|
||||
|
||||
class MixchArchiveIE(InfoExtractor):
|
||||
IE_NAME = 'mixch:archive'
|
||||
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://mixch\.tv/archive/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mixch.tv/archive/421',
|
||||
@@ -116,3 +116,56 @@ class MixchArchiveIE(InfoExtractor):
|
||||
'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id),
|
||||
'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})),
|
||||
}
|
||||
|
||||
|
||||
class MixchMovieIE(InfoExtractor):
    """Extractor for mixch.tv movie pages (``/m/<id>``)."""

    IE_NAME = 'mixch:movie'
    _VALID_URL = r'https?://mixch\.tv/m/(?P<id>\w+)'

    _TESTS = [{
        'url': 'https://mixch.tv/m/Ve8KNkJ5',
        'info_dict': {
            'id': 'Ve8KNkJ5',
            'title': '夏☀️\nムービーへのポイントは本イベントに加算されないので配信にてお願い致します🙇🏻\u200d♀️\n#TGCCAMPUS #ミス東大 #ミス東大2024 ',
            'ext': 'mp4',
            'uploader': 'ミス東大No.5 松藤百香🍑💫',
            'uploader_id': '12299174',
            'channel_follower_count': int,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'timestamp': 1724070828,
            'uploader_url': 'https://mixch.tv/u/12299174',
            'live_status': 'not_live',
            'upload_date': '20240819',
        },
    }, {
        'url': 'https://mixch.tv/m/61DzpIKE',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the movie JSON for the id in ``url`` and map it to an info dict."""
        video_id = self._match_id(url)
        data = self._download_json(
            f'https://mixch.tv/api-web/movies/{video_id}', video_id)

        # The only mandatory field of the reply: the direct file URL.
        single_format = {
            'format_id': 'mp4',
            'url': data['movie']['file'],
            'ext': 'mp4',
        }
        metadata = traverse_obj(data, {
            'title': ('movie', 'title', {str}),
            'thumbnail': ('movie', 'thumbnailURL', {url_or_none}),
            'uploader': ('ownerInfo', 'name', {str}),
            'uploader_id': ('ownerInfo', 'id', {int}, {str_or_none}),
            'channel_follower_count': ('ownerInfo', 'fan', {int_or_none}),
            'view_count': ('ownerInfo', 'view', {int_or_none}),
            'like_count': ('movie', 'favCount', {int_or_none}),
            'comment_count': ('movie', 'commentCount', {int_or_none}),
            'timestamp': ('movie', 'published', {int_or_none}),
            'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}, filter),
        })

        return {
            'id': video_id,
            'formats': [single_format],
            **metadata,
            'live_status': 'not_live',
        }
|
||||
|
||||
121
yt_dlp/extractor/mojevideo.py
Normal file
121
yt_dlp/extractor/mojevideo.py
Normal file
@@ -0,0 +1,121 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json, remove_end, update_url_query
|
||||
|
||||
|
||||
class MojevideoIE(InfoExtractor):
    """Extractor for video pages on mojevideo.sk."""

    IE_DESC = 'mojevideo.sk'
    _VALID_URL = r'https?://(?:www\.)?mojevideo\.sk/video/(?P<id>\w+)/(?P<display_id>[\w()]+?)\.html'

    _TESTS = [{
        'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html',
        'md5': '384a4628bd2bbd261c5206cf77c38c17',
        'info_dict': {
            'id': '3d17c',
            'ext': 'mp4',
            'title': 'Chlapci dobetónovali sme, máme hotovo!',
            'display_id': 'chlapci_dobetonovali_sme_mame_hotovo',
            'description': 'md5:a0822126044050d304a9ef58c92ddb34',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/250236.jpg',
            'duration': 21.0,
            'upload_date': '20230919',
            'timestamp': 1695129706,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/14677/den_blbec.html',
        'md5': '517c3e111c53a67d10b429c1f344ba2f',
        'info_dict': {
            'id': '14677',
            'ext': 'mp4',
            'title': 'Deň blbec?',
            'display_id': 'den_blbec',
            'description': 'I maličkosť vám môže zmeniť celý deň. Nikdy nezahadzujte žuvačky na zem!',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/83575.jpg',
            'duration': 100.0,
            'upload_date': '20120515',
            'timestamp': 1337076481,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/2feb2/band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd).html',
        'md5': '64599a23d3ac31cf2fe069e4353d8162',
        'info_dict': {
            'id': '2feb2',
            'ext': 'mp4',
            'title': 'BAND-MAID - onset (Instrumental) Live - Zepp Tokyo (Full HD)',
            'display_id': 'band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd)',
            'description': 'Výborná inštrumentálna skladba od skupiny BAND-MAID.',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/196274.jpg',
            'duration': 240.0,
            'upload_date': '20190708',
            'timestamp': 1562576592,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/358c8/dva_nissany_skyline_strielaju_v_londyne.html',
        'only_matching': True,
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/2455d/gopro_hero4_session_nova_sportova_vodotesna_kamera.html',
        'only_matching': True,
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/352ee/amd_rx_6800_xt_vs_nvidia_rtx_3080_(test_v_9_hrach).html',
        'only_matching': True,
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/2cbeb/trailer_z_avengers_infinity_war.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the video page at ``url`` and assemble its formats and metadata."""
        video_id, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, video_id)

        # Decimal id as embedded in the page; otherwise parse the URL id as hexadecimal.
        decimal_id = self._search_regex(
            r'\bvId\s*=\s*(\d+)', webpage, 'video id', fatal=False)
        if not decimal_id:
            decimal_id = str(int(video_id, 16))
        expiry = self._search_regex(r'\bvEx\s*=\s*["\'](\d+)', webpage, 'video expiry')
        hashes = self._search_json(
            r'\bvHash\s*=', webpage, 'video hashes', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)

        # (file suffix, quality rank, note) paired positionally with the page's hashes;
        # zip() stops at the shorter sequence, so surplus variants produce no format.
        variants = (
            ('', 1, 'normálna kvalita'),
            ('_lq', 0, 'nízka kvalita'),
            ('_hd', 2, 'HD-720p'),
            ('_fhd', 3, 'FULL HD-1080p'),
            ('_2k', 4, '2K-1440p'),
        )
        formats = [{
            'format_id': f'mp4-{quality}',
            'quality': quality,
            'format_note': format_note,
            'url': update_url_query(
                f'https://cache01.mojevideo.sk/securevideos69/{decimal_id}{suffix}.mp4', {
                    'md5': video_hash,
                    'expires': expiry,
                }),
        } for video_hash, (suffix, quality, format_note) in zip(hashes, variants)]

        title = self._og_search_title(webpage, default=None)
        if not title:
            title = remove_end(self._html_extract_title(webpage, 'title'), ' - Mojevideo')

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'description': self._og_search_description(webpage),
            # JSON-LD fields, when present, override the values above.
            **self._search_json_ld(webpage, video_id, default={}),
        }
|
||||
@@ -4,15 +4,11 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_class,
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class MonstercatIE(InfoExtractor):
|
||||
@@ -26,19 +22,21 @@ class MonstercatIE(InfoExtractor):
|
||||
'thumbnail': 'https://www.monstercat.com/release/742779548009/cover',
|
||||
'release_date': '20230711',
|
||||
'album': 'The Secret Language of Trees',
|
||||
'album_artist': 'BT',
|
||||
'album_artists': ['BT'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_tracks(self, table, album_meta):
|
||||
for td in re.findall(r'<tr[^<]*>((?:(?!</tr>)[\w\W])+)', table): # regex by chatgpt due to lack of get_elements_by_tag
|
||||
title = clean_html(try_call(
|
||||
lambda: get_element_by_class('d-inline-flex flex-column', td).partition(' <span')[0]))
|
||||
ids = extract_attributes(try_call(lambda: get_element_html_by_class('btn-play cursor-pointer mr-small', td)) or '')
|
||||
title = traverse_obj(td, (
|
||||
{find_element(cls='d-inline-flex flex-column')},
|
||||
{lambda x: x.partition(' <span')}, 0, {clean_html}))
|
||||
ids = traverse_obj(td, (
|
||||
{find_element(cls='btn-play cursor-pointer mr-small', html=True)}, {extract_attributes})) or {}
|
||||
track_id = ids.get('data-track-id')
|
||||
release_id = ids.get('data-release-id')
|
||||
|
||||
track_number = int_or_none(try_call(lambda: get_element_by_class('py-xsmall', td)))
|
||||
track_number = traverse_obj(td, ({find_element(cls='py-xsmall')}, {int_or_none}))
|
||||
if not track_id or not release_id:
|
||||
self.report_warning(f'Skipping track {track_number}, ID(s) not found')
|
||||
self.write_debug(f'release_id={release_id!r} track_id={track_id!r}')
|
||||
@@ -48,7 +46,7 @@ class MonstercatIE(InfoExtractor):
|
||||
'title': title,
|
||||
'track': title,
|
||||
'track_number': track_number,
|
||||
'artist': clean_html(try_call(lambda: get_element_by_class('d-block fs-xxsmall', td))),
|
||||
'artists': traverse_obj(td, ({find_element(cls='d-block fs-xxsmall')}, {clean_html}, all)),
|
||||
'url': f'https://www.monstercat.com/api/release/{release_id}/track-stream/{track_id}',
|
||||
'id': track_id,
|
||||
'ext': 'mp3',
|
||||
@@ -57,20 +55,19 @@ class MonstercatIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
url_id = self._match_id(url)
|
||||
html = self._download_webpage(url, url_id)
|
||||
# wrap all `get_elements` in `try_call`, HTMLParser has problems with site's html
|
||||
tracklist_table = try_call(lambda: get_element_by_class('table table-small', html)) or ''
|
||||
|
||||
title = try_call(lambda: get_element_text_and_html_by_tag('h1', html)[0])
|
||||
date = traverse_obj(html, ({lambda html: get_element_by_class('font-italic mb-medium d-tablet-none d-phone-block',
|
||||
html).partition('Released ')}, 2, {strip_or_none}, {unified_strdate}))
|
||||
# NB: HTMLParser may choke on this html; use {find_element} or try_call(lambda: get_element...)
|
||||
tracklist_table = traverse_obj(html, {find_element(cls='table table-small')}) or ''
|
||||
title = traverse_obj(html, ({find_element(tag='h1')}, {clean_html}))
|
||||
|
||||
album_meta = {
|
||||
'title': title,
|
||||
'album': title,
|
||||
'thumbnail': f'https://www.monstercat.com/release/{url_id}/cover',
|
||||
'album_artist': try_call(
|
||||
lambda: get_element_by_class('h-normal text-uppercase mb-desktop-medium mb-smallish', html)),
|
||||
'release_date': date,
|
||||
'album_artists': traverse_obj(html, (
|
||||
{find_element(cls='h-normal text-uppercase mb-desktop-medium mb-smallish')}, {clean_html}, all)),
|
||||
'release_date': traverse_obj(html, (
|
||||
{find_element(cls='font-italic mb-medium d-tablet-none d-phone-block')},
|
||||
{lambda x: x.partition('Released ')}, 2, {strip_or_none}, {unified_strdate})),
|
||||
}
|
||||
|
||||
return self.playlist_result(
|
||||
|
||||
@@ -86,7 +86,7 @@ class NebulaBaseIE(InfoExtractor):
|
||||
|
||||
def _extract_video_metadata(self, episode):
|
||||
channel_url = traverse_obj(
|
||||
episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False)
|
||||
episode, (('channel_slug', 'class_slug'), {urljoin('https://nebula.tv/')}), get_all=False)
|
||||
return {
|
||||
'id': episode['id'].partition(':')[2],
|
||||
**traverse_obj(episode, {
|
||||
|
||||
@@ -6,12 +6,10 @@ from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_text_and_html_by_tag,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class NekoHackerIE(InfoExtractor):
|
||||
@@ -35,7 +33,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20221101',
|
||||
'album': 'Nekoverse',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'Spaceship',
|
||||
'track_number': 1,
|
||||
'duration': 195.0,
|
||||
@@ -53,7 +51,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20221101',
|
||||
'album': 'Nekoverse',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'City Runner',
|
||||
'track_number': 2,
|
||||
'duration': 148.0,
|
||||
@@ -71,7 +69,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20221101',
|
||||
'album': 'Nekoverse',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'Nature Talk',
|
||||
'track_number': 3,
|
||||
'duration': 174.0,
|
||||
@@ -89,7 +87,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20221101',
|
||||
'album': 'Nekoverse',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'Crystal World',
|
||||
'track_number': 4,
|
||||
'duration': 199.0,
|
||||
@@ -115,7 +113,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20210115',
|
||||
'album': '進め!むじなカンパニー',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
|
||||
'track_number': 1,
|
||||
},
|
||||
@@ -132,7 +130,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20210115',
|
||||
'album': '進め!むじなカンパニー',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
|
||||
'track_number': 2,
|
||||
},
|
||||
@@ -149,7 +147,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20210115',
|
||||
'album': '進め!むじなカンパニー',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': '進め!むじなカンパニー (instrumental)',
|
||||
'track_number': 3,
|
||||
},
|
||||
@@ -166,7 +164,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20210115',
|
||||
'album': '進め!むじなカンパニー',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'むじな de なじむ (instrumental)',
|
||||
'track_number': 4,
|
||||
},
|
||||
@@ -181,14 +179,17 @@ class NekoHackerIE(InfoExtractor):
|
||||
playlist = get_element_by_class('playlist', webpage)
|
||||
|
||||
if not playlist:
|
||||
iframe = try_call(lambda: get_element_text_and_html_by_tag('iframe', webpage)[1]) or ''
|
||||
iframe_src = url_or_none(extract_attributes(iframe).get('src'))
|
||||
iframe_src = traverse_obj(webpage, (
|
||||
{find_element(tag='iframe', html=True)}, {extract_attributes}, 'src', {url_or_none}))
|
||||
if not iframe_src:
|
||||
raise ExtractorError('No playlist or embed found in webpage')
|
||||
elif re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src):
|
||||
raise ExtractorError('Spotify embeds are not supported', expected=True)
|
||||
return self.url_result(url, 'Generic')
|
||||
|
||||
player_params = self._search_json(
|
||||
r'var srp_player_params_[\da-f]+\s*=', webpage, 'player params', playlist_id, default={})
|
||||
|
||||
entries = []
|
||||
for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1):
|
||||
entry = traverse_obj(extract_attributes(track), {
|
||||
@@ -200,12 +201,12 @@ class NekoHackerIE(InfoExtractor):
|
||||
'album': 'data-albumtitle',
|
||||
'duration': ('data-tracktime', {parse_duration}),
|
||||
'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
|
||||
'thumbnail': ('data-albumart', {url_or_none}),
|
||||
})
|
||||
entries.append({
|
||||
**entry,
|
||||
'thumbnail': url_or_none(player_params.get('artwork')),
|
||||
'track_number': track_number,
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp3' if entry['ext'] == 'mp3' else None,
|
||||
})
|
||||
|
||||
@@ -36,10 +36,6 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
_API_BASE = 'http://music.163.com/api/'
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@staticmethod
|
||||
def _kilo_or_none(value):
|
||||
return int_or_none(value, scale=1000)
|
||||
|
||||
def _create_eapi_cipher(self, api_path, query_body, cookies):
|
||||
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
|
||||
|
||||
@@ -101,7 +97,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
'vcodec': 'none',
|
||||
**traverse_obj(song, {
|
||||
'ext': ('type', {str}),
|
||||
'abr': ('br', {self._kilo_or_none}),
|
||||
'abr': ('br', {int_or_none(scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
@@ -282,9 +278,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
**lyric_data,
|
||||
**traverse_obj(info, {
|
||||
'title': ('name', {str}),
|
||||
'timestamp': ('album', 'publishTime', {self._kilo_or_none}),
|
||||
'timestamp': ('album', 'publishTime', {int_or_none(scale=1000)}),
|
||||
'thumbnail': ('album', 'picUrl', {url_or_none}),
|
||||
'duration': ('duration', {self._kilo_or_none}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'album': ('album', 'name', {str}),
|
||||
'average_rating': ('score', {int_or_none}),
|
||||
}),
|
||||
@@ -440,7 +436,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
'tags': ('tags', ..., {str}),
|
||||
'uploader': ('creator', 'nickname', {str}),
|
||||
'uploader_id': ('creator', 'userId', {str_or_none}),
|
||||
'timestamp': ('updateTime', {self._kilo_or_none}),
|
||||
'timestamp': ('updateTime', {int_or_none(scale=1000)}),
|
||||
}))
|
||||
if traverse_obj(info, ('playlist', 'specialType')) == 10:
|
||||
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
|
||||
@@ -517,10 +513,10 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')],
|
||||
**traverse_obj(info, {
|
||||
'title': ('name', {str}),
|
||||
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
|
||||
'description': (('desc', 'briefDesc'), {str}, filter),
|
||||
'upload_date': ('publishTime', {unified_strdate}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
'duration': ('duration', {self._kilo_or_none}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'view_count': ('playCount', {int_or_none}),
|
||||
'like_count': ('likeCount', {int_or_none}),
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
@@ -588,7 +584,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'description': ('description', {str}),
|
||||
'creator': ('dj', 'brand', {str}),
|
||||
'thumbnail': ('coverUrl', {url_or_none}),
|
||||
'timestamp': ('createTime', {self._kilo_or_none}),
|
||||
'timestamp': ('createTime', {int_or_none(scale=1000)}),
|
||||
})
|
||||
|
||||
if not self._yes_playlist(
|
||||
@@ -598,7 +594,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
return {
|
||||
'id': str(info['mainSong']['id']),
|
||||
'formats': formats,
|
||||
'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})),
|
||||
'duration': traverse_obj(info, ('mainSong', 'duration', {int_or_none(scale=1000)})),
|
||||
**metainfo,
|
||||
}
|
||||
|
||||
|
||||
@@ -371,7 +371,7 @@ class NexxIE(InfoExtractor):
|
||||
# not all videos work via arc, e.g. nexx:741:1269984
|
||||
if not video:
|
||||
# Reverse engineered from JS code (see getDeviceID function)
|
||||
device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(1e4, 99999)}{random.randint(1, 9)}'
|
||||
device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(10000, 99999)}{random.randint(1, 9)}'
|
||||
|
||||
result = self._call_api(domain_id, 'session/init', video_id, data={
|
||||
'nxp_devh': device_id,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user