mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-24 00:49:06 +00:00
Merge branch 'master' into jsi
This commit is contained in:
@@ -217,6 +217,7 @@ from .bbc import (
|
||||
BBCCoUkIPlayerGroupIE,
|
||||
BBCCoUkPlaylistIE,
|
||||
)
|
||||
from .beacon import BeaconTvIE
|
||||
from .beatbump import (
|
||||
BeatBumpPlaylistIE,
|
||||
BeatBumpVideoIE,
|
||||
@@ -277,6 +278,7 @@ from .bleacherreport import (
|
||||
from .blerp import BlerpIE
|
||||
from .blogger import BloggerIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bluesky import BlueskyIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .boosty import BoostyIE
|
||||
@@ -362,7 +364,10 @@ from .ccc import (
|
||||
)
|
||||
from .ccma import CCMAIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .cda import (
|
||||
CDAIE,
|
||||
CDAFolderIE,
|
||||
)
|
||||
from .cellebrite import CellebriteIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .cgtn import CGTNIE
|
||||
@@ -397,8 +402,6 @@ from .cmt import CMTIE
|
||||
from .cnbc import CNBCVideoIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNArticleIE,
|
||||
CNNBlogsIE,
|
||||
CNNIndonesiaIE,
|
||||
)
|
||||
from .comedycentral import (
|
||||
@@ -729,6 +732,7 @@ from .genius import (
|
||||
GeniusIE,
|
||||
GeniusLyricsIE,
|
||||
)
|
||||
from .germanupa import GermanupaIE
|
||||
from .getcourseru import (
|
||||
GetCourseRuIE,
|
||||
GetCourseRuPlayerIE,
|
||||
@@ -822,7 +826,10 @@ from .hungama import (
|
||||
HungamaIE,
|
||||
HungamaSongIE,
|
||||
)
|
||||
from .huya import HuyaLiveIE
|
||||
from .huya import (
|
||||
HuyaLiveIE,
|
||||
HuyaVideoIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .hypergryph import MonsterSirenHypergryphMusicIE
|
||||
from .hytale import HytaleIE
|
||||
@@ -945,6 +952,7 @@ from .kick import (
|
||||
)
|
||||
from .kicker import KickerIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kika import KikaIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .kommunetv import KommunetvIE
|
||||
@@ -1036,10 +1044,7 @@ from .livestream import (
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .livestreamfails import LivestreamfailsIE
|
||||
from .lnkgo import (
|
||||
LnkGoIE,
|
||||
LnkIE,
|
||||
)
|
||||
from .lnk import LnkIE
|
||||
from .loom import (
|
||||
LoomFolderIE,
|
||||
LoomIE,
|
||||
@@ -1164,6 +1169,7 @@ from .mlb import (
|
||||
)
|
||||
from .mlssoccer import MLSSoccerIE
|
||||
from .mocha import MochaVideoIE
|
||||
from .mojevideo import MojevideoIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .motherless import (
|
||||
@@ -1810,6 +1816,7 @@ from .screen9 import Screen9IE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastify import ScreencastifyIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenrec import ScreenRecIE
|
||||
from .scrippsnetworks import (
|
||||
ScrippsNetworksIE,
|
||||
ScrippsNetworksWatchIE,
|
||||
@@ -1820,6 +1827,7 @@ from .scte import (
|
||||
SCTECourseIE,
|
||||
)
|
||||
from .sejmpl import SejmIE
|
||||
from .sen import SenIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import (
|
||||
SenateGovIE,
|
||||
@@ -1875,6 +1883,7 @@ from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotrim import SmotrimIE
|
||||
from .snapchat import SnapchatSpotlightIE
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import (
|
||||
SohuIE,
|
||||
@@ -2311,6 +2320,7 @@ from .videomore import (
|
||||
VideomoreVideoIE,
|
||||
)
|
||||
from .videopress import VideoPressIE
|
||||
from .vidflex import VidflexIE
|
||||
from .vidio import (
|
||||
VidioIE,
|
||||
VidioLiveIE,
|
||||
|
||||
@@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
'skip': 'This program is not currently available in ABC iview',
|
||||
}, {
|
||||
'url': 'https://iview.abc.net.au/show/inbestigators',
|
||||
'info_dict': {
|
||||
'id': '175343-1',
|
||||
'title': 'Series 1',
|
||||
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
|
||||
'series': 'The Inbestigators',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
webpage_data = self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
|
||||
webpage, 'initial state')
|
||||
video_data = self._parse_json(
|
||||
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
video_data = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
|
||||
transform_source=lambda x: x.encode().decode('unicode_escape'),
|
||||
end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
|
||||
|
||||
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'AcademicEarth:Course'
|
||||
_TEST = {
|
||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||
|
||||
@@ -49,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
'info_dict': {
|
||||
'id': '9841',
|
||||
@@ -71,10 +71,7 @@ class ADNIE(ADNBaseIE):
|
||||
},
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1',
|
||||
'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
@@ -167,7 +164,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
'username': username,
|
||||
})) or {}).get('accessToken')
|
||||
if access_token:
|
||||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
self._HEADERS['Authorization'] = f'Bearer {access_token}'
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
@@ -178,6 +175,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
@@ -218,7 +216,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization,
|
||||
'X-Target-Distribution': lang or 'fr',
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
@@ -257,6 +254,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
load_balancer_data = self._download_json(
|
||||
load_balancer_url, video_id,
|
||||
f'Downloading {format_id} {quality} JSON metadata',
|
||||
headers=self._HEADERS,
|
||||
fatal=False) or {}
|
||||
m3u8_url = load_balancer_data.get('location')
|
||||
if not m3u8_url:
|
||||
@@ -277,7 +275,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + f'video/{video_id}', video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
return {
|
||||
@@ -299,9 +297,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
|
||||
class ADNSeasonIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>[^/?#]+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new',
|
||||
'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': '911',
|
||||
@@ -312,16 +310,14 @@ class ADNSeasonIE(ADNBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
|
||||
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
|
||||
show = self._download_json(
|
||||
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
|
||||
'Downloading show JSON metadata', headers=self._HEADERS)['show']
|
||||
show_id = str(show['id'])
|
||||
episodes = self._download_json(
|
||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||
'Downloading episode list', headers={
|
||||
'X-Target-Distribution': lang or 'fr',
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'Downloading episode list', headers=self._HEADERS, query={
|
||||
'order': 'asc',
|
||||
'limit': '-1',
|
||||
})
|
||||
|
||||
@@ -1355,6 +1355,7 @@ MSO_INFO = {
|
||||
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
@@ -1454,7 +1455,11 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
}, headers={
|
||||
# yt-dlp's default user-agent is usually too old for Comcast_SSO
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
|
||||
'User-Agent': self._MODERN_USER_AGENT,
|
||||
} if mso_id == 'Comcast_SSO' else None)
|
||||
elif not self._cookies_passed:
|
||||
raise_mvpd_required()
|
||||
|
||||
|
||||
@@ -33,21 +33,21 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
'https://login.afreecatv.com/app/LoginAction.php', None,
|
||||
'https://login.sooplive.co.kr/app/LoginAction.php', None,
|
||||
'Logging in', data=urlencode_postdata(login_form))
|
||||
|
||||
_ERRORS = {
|
||||
-4: 'Your account has been suspended due to a violation of our terms and policies.',
|
||||
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
|
||||
-6: 'https://login.afreecatv.com/membership/changeMember.php',
|
||||
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
|
||||
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
|
||||
-12: 'https://member.afreecatv.com/app/user_security.php',
|
||||
-5: 'https://member.sooplive.co.kr/app/user_delete_progress.php',
|
||||
-6: 'https://login.sooplive.co.kr/membership/changeMember.php',
|
||||
-8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
|
||||
-9: 'https://member.sooplive.co.kr/app/pop_login_block.php',
|
||||
-11: 'https://login.sooplive.co.kr/afreeca/second_login.php',
|
||||
-12: 'https://member.sooplive.co.kr/app/user_security.php',
|
||||
0: 'The username does not exist or you have entered the wrong password.',
|
||||
-1: 'The username does not exist or you have entered the wrong password.',
|
||||
-3: 'You have entered your username/password incorrectly.',
|
||||
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
|
||||
-7: 'You cannot use your Global Soop account to access Korean Soop.',
|
||||
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
|
||||
-32008: 'You have failed to log in. Please contact our Help Center.',
|
||||
}
|
||||
@@ -61,76 +61,40 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
|
||||
return self._download_json(Request(
|
||||
f'https://api.m.afreecatv.com/{endpoint}',
|
||||
f'https://api.m.sooplive.co.kr/{endpoint}',
|
||||
data=data, headers=headers, query=query,
|
||||
extensions={'legacy_ssl': True}), display_id,
|
||||
'Downloading API JSON', 'Unable to download API JSON')
|
||||
|
||||
|
||||
class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv'
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)/?(?:$|[?#&])
|
||||
'''
|
||||
IE_NAME = 'soop'
|
||||
IE_DESC = 'sooplive.co.kr'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
'url': 'https://vod.sooplive.co.kr/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '36164052',
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'title': '데일리 에이프릴 요정들의 시상식!',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160503',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||
'info_dict': {
|
||||
'id': '36153164',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '36153164_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '36153164_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}],
|
||||
'skip': 'Video is gone',
|
||||
}, {
|
||||
# non standard key
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
|
||||
'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605',
|
||||
'info_dict': {
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
@@ -142,12 +106,12 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/97267690',
|
||||
'url': 'https://vod.sooplive.co.kr/player/97267690',
|
||||
'info_dict': {
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
@@ -157,36 +121,17 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The VOD does not exist',
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vod.afreecatv.com/player/96753363',
|
||||
'info_dict': {
|
||||
'id': '20230108_9FF5BEE1_244432674_1',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult content
|
||||
'url': 'https://vod.afreecatv.com/player/70395877',
|
||||
'url': 'https://vod.sooplive.co.kr/player/70395877',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# subscribers only
|
||||
'url': 'https://vod.afreecatv.com/player/104647403',
|
||||
'url': 'https://vod.sooplive.co.kr/player/104647403',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private
|
||||
'url': 'https://vod.afreecatv.com/player/81669846',
|
||||
'url': 'https://vod.sooplive.co.kr/player/81669846',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -262,11 +207,11 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
|
||||
|
||||
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:catchstory'
|
||||
IE_DESC = 'afreecatv.com catch story'
|
||||
_VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
|
||||
IE_NAME = 'soop:catchstory'
|
||||
IE_DESC = 'sooplive.co.kr catch story'
|
||||
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory'
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.afreecatv.com/player/103247/catchstory',
|
||||
'url': 'https://vod.sooplive.co.kr/player/103247/catchstory',
|
||||
'info_dict': {
|
||||
'id': '103247',
|
||||
},
|
||||
@@ -299,11 +244,11 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:live'
|
||||
IE_DESC = 'afreecatv.com livestreams'
|
||||
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
|
||||
IE_NAME = 'soop:live'
|
||||
IE_DESC = 'sooplive.co.kr livestreams'
|
||||
_VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'info_dict': {
|
||||
'id': '237852185',
|
||||
'ext': 'mp4',
|
||||
@@ -315,30 +260,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
},
|
||||
'skip': 'Livestream has ended',
|
||||
}, {
|
||||
'url': 'https://play.afreecatv.com/pyh3646/237852185',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.afreecatv.com/pyh3646',
|
||||
'url': 'https://play.sooplive.co.kr/pyh3646',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
|
||||
_LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php'
|
||||
_WORKING_CDNS = [
|
||||
'gcp_cdn', # live-global-cdn-v02.afreecatv.com
|
||||
'gs_cdn_pc_app', # pc-app.stream.afreecatv.com
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com
|
||||
'gs_cdn_pc_web', # pc-web.stream.afreecatv.com
|
||||
'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr
|
||||
'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr
|
||||
'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr
|
||||
'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr
|
||||
]
|
||||
_BAD_CDNS = [
|
||||
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
|
||||
'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve)
|
||||
'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400)
|
||||
'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400)
|
||||
'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve)
|
||||
'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve)
|
||||
'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400)
|
||||
]
|
||||
|
||||
def _extract_formats(self, channel_info, broadcast_no, aid):
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
|
||||
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr'
|
||||
|
||||
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
|
||||
default_cdn_ids = orderedSet([
|
||||
@@ -358,7 +303,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
try:
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
|
||||
headers={'Referer': 'https://play.afreecatv.com/'})
|
||||
headers={'Referer': 'https://play.sooplive.co.kr/'})
|
||||
except ExtractorError as e:
|
||||
if attempt == len(cdn_ids):
|
||||
raise
|
||||
@@ -374,7 +319,13 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
broadcaster_id = channel_info.get('BJID') or broadcaster_id
|
||||
broadcast_no = channel_info.get('BNO') or broadcast_no
|
||||
if not broadcast_no:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
result = channel_info.get('RESULT')
|
||||
if result == 0:
|
||||
raise UserNotLive(video_id=broadcaster_id)
|
||||
elif result == -6:
|
||||
self.raise_login_required(
|
||||
'This channel is streaming for subscribers only', method='password')
|
||||
raise ExtractorError('Unable to extract broadcast number')
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if channel_info.get('BPWD') == 'Y' and password is None:
|
||||
@@ -403,7 +354,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
formats = self._extract_formats(channel_info, broadcast_no, aid)
|
||||
|
||||
station_info = traverse_obj(self._download_json(
|
||||
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
|
||||
'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no,
|
||||
'Downloading channel metadata', 'Unable to download channel metadata',
|
||||
query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
|
||||
|
||||
@@ -419,11 +370,11 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class AfreecaTVUserIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv:user'
|
||||
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
|
||||
class AfreecaTVUserIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop:user'
|
||||
_VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -431,7 +382,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 218,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
|
||||
'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'parang1995',
|
||||
@@ -439,7 +390,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 997,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -447,7 +398,7 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 221,
|
||||
}, {
|
||||
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
|
||||
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'ryuryu24',
|
||||
@@ -459,12 +410,12 @@ class AfreecaTVUserIE(InfoExtractor):
|
||||
|
||||
def _fetch_page(self, user_id, user_type, page):
|
||||
page += 1
|
||||
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
|
||||
info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id,
|
||||
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
|
||||
note=f'Downloading {user_type} video page {page}')
|
||||
for item in info['data']:
|
||||
yield self.url_result(
|
||||
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
|
||||
|
||||
@@ -33,24 +33,6 @@ class AnvatoIE(InfoExtractor):
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
||||
'info_dict': {
|
||||
'id': '899441',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'NFL',
|
||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
||||
'duration': 157,
|
||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
||||
},
|
||||
}, {
|
||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||
@@ -241,31 +223,6 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||
}
|
||||
|
||||
def _generate_nfl_token(self, anvack, mcp_id):
|
||||
reroute = self._download_json(
|
||||
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
|
||||
headers={'X-Domain-Id': 100}, note='Fetching token info')
|
||||
token_type = reroute.get('token_type') or 'Bearer'
|
||||
auth_token = f'{token_type} {reroute["access_token"]}'
|
||||
response = self._download_json(
|
||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
||||
'query': '''{
|
||||
viewer {
|
||||
mediaToken(anvack: "%s", id: %s) {
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id), # noqa: UP031
|
||||
}).encode(), headers={
|
||||
'Authorization': auth_token,
|
||||
'Content-Type': 'application/json',
|
||||
}, note='Fetching NFL API token')
|
||||
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
|
||||
|
||||
_TOKEN_GENERATORS = {
|
||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
|
||||
}
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
return int_or_none(traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
|
||||
@@ -290,8 +247,6 @@ class AnvatoIE(InfoExtractor):
|
||||
}
|
||||
if extracted_token is not None:
|
||||
api['anvstk2'] = extracted_token
|
||||
elif self._TOKEN_GENERATORS.get(access_key) is not None:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
|
||||
elif self._ANVACK_TABLE.get(access_key) is not None:
|
||||
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
||||
else:
|
||||
|
||||
@@ -1,27 +1,42 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
|
||||
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
|
||||
'info_dict': {
|
||||
'id': '1000665010654',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
|
||||
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
|
||||
'upload_date': '20240812',
|
||||
'timestamp': 1723449600,
|
||||
'duration': 3596,
|
||||
'series': 'Ferreck Dawn - To The Break of Dawn',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'episode': '207 - Whitney Webb Returns',
|
||||
'episode_number': 207,
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593932400,
|
||||
'duration': 6454,
|
||||
'duration': 5369,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
},
|
||||
@@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
server_data = self._search_json(
|
||||
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
|
||||
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
|
||||
model_data = traverse_obj(server_data, (
|
||||
'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
|
||||
'model', {dict}, any))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': episode.get('name'),
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
'description': description.get('standard') or description.get('short'),
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
**self._json_ld(
|
||||
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
|
||||
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
|
||||
**traverse_obj(model_data, {
|
||||
'title': ('title', {str}),
|
||||
'url': ('streamUrl', {clean_podcast_url}),
|
||||
'timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
||||
@@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARDMediathek'
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
@@ -299,7 +299,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '94834686',
|
||||
'ext': 'mp4',
|
||||
'duration': 2700,
|
||||
'duration': 2670,
|
||||
'episode': '7 Tage ... unter harten Jungs',
|
||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||
'upload_date': '20231005',
|
||||
@@ -307,10 +307,28 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'series': '7 Tage ...',
|
||||
'channel': 'HR',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
|
||||
'title': '7 Tage ... unter harten Jungs',
|
||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'info_dict': {
|
||||
'id': '13847165',
|
||||
'chapters': 'count:8',
|
||||
'ext': 'mp4',
|
||||
'channel': 'WDR',
|
||||
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'series': 'Lokalzeit aus Düsseldorf',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
|
||||
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'upload_date': '20241031',
|
||||
'timestamp': 1730399400,
|
||||
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
|
||||
'duration': 1759,
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'only_matching': True,
|
||||
@@ -455,6 +473,12 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
'age_limit': age_limit,
|
||||
**traverse_obj(media_data, {
|
||||
'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
|
||||
'start_time': ('chapterTime', {int_or_none}),
|
||||
'title': ('chapterTitle', {str}),
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(media_data, ('meta', {
|
||||
'title': 'title',
|
||||
'description': 'synopsis',
|
||||
@@ -470,7 +494,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDMediathekCollectionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https://
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/?#]+/)?
|
||||
(?P<playlist>sendung|serie|sammlung)/
|
||||
|
||||
@@ -101,9 +101,10 @@ class AsobiStageIE(InfoExtractor):
|
||||
self._HEADERS['Authorization'] = f'Bearer {token}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
|
||||
webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
|
||||
video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
|
||||
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
event_data = traverse_obj(
|
||||
self._search_nextjs_data(webpage, video_id, default={}),
|
||||
('props', 'pageProps', 'eventCMSData', {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import functools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -6,7 +8,9 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
@@ -17,6 +21,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
@@ -459,7 +464,7 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://coldworldofficial.bandcamp.com/music',
|
||||
'playlist_mincount': 10,
|
||||
'playlist_mincount': 7,
|
||||
'info_dict': {
|
||||
'id': 'coldworldofficial',
|
||||
'title': 'Discography of coldworldofficial',
|
||||
@@ -473,12 +478,19 @@ class BandcampUserIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _yield_items(self, webpage):
|
||||
yield from (
|
||||
re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
yield from traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
|
||||
'data-client-items', {json.loads}, ..., 'page_url', {str}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader = self._match_id(url)
|
||||
webpage = self._download_webpage(url, uploader)
|
||||
|
||||
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
return self.playlist_from_matches(
|
||||
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
|
||||
self._yield_items(webpage), uploader, f'Discography of {uploader}',
|
||||
getter=functools.partial(urljoin, url))
|
||||
|
||||
68
yt_dlp/extractor/beacon.py
Normal file
68
yt_dlp/extractor/beacon.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BeaconTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beacon\.tv/content/(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://beacon.tv/content/welcome-to-beacon',
|
||||
'md5': 'b3f5932d437f288e662f10f3bfc5bd04',
|
||||
'info_dict': {
|
||||
'id': 'welcome-to-beacon',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240509',
|
||||
'description': 'md5:ea2bd32e71acf3f9fca6937412cc3563',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/I4CkkEvN/poster.jpg?width=720',
|
||||
'title': 'Your home for Critical Role!',
|
||||
'timestamp': 1715227200,
|
||||
'duration': 105.494,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beacon.tv/content/re-slayers-take-trailer',
|
||||
'md5': 'd879b091485dbed2245094c8152afd89',
|
||||
'info_dict': {
|
||||
'id': 're-slayers-take-trailer',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Re-Slayer’s Take | Official Trailer',
|
||||
'timestamp': 1715189040,
|
||||
'upload_date': '20240508',
|
||||
'duration': 53.249,
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/PW5ApIw3/poster.jpg?width=720',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
content_data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'pageProps', '__APOLLO_STATE__',
|
||||
lambda k, v: k.startswith('Content:') and v['slug'] == video_id, any))
|
||||
if not content_data:
|
||||
raise ExtractorError('Failed to extract content data')
|
||||
|
||||
jwplayer_data = traverse_obj(content_data, (
|
||||
(('contentVideo', 'video', 'videoData'),
|
||||
('contentPodcast', 'podcast', 'audioData')), {json.loads}, {dict}, any))
|
||||
if not jwplayer_data:
|
||||
if content_data.get('contentType') not in ('videoPodcast', 'video', 'podcast'):
|
||||
raise ExtractorError('Content is not a video/podcast', expected=True)
|
||||
if traverse_obj(content_data, ('contentTier', '__ref')) != 'MemberTier:65b258d178f89be87b4dc0a4':
|
||||
self.raise_login_required('This video/podcast is for members only')
|
||||
raise ExtractorError('Failed to extract content')
|
||||
|
||||
return {
|
||||
**self._parse_jwplayer_data(jwplayer_data, video_id),
|
||||
**traverse_obj(content_data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('publishedAt', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
@@ -1,18 +1,33 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes
|
||||
from ..utils import ExtractorError, extract_attributes
|
||||
|
||||
|
||||
class BFMTVBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
|
||||
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>.*?</div>)'
|
||||
_VIDEO_ELEMENT_REGEX = r'(<video-js[^>]+>)'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _brightcove_url_result(self, video_id, video_block):
|
||||
account_id = video_block.get('accountid') or '876450612001'
|
||||
player_id = video_block.get('playerid') or 'I2qBTln4u'
|
||||
def _extract_video(self, video_block):
|
||||
video_element = self._search_regex(
|
||||
self._VIDEO_ELEMENT_REGEX, video_block, 'video element', default=None)
|
||||
if video_element:
|
||||
video_element_attrs = extract_attributes(video_element)
|
||||
video_id = video_element_attrs.get('data-video-id')
|
||||
if not video_id:
|
||||
return
|
||||
account_id = video_element_attrs.get('data-account') or '876450610001'
|
||||
player_id = video_element_attrs.get('adjustplayer') or '19dszYXgm'
|
||||
else:
|
||||
video_block_attrs = extract_attributes(video_block)
|
||||
video_id = video_block_attrs.get('videoid')
|
||||
if not video_id:
|
||||
return
|
||||
account_id = video_block_attrs.get('accountid') or '876630703001'
|
||||
player_id = video_block_attrs.get('playerid') or 'KbPwEbuHx'
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||
'BrightcoveNew', video_id)
|
||||
@@ -40,23 +55,25 @@ class BFMTVIE(BFMTVBaseIE):
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
video_block = extract_attributes(self._search_regex(
|
||||
video = self._extract_video(self._search_regex(
|
||||
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||
return self._brightcove_url_result(video_block['videoid'], video_block)
|
||||
if not video:
|
||||
raise ExtractorError('Failed to extract video')
|
||||
return video
|
||||
|
||||
|
||||
class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
class BFMTVLiveIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:live'
|
||||
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bfmtv.com/en-direct/',
|
||||
'info_dict': {
|
||||
'id': '5615950982001',
|
||||
'id': '6346069778112',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'title': r're:^Le Live BFM TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'uploader_id': '876450610001',
|
||||
'upload_date': '20220926',
|
||||
'timestamp': 1664207191,
|
||||
'upload_date': '20240202',
|
||||
'timestamp': 1706887572,
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://.+/image\.jpg',
|
||||
'tags': [],
|
||||
@@ -69,6 +86,15 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
video = self._extract_video(self._search_regex(
|
||||
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||
if not video:
|
||||
raise ExtractorError('Failed to extract video')
|
||||
return video
|
||||
|
||||
|
||||
class BFMTVArticleIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:article'
|
||||
@@ -102,18 +128,16 @@ class BFMTVArticleIE(BFMTVBaseIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _entries(self, webpage):
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video = self._extract_video(video_block_el)
|
||||
if video:
|
||||
yield video
|
||||
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
|
||||
entries = []
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video_block = extract_attributes(video_block_el)
|
||||
video_id = video_block.get('videoid')
|
||||
if not video_id:
|
||||
continue
|
||||
entries.append(self._brightcove_url_result(video_id, video_block))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._entries(webpage), bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._html_search_meta(['og:description', 'description'], webpage))
|
||||
|
||||
@@ -46,6 +46,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
|
||||
_wbi_key_cache = {}
|
||||
@@ -192,7 +193,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
video_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}')
|
||||
note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
|
||||
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||
self.report_warning(
|
||||
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
|
||||
@@ -207,7 +208,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
def _get_chapters(self, aid, cid):
|
||||
chapters = aid and cid and self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
note='Extracting chapters', fatal=False)
|
||||
note='Extracting chapters', fatal=False, headers=self._HEADERS)
|
||||
return traverse_obj(chapters, ('data', 'view_points', ..., {
|
||||
'title': 'content',
|
||||
'start_time': 'from',
|
||||
@@ -298,7 +299,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
@@ -622,6 +623,10 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'skip': 'geo-restricted',
|
||||
}, {
|
||||
'note': 'has - in the last path segment of the url',
|
||||
'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -1017,8 +1022,6 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
|
||||
|
||||
class BilibiliCheeseBaseIE(BilibiliBaseIE):
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _extract_episode(self, season_info, ep_id):
|
||||
episode_info = traverse_obj(season_info, (
|
||||
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
|
||||
@@ -1848,7 +1851,7 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.tv/'}
|
||||
|
||||
def _call_api(self, endpoint, *args, **kwargs):
|
||||
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
|
||||
|
||||
388
yt_dlp/extractor/bluesky.py
Normal file
388
yt_dlp/extractor/bluesky.py
Normal file
@@ -0,0 +1,388 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
format_field,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
truncate_string,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BlueskyIE(InfoExtractor):
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?(?:bsky\.app|main\.bsky\.dev)/profile/(?P<handle>[\w.:%-]+)/post/(?P<id>\w+)',
|
||||
r'at://(?P<handle>[\w.:%-]+)/app\.bsky\.feed\.post/(?P<id>\w+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://bsky.app/profile/blu3blue.bsky.social/post/3l4omssdl632g',
|
||||
'md5': '375539c1930ab05d15585ed772ab54fd',
|
||||
'info_dict': {
|
||||
'id': '3l4omssdl632g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Blu3Blu3Lilith',
|
||||
'uploader_id': 'blu3blue.bsky.social',
|
||||
'uploader_url': 'https://bsky.app/profile/blu3blue.bsky.social',
|
||||
'channel_id': 'did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'OMG WE HAVE VIDEOS NOW',
|
||||
'description': 'OMG WE HAVE VIDEOS NOW',
|
||||
'upload_date': '20240921',
|
||||
'timestamp': 1726940605,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/bsky.app/post/3l3vgf77uco2g',
|
||||
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||
'info_dict': {
|
||||
'id': '3l3vgf77uco2g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Bluesky',
|
||||
'uploader_id': 'bsky.app',
|
||||
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726074716,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://main.bsky.dev/profile/souris.moe/post/3l4qhp7bcs52c',
|
||||
'md5': '5f2df8c200b5633eb7fb2c984d29772f',
|
||||
'info_dict': {
|
||||
'id': '3l4qhp7bcs52c',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'souris',
|
||||
'uploader_id': 'souris.moe',
|
||||
'uploader_url': 'https://bsky.app/profile/souris.moe',
|
||||
'channel_id': 'did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l4qhp7bcs52c',
|
||||
'upload_date': '20240922',
|
||||
'timestamp': 1727003838,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
|
||||
'md5': '1af9c7fda061cf7593bbffca89e43d1c',
|
||||
'info_dict': {
|
||||
'id': '3l3w4tnezek2e',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'clean',
|
||||
'uploader_id': 'de1.pds.tentacle.expert',
|
||||
'uploader_url': 'https://bsky.app/profile/de1.pds.tentacle.expert',
|
||||
'channel_id': 'did:web:de1.tentacle.expert',
|
||||
'channel_url': 'https://bsky.app/profile/did:web:de1.tentacle.expert',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l3w4tnezek2e',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726098823,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/yunayuispink.bsky.social/post/3l7gqcfes742o',
|
||||
'info_dict': {
|
||||
'id': 'XxK3t_5V3ao',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'yunayu',
|
||||
'uploader_id': '@yunayuispink',
|
||||
'uploader_url': 'https://www.youtube.com/@yunayuispink',
|
||||
'channel': 'yunayu',
|
||||
'channel_id': 'UCPLvXnHa7lTyNoR_dGsU14w',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCPLvXnHa7lTyNoR_dGsU14w',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/XxK3t_5V3ao/maxresdefault.webp',
|
||||
'description': r're:Have a good goodx10000day',
|
||||
'title': '5min vs 5hours drawing',
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'upload_date': '20241026',
|
||||
'timestamp': 1729967784,
|
||||
'duration': 321,
|
||||
'age_limit': 0,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int,
|
||||
'categories': ['Entertainment'],
|
||||
'tags': [],
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/endshark.bsky.social/post/3jzxjkcemae2m',
|
||||
'info_dict': {
|
||||
'id': '222792849',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'LASERBAT',
|
||||
'uploader_id': 'laserbatx',
|
||||
'uploader_url': 'https://laserbatx.bandcamp.com',
|
||||
'artists': ['LASERBAT'],
|
||||
'album_artists': ['LASERBAT'],
|
||||
'album': 'Hari Nezumi [EP]',
|
||||
'track': 'Forward to the End',
|
||||
'title': 'LASERBAT - Forward to the End',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a2507705510_5.jpg',
|
||||
'duration': 228.571,
|
||||
'track_id': '222792849',
|
||||
'release_date': '20230423',
|
||||
'upload_date': '20230423',
|
||||
'timestamp': 1682276040.0,
|
||||
'release_timestamp': 1682276040.0,
|
||||
'track_number': 1,
|
||||
},
|
||||
'add_ie': ['Bandcamp'],
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/dannybhoix.bsky.social/post/3l6oe5mtr2c2j',
|
||||
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||
'info_dict': {
|
||||
'id': '3l3vgf77uco2g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Bluesky',
|
||||
'uploader_id': 'bsky.app',
|
||||
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726074716,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
|
||||
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
|
||||
'info_dict': {
|
||||
'id': '3l7rdfxhyds2f',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'cinnamon',
|
||||
'uploader_id': 'alt.bun.how',
|
||||
'uploader_url': 'https://bsky.app/profile/alt.bun.how',
|
||||
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'crazy that i look like this tbh',
|
||||
'description': 'crazy that i look like this tbh',
|
||||
'upload_date': '20241030',
|
||||
'timestamp': 1730332128,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['sexual'],
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
'url': 'at://did:plc:ia76kvnndjutgedggx2ibrem/app.bsky.feed.post/3l6zrz6zyl2dr',
|
||||
'md5': '71b0eb6d85d03145e6af6642c7fc6d78',
|
||||
'info_dict': {
|
||||
'id': '3l6zrz6zyl2dr',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'mary🐇',
|
||||
'uploader_id': 'mary.my.id',
|
||||
'uploader_url': 'https://bsky.app/profile/mary.my.id',
|
||||
'channel_id': 'did:plc:ia76kvnndjutgedggx2ibrem',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:ia76kvnndjutgedggx2ibrem',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l6zrz6zyl2dr',
|
||||
'upload_date': '20241021',
|
||||
'timestamp': 1729523172,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/purpleicetea.bsky.social/post/3l7gv55dc2o2w',
|
||||
'info_dict': {
|
||||
'id': '3l7gv55dc2o2w',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '3l7gv55dc2o2w',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20241026',
|
||||
'description': 'One of my favorite videos',
|
||||
'comment_count': int,
|
||||
'uploader_url': 'https://bsky.app/profile/purpleicetea.bsky.social',
|
||||
'uploader': 'Purple.Ice.Tea',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||
'like_count': int,
|
||||
'channel_id': 'did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||
'repost_count': int,
|
||||
'timestamp': 1729973202,
|
||||
'tags': [],
|
||||
'uploader_id': 'purpleicetea.bsky.social',
|
||||
'title': 'One of my favorite videos',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '3l77u64l7le2e',
|
||||
'ext': 'mp4',
|
||||
'title': 'hearing people on twitter say that bluesky isn\'...',
|
||||
'like_count': int,
|
||||
'uploader_id': 'thafnine.net',
|
||||
'uploader_url': 'https://bsky.app/profile/thafnine.net',
|
||||
'upload_date': '20241024',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||
'description': r're:(?s)hearing people on twitter say that bluesky .{93}',
|
||||
'tags': [],
|
||||
'alt_title': 'md5:9b1ee1937fb3d1a81e932f9ec14d560e',
|
||||
'uploader': 'T9',
|
||||
'channel_id': 'did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'timestamp': 1729731642,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}],
|
||||
}]
|
||||
_BLOB_URL_TMPL = '{}/xrpc/com.atproto.sync.getBlob'
|
||||
|
||||
def _get_service_endpoint(self, did, video_id):
|
||||
if did.startswith('did:web:'):
|
||||
url = f'https://{did[8:]}/.well-known/did.json'
|
||||
else:
|
||||
url = f'https://plc.directory/{did}'
|
||||
services = self._download_json(
|
||||
url, video_id, 'Fetching service endpoint', 'Falling back to bsky.social', fatal=False)
|
||||
return traverse_obj(
|
||||
services, ('service', lambda _, x: x['type'] == 'AtprotoPersonalDataServer',
|
||||
'serviceEndpoint', {url_or_none}, any)) or 'https://bsky.social'
|
||||
|
||||
def _real_extract(self, url):
|
||||
handle, video_id = self._match_valid_url(url).group('handle', 'id')
|
||||
|
||||
post = self._download_json(
|
||||
'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
|
||||
video_id, query={
|
||||
'uri': f'at://{handle}/app.bsky.feed.post/{video_id}',
|
||||
'depth': 0,
|
||||
'parentHeight': 0,
|
||||
})['thread']['post']
|
||||
|
||||
entries = []
|
||||
# app.bsky.embed.video.view/app.bsky.embed.external.view
|
||||
entries.extend(self._extract_videos(post, video_id))
|
||||
# app.bsky.embed.recordWithMedia.view
|
||||
entries.extend(self._extract_videos(
|
||||
post, video_id, embed_path=('embed', 'media'), record_subpath=('embed', 'media')))
|
||||
# app.bsky.embed.record.view
|
||||
if nested_post := traverse_obj(post, ('embed', 'record', ('record', None), {dict}, any)):
|
||||
entries.extend(self._extract_videos(
|
||||
nested_post, video_id, embed_path=('embeds', 0), record_path='value'))
|
||||
|
||||
if not entries:
|
||||
raise ExtractorError('No video could be found in this post', expected=True)
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
@staticmethod
|
||||
def _build_profile_url(path):
|
||||
return format_field(path, None, 'https://bsky.app/profile/%s', default=None)
|
||||
|
||||
def _extract_videos(self, root, video_id, embed_path='embed', record_path='record', record_subpath='embed'):
|
||||
embed_path = variadic(embed_path, (str, bytes, dict, set))
|
||||
record_path = variadic(record_path, (str, bytes, dict, set))
|
||||
record_subpath = variadic(record_subpath, (str, bytes, dict, set))
|
||||
|
||||
entries = []
|
||||
if external_uri := traverse_obj(root, (
|
||||
((*record_path, *record_subpath), embed_path), 'external', 'uri', {url_or_none}, any)):
|
||||
entries.append(self.url_result(external_uri))
|
||||
if playlist := traverse_obj(root, (*embed_path, 'playlist', {url_or_none})):
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
playlist, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
else:
|
||||
return entries
|
||||
|
||||
video_cid = traverse_obj(
|
||||
root, (*embed_path, 'cid', {str}),
|
||||
(*record_path, *record_subpath, 'video', 'ref', '$link', {str}))
|
||||
did = traverse_obj(root, ('author', 'did', {str}))
|
||||
|
||||
if did and video_cid:
|
||||
endpoint = self._get_service_endpoint(did, video_id)
|
||||
|
||||
formats.append({
|
||||
'format_id': 'blob',
|
||||
'url': update_url_query(
|
||||
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': video_cid}),
|
||||
**traverse_obj(root, (*embed_path, 'aspectRatio', {
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
})),
|
||||
**traverse_obj(root, (*record_path, *record_subpath, 'video', {
|
||||
'filesize': ('size', {int_or_none}),
|
||||
'ext': ('mimeType', {mimetype2ext}),
|
||||
})),
|
||||
})
|
||||
|
||||
for sub_data in traverse_obj(root, (
|
||||
*record_path, *record_subpath, 'captions', lambda _, v: v['file']['ref']['$link'])):
|
||||
subtitles.setdefault(sub_data.get('lang') or 'und', []).append({
|
||||
'url': update_url_query(
|
||||
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': sub_data['file']['ref']['$link']}),
|
||||
'ext': traverse_obj(sub_data, ('file', 'mimeType', {mimetype2ext})),
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(root, {
|
||||
'id': ('uri', {url_basename}),
|
||||
'thumbnail': (*embed_path, 'thumbnail', {url_or_none}),
|
||||
'alt_title': (*embed_path, 'alt', {str}, filter),
|
||||
'uploader': ('author', 'displayName', {str}),
|
||||
'uploader_id': ('author', 'handle', {str}),
|
||||
'uploader_url': ('author', 'handle', {self._build_profile_url}),
|
||||
'channel_id': ('author', 'did', {str}),
|
||||
'channel_url': ('author', 'did', {self._build_profile_url}),
|
||||
'like_count': ('likeCount', {int_or_none}),
|
||||
'repost_count': ('repostCount', {int_or_none}),
|
||||
'comment_count': ('replyCount', {int_or_none}),
|
||||
'timestamp': ('indexedAt', {parse_iso8601}),
|
||||
'tags': ('labels', ..., 'val', {str}, all, {orderedSet}),
|
||||
'age_limit': (
|
||||
'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
|
||||
'description': (*record_path, 'text', {str}, filter),
|
||||
'title': (*record_path, 'text', {lambda x: x.replace('\n', '')}, {truncate_string(left=50)}),
|
||||
}),
|
||||
})
|
||||
return entries
|
||||
@@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
|
||||
|
||||
|
||||
class CallinIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
|
||||
'info_dict': {
|
||||
|
||||
@@ -4,7 +4,6 @@ import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
@@ -12,7 +11,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
@@ -524,14 +522,13 @@ class CBCGemIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# This is a normal, public, TV show video
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||
'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s06e01',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoke Signals',
|
||||
'description': 'md5:929868d20021c924020641769eb3e7f1',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
||||
'duration': 1314,
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
|
||||
'duration': 1324,
|
||||
'categories': ['comedy'],
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season': 'Season 6',
|
||||
@@ -539,19 +536,21 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode': 'Smoke Signals',
|
||||
'episode_number': 1,
|
||||
'episode_id': 'schitts-creek/s06e01',
|
||||
'upload_date': '20210618',
|
||||
'timestamp': 1623988800,
|
||||
'release_date': '20200107',
|
||||
'release_timestamp': 1578427200,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# This video requires an account in the browser, but works fine in yt-dlp
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
|
||||
'md5': '297a9600f554f2258aed01514226a697',
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s01e01',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Cup Runneth Over',
|
||||
'description': 'md5:9bca14ea49ab808097530eb05a29e797',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg',
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
@@ -560,9 +559,12 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode_id': 'schitts-creek/s01e01',
|
||||
'duration': 1309,
|
||||
'categories': ['comedy'],
|
||||
'upload_date': '20210617',
|
||||
'timestamp': 1623902400,
|
||||
'release_date': '20151124',
|
||||
'release_timestamp': 1448323200,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
|
||||
'only_matching': True,
|
||||
@@ -631,38 +633,6 @@ class CBCGemIE(InfoExtractor):
|
||||
return
|
||||
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
||||
|
||||
def _find_secret_formats(self, formats, video_id):
|
||||
""" Find a valid video url and convert it to the secret variant """
|
||||
base_format = next((f for f in formats if f.get('vcodec') != 'none'), None)
|
||||
if not base_format:
|
||||
return
|
||||
|
||||
base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])
|
||||
url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
|
||||
|
||||
secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
|
||||
if not isinstance(secret_xml, xml.etree.ElementTree.Element):
|
||||
return
|
||||
|
||||
for child in secret_xml:
|
||||
if child.attrib.get('Type') != 'video':
|
||||
continue
|
||||
for video_quality in child:
|
||||
bitrate = int_or_none(video_quality.attrib.get('Bitrate'))
|
||||
if not bitrate or 'Index' not in video_quality.attrib:
|
||||
continue
|
||||
height = int_or_none(video_quality.attrib.get('MaxHeight'))
|
||||
|
||||
yield {
|
||||
**base_format,
|
||||
'format_id': join_nonempty('sec', height),
|
||||
# Note: \g<1> is necessary instead of \1 since bitrate is a number
|
||||
'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
|
||||
'width': int_or_none(video_quality.attrib.get('MaxWidth')),
|
||||
'tbr': bitrate / 1000.0,
|
||||
'height': height,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = self._download_json(
|
||||
@@ -676,7 +646,6 @@ class CBCGemIE(InfoExtractor):
|
||||
else:
|
||||
headers = {}
|
||||
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
|
||||
m3u8_url = m3u8_info.get('url')
|
||||
|
||||
if m3u8_info.get('errorCode') == 1:
|
||||
self.raise_geo_restricted(countries=['CA'])
|
||||
@@ -685,9 +654,9 @@ class CBCGemIE(InfoExtractor):
|
||||
elif m3u8_info.get('errorCode') != 0:
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
|
||||
self._remove_duplicate_formats(formats)
|
||||
formats.extend(self._find_secret_formats(formats, video_id))
|
||||
|
||||
for fmt in formats:
|
||||
if fmt.get('vcodec') == 'none':
|
||||
@@ -703,20 +672,21 @@ class CBCGemIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('description'),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'series': video_info.get('series'),
|
||||
'season_number': video_info.get('season'),
|
||||
'season': f'Season {video_info.get("season")}',
|
||||
'episode_number': video_info.get('episode'),
|
||||
'episode': video_info.get('title'),
|
||||
'episode_id': video_id,
|
||||
'duration': video_info.get('duration'),
|
||||
'categories': [video_info.get('category')],
|
||||
'formats': formats,
|
||||
'release_timestamp': video_info.get('airDate'),
|
||||
'timestamp': video_info.get('availableDate'),
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'series': ('series', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'categories': ('category', {str}, all),
|
||||
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -12,53 +12,86 @@ from ..utils import (
|
||||
|
||||
|
||||
class CCMAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
|
||||
IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
|
||||
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
'md5': '7296ca43977c8ea4469e719c609b0871',
|
||||
'info_dict': {
|
||||
'id': '5630208',
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'espot de La Marató de TV3',
|
||||
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||
'timestamp': 1478608140,
|
||||
'upload_date': '20161108',
|
||||
'age_limit': 0,
|
||||
'alt_title': 'EsportMarató2016WEB_PerPublicar',
|
||||
'duration': 79,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
|
||||
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'categories': ['Divulgació'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
'md5': 'fa3e38f269329a278271276330261425',
|
||||
'info_dict': {
|
||||
'id': '943685',
|
||||
'ext': 'mp3',
|
||||
'title': 'El Consell de Savis analitza el derbi',
|
||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||
'upload_date': '20170512',
|
||||
'timestamp': 1494622500,
|
||||
'upload_date': '20161217',
|
||||
'timestamp': 1482011700,
|
||||
'vcodec': 'none',
|
||||
'categories': ['Esports'],
|
||||
'series': 'Tot gira',
|
||||
'duration': 821,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
|
||||
'md5': '27493513d08a3e5605814aee9bb778d2',
|
||||
'info_dict': {
|
||||
'id': '6031387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||
'timestamp': 1582577700,
|
||||
'timestamp': 1582577919,
|
||||
'upload_date': '20200224',
|
||||
'subtitles': 'mincount:4',
|
||||
'age_limit': 16,
|
||||
'subtitles': 'mincount:1',
|
||||
'age_limit': 13,
|
||||
'series': 'Crims',
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
|
||||
'duration': 3203,
|
||||
'categories': ['Divulgació'],
|
||||
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'episode_number': 5,
|
||||
'episode': 'Episode 5',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
|
||||
'info_dict': {
|
||||
'id': '5759227',
|
||||
'ext': 'mp4',
|
||||
'title': 'Una mosca volava per la llum',
|
||||
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
|
||||
'description': 'md5:9ab64276944b0825336f4147f13f7854',
|
||||
'series': 'Mic',
|
||||
'upload_date': '20180411',
|
||||
'timestamp': 1523440105,
|
||||
'duration': 160,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
|
||||
'categories': ['Música'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_type, media_id = self._match_valid_url(url).groups()
|
||||
media_type, media_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
media = self._download_json(
|
||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'format': 'dm',
|
||||
|
||||
@@ -12,6 +12,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_ord
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
@@ -351,3 +352,50 @@ class CDAIE(InfoExtractor):
|
||||
extract_format(webpage, resolution)
|
||||
|
||||
return merge_dicts(info_dict, info)
|
||||
|
||||
|
||||
class CDAFolderIE(InfoExtractor):
|
||||
_MAX_PAGE_SIZE = 36
|
||||
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cda.pl/domino264/folder/31188385',
|
||||
'info_dict': {
|
||||
'id': '31188385',
|
||||
'title': 'SERIA DRUGA',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
|
||||
'info_dict': {
|
||||
'id': '2664592',
|
||||
'title': 'VideoDowcipy - wszystkie odcinki',
|
||||
},
|
||||
'playlist_mincount': 71,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
|
||||
'info_dict': {
|
||||
'id': '19129979',
|
||||
'title': 'TESTY KOSMETYKÓW',
|
||||
},
|
||||
'playlist_mincount': 139,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
folder_id, channel = self._match_valid_url(url).group('id', 'channel')
|
||||
|
||||
webpage = self._download_webpage(url, folder_id)
|
||||
|
||||
def extract_page_entries(page):
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page + 1}', folder_id,
|
||||
f'Downloading page {page + 1}', expected_status=404)
|
||||
items = re.findall(r'<a[^>]+href="/video/([0-9a-z]+)"', webpage)
|
||||
for video_id in items:
|
||||
yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id)
|
||||
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(extract_page_entries, self._MAX_PAGE_SIZE),
|
||||
folder_id, self._og_search_title(webpage))
|
||||
|
||||
@@ -146,19 +146,33 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
video_meta = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
|
||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
||||
query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
}, note='Downloading video playback', errnote='Unable to download video playback')
|
||||
|
||||
live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
|
||||
video_status = video_meta.get('vodStatus')
|
||||
if video_status == 'UPLOAD':
|
||||
playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
|
||||
elif video_status == 'ABR_HLS':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
|
||||
video_id, query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
})
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
|
||||
formats, subtitles = [], {}
|
||||
live_status = 'post_live' if live_status == 'was_live' else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'live_status': live_status,
|
||||
**traverse_obj(video_meta, {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
|
||||
@@ -1,146 +1,226 @@
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import merge_dicts, try_call, url_basename
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
try_call,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_elements, traverse_obj
|
||||
|
||||
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'md5': '3e6121ea48df7e2259fe73a0628605c4',
|
||||
'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'info_dict': {
|
||||
'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
|
||||
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nadal wins 8th French Open title',
|
||||
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
'upload_date': '20240531',
|
||||
'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
|
||||
'duration': 373.0,
|
||||
'timestamp': 1717148586,
|
||||
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
|
||||
'modified_date': '20240531',
|
||||
'modified_timestamp': 1717150140,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'info_dict': {
|
||||
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
|
||||
'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
|
||||
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'ext': 'mp4',
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."',
|
||||
'upload_date': '20130821',
|
||||
'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
|
||||
'title': 'Here’s how some inmates in closely divided state are now able to vote from jail',
|
||||
'timestamp': 1718158269,
|
||||
'upload_date': '20240612',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
|
||||
'duration': 202.0,
|
||||
'modified_date': '20240612',
|
||||
'modified_timestamp': 1718158509,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
|
||||
'info_dict': {
|
||||
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
|
||||
'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
|
||||
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
|
||||
'description': 'md5:19f78338ccec533db0fa8a4511012dae',
|
||||
'title': 'Video shows King Charles\' portrait being vandalized by activists',
|
||||
'timestamp': 1718113852,
|
||||
'upload_date': '20240611',
|
||||
'duration': 51.0,
|
||||
'modified_timestamp': 1718116193,
|
||||
'modified_date': '20240611',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'info_dict': {
|
||||
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
|
||||
'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
|
||||
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'ext': 'mp4',
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
|
||||
'duration': 158.0,
|
||||
'title': 'Robin Meade signs off after HLN\'s last broadcast',
|
||||
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
|
||||
'upload_date': '20221205',
|
||||
'timestamp': 1670284296,
|
||||
'modified_timestamp': 1670332404,
|
||||
'modified_date': '20221206',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'info_dict': {
|
||||
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'timestamp': 1729501452,
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
|
||||
'description': 'md5:256ee7137d161f776cda429654135e52',
|
||||
'upload_date': '20241021',
|
||||
'duration': 31.0,
|
||||
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
|
||||
'modified_date': '20241021',
|
||||
'modified_timestamp': 1729501530,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
|
||||
'only_matching': True,
|
||||
'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
|
||||
'info_dict': {
|
||||
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
|
||||
'info_dict': {
|
||||
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
|
||||
'ext': 'mp4',
|
||||
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
|
||||
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
|
||||
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
|
||||
'duration': 173.0,
|
||||
'timestamp': 1729122182,
|
||||
'upload_date': '20241016',
|
||||
'modified_timestamp': 1729194706,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'md5': '11604ab4af83b650826753f1ccb8ecff',
|
||||
'info_dict': {
|
||||
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
|
||||
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
|
||||
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
|
||||
'duration': 145.0,
|
||||
'timestamp': 1729137765,
|
||||
'upload_date': '20241017',
|
||||
'modified_timestamp': 1729138184,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}],
|
||||
}]
|
||||
|
||||
_CONFIG = {
|
||||
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
|
||||
'edition': {
|
||||
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
|
||||
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
|
||||
},
|
||||
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
|
||||
'money': {
|
||||
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
|
||||
'media_src': 'http://ht3.cdn.turner.com/money/big',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
|
||||
# TODO: fix timestamp extraction
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, path, page_title = self._match_valid_url(url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
display_id = self._match_valid_url(url).group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
app_id = traverse_obj(
|
||||
self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
|
||||
('TOP_AUTH_SERVICE_APP_ID', {str}))
|
||||
|
||||
entries = []
|
||||
for player_data in traverse_obj(webpage, (
|
||||
{find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
|
||||
..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
|
||||
media_id = player_data['data-media-id']
|
||||
parent_uri = player_data.get('data-video-resource-parent-uri')
|
||||
formats, subtitles = [], {}
|
||||
|
||||
video_data = {}
|
||||
if parent_uri:
|
||||
video_data = self._download_json(
|
||||
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
|
||||
query={
|
||||
'id': media_id,
|
||||
'stellarUri': parent_uri,
|
||||
})
|
||||
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
|
||||
resolution, bitrate = None, None
|
||||
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
|
||||
resolution, bitrate = mobj.group('res', 'tbr')
|
||||
formats.append({
|
||||
'url': direct_url,
|
||||
'format_id': 'direct',
|
||||
'quality': 1,
|
||||
'tbr': int_or_none(bitrate),
|
||||
**parse_resolution(resolution),
|
||||
})
|
||||
for sub_data in traverse_obj(video_data, (
|
||||
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
|
||||
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
|
||||
'url': sub_data['url'],
|
||||
'name': sub_data.get('label'),
|
||||
})
|
||||
|
||||
if app_id:
|
||||
media_data = self._download_json(
|
||||
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
|
||||
query={'appId': app_id})
|
||||
m3u8_url = traverse_obj(media_data, (
|
||||
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
|
||||
if m3u8_url:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
entries.append({
|
||||
**traverse_obj(player_data, {
|
||||
'title': ('data-headline', {clean_html}),
|
||||
'description': ('data-description', {clean_html}),
|
||||
'duration': ('data-duration', {parse_duration}),
|
||||
'timestamp': ('data-publish-date', {parse_iso8601}),
|
||||
'thumbnail': (
|
||||
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
|
||||
{functools.partial(update_url, query='c=original')}),
|
||||
'display_id': 'data-video-slug',
|
||||
}),
|
||||
**traverse_obj(video_data, {
|
||||
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
|
||||
'description': ('description', {clean_html}),
|
||||
'title': ('headline', {str}),
|
||||
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
|
||||
'duration': ('trt', {int_or_none}),
|
||||
}),
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**entries[0],
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
|
||||
_TEST = {
|
||||
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
|
||||
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Criminalizing journalism?',
|
||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||
'upload_date': '20140209',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
||||
return self.url_result(cnn_url, CNNIE.ie_key())
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: Cyberattack not an act of war',
|
||||
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
|
||||
class CNNIndonesiaIE(InfoExtractor):
|
||||
|
||||
@@ -35,6 +35,7 @@ from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
TransportError,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
@@ -46,6 +47,7 @@ from ..utils import (
|
||||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
ISO639Utils,
|
||||
LenientJSONDecoder,
|
||||
Popen,
|
||||
RegexNotFoundError,
|
||||
@@ -332,7 +334,7 @@ class InfoExtractor:
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
repost_count: Number of reposts of the video
|
||||
average_rating: Average rating give by users, the scale used depends on the webpage
|
||||
average_rating: Average rating given by users, the scale used depends on the webpage
|
||||
comment_count: Number of comments on the video
|
||||
comments: A list of comments, each with one or more of the following
|
||||
properties (all but one of text or html optional):
|
||||
@@ -519,7 +521,7 @@ class InfoExtractor:
|
||||
or _extract_from_webpage as necessary. While these are normally classmethods,
|
||||
_extract_from_webpage is allowed to be an instance method.
|
||||
|
||||
_extract_from_webpage may raise self.StopExtraction() to stop further
|
||||
_extract_from_webpage may raise self.StopExtraction to stop further
|
||||
processing of the webpage and obtain exclusive rights to it. This is useful
|
||||
when the extractor cannot reliably be matched using just the URL,
|
||||
e.g. invidious/peertube instances
|
||||
@@ -572,13 +574,13 @@ class InfoExtractor:
|
||||
|
||||
def _login_hint(self, method=NO_DEFAULT, netrc=None):
|
||||
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
|
||||
cookies_hint = 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'
|
||||
return {
|
||||
None: '',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
|
||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}. {cookies_hint}',
|
||||
'password': f'Use {password_hint}',
|
||||
'cookies': (
|
||||
'Use --cookies-from-browser or --cookies for the authentication. '
|
||||
'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
|
||||
'cookies': f'Use --cookies-from-browser or --cookies for the authentication. {cookies_hint}',
|
||||
'session_cookies': f'Use --cookies for the authentication (--cookies-from-browser might not work). {cookies_hint}',
|
||||
}[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
@@ -965,6 +967,9 @@ class InfoExtractor:
|
||||
return False
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data)
|
||||
if content is False:
|
||||
assert not fatal
|
||||
return False
|
||||
return (content, urlh)
|
||||
|
||||
@staticmethod
|
||||
@@ -1039,7 +1044,15 @@ class InfoExtractor:
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||
prefix=None, encoding=None, data=None):
|
||||
webpage_bytes = urlh.read()
|
||||
try:
|
||||
webpage_bytes = urlh.read()
|
||||
except TransportError as err:
|
||||
errmsg = f'{video_id}: Error reading response: {err.msg}'
|
||||
if fatal:
|
||||
raise ExtractorError(errmsg, cause=err)
|
||||
self.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
@@ -1396,6 +1409,13 @@ class InfoExtractor:
|
||||
return None, None
|
||||
|
||||
self.write_debug(f'Using netrc for {netrc_machine} authentication')
|
||||
|
||||
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
|
||||
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
|
||||
if sys.version_info < (3, 11):
|
||||
return tuple(x if x != '""' else '' for x in info[::2])
|
||||
|
||||
return info[0], info[2]
|
||||
|
||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||
@@ -1698,7 +1718,7 @@ class InfoExtractor:
|
||||
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
|
||||
if rating is not None:
|
||||
info['average_rating'] = rating
|
||||
if is_type(e, 'TVEpisode', 'Episode'):
|
||||
if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
|
||||
episode_name = unescapeHTML(e.get('name'))
|
||||
info.update({
|
||||
'episode': episode_name,
|
||||
@@ -2065,7 +2085,7 @@ class InfoExtractor:
|
||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
|
||||
if self.get_param('hls_split_discontinuity', False):
|
||||
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||
@@ -2800,11 +2820,11 @@ class InfoExtractor:
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if try_call(lambda: base_url_e.text) is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
if re.match(r'https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and base_url.startswith('/'):
|
||||
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
|
||||
elif mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
elif mpd_base_url and not re.match(r'https?://', base_url):
|
||||
if not mpd_base_url.endswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
@@ -2894,7 +2914,7 @@ class InfoExtractor:
|
||||
}
|
||||
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
return 'url' if re.match(r'https?://', location) else 'path'
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
@@ -3059,7 +3079,11 @@ class InfoExtractor:
|
||||
url_pattern = stream.attrib['Url']
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
stream_language = stream.get('Language', 'und')
|
||||
# IsmFD expects ISO 639 Set 2 language codes (3-character length)
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
|
||||
stream_language = stream.get('Language') or 'und'
|
||||
if len(stream_language) != 3:
|
||||
stream_language = ISO639Utils.short2long(stream_language) or 'und'
|
||||
for track in stream.findall('QualityLevel'):
|
||||
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
|
||||
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
|
||||
@@ -3489,7 +3513,7 @@ class InfoExtractor:
|
||||
continue
|
||||
urls.add(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
ext = determine_ext(source_url, default_ext=mimetype2ext(source_type))
|
||||
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
|
||||
@@ -6,12 +6,37 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cwtv.com/shows/all-american-homecoming/ready-or-not/?play=d848488f-f62a-40fd-af1f-6440b1821aab',
|
||||
'info_dict': {
|
||||
'id': 'd848488f-f62a-40fd-af1f-6440b1821aab',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ready Or Not',
|
||||
'description': 'Simone is concerned about changes taking place at Bringston; JR makes a decision about his future.',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 2547,
|
||||
'timestamp': 1720519200,
|
||||
'uploader': 'CWTV',
|
||||
'chapters': 'count:6',
|
||||
'series': 'All American: Homecoming',
|
||||
'season_number': 3,
|
||||
'episode_number': 1,
|
||||
'age_limit': 0,
|
||||
'upload_date': '20240709',
|
||||
'season': 'Season 3',
|
||||
'episode': 'Episode 1',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'info_dict': {
|
||||
'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
@@ -69,13 +94,14 @@ class CWTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)
|
||||
f'https://images.cwtv.com/feed/mobileapp/video-meta/apiversion_12/guid_{video_id}', video_id)
|
||||
if data.get('result') != 'ok':
|
||||
raise ExtractorError(data['msg'], expected=True)
|
||||
video_data = data['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or f'http://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}?formats=M3U'
|
||||
mpx_url = update_url_query(
|
||||
video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}',
|
||||
{'formats': 'M3U+none'})
|
||||
|
||||
season = str_or_none(video_data.get('season'))
|
||||
episode = str_or_none(video_data.get('episode'))
|
||||
|
||||
@@ -10,11 +10,14 @@ from ..utils import (
|
||||
OnDemandPagedList,
|
||||
age_restricted,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -98,12 +101,20 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'''(?ix)
|
||||
https?://
|
||||
(?:
|
||||
dai\.ly/|
|
||||
(?:
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
|
||||
(?:www\.)?lequipe\.fr/video
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
|
||||
(?:www\.)?lequipe\.fr
|
||||
)/
|
||||
(?:
|
||||
swf/(?!video)|
|
||||
(?:(?:crawler|embed|swf)/)?video/|
|
||||
player(?:/[\da-z]+)?\.html\?(?:video|(?P<is_playlist>playlist))=
|
||||
)
|
||||
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
)
|
||||
(?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
IE_NAME = 'dailymotion'
|
||||
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
||||
_TESTS = [{
|
||||
@@ -123,7 +134,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
|
||||
@@ -142,7 +153,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['en_quete_d_esprit'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
@@ -217,6 +228,66 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
||||
'only_matching': True,
|
||||
}, { # playlist-only
|
||||
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/crawler/video/x8u4owg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/embed/video/x8u4owg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://dai.ly/x94cnnk',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
|
||||
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
|
||||
'info_dict': {
|
||||
'id': 'x93blhi',
|
||||
'ext': 'mp4',
|
||||
'title': 'OnAir - 01/08/24',
|
||||
'description': '',
|
||||
'duration': 217,
|
||||
'timestamp': 1722505658,
|
||||
'upload_date': '20240801',
|
||||
'uploader': 'Financialounge',
|
||||
'uploader_id': 'x2vtgmm',
|
||||
'age_limit': 0,
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
|
||||
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
|
||||
'info_dict': {
|
||||
'id': 'x7wdsj',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
# https://www.dailymotion.com/crawler/video/x8u4owg
|
||||
'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php',
|
||||
'info_dict': {
|
||||
'id': 'x8u4owg',
|
||||
'ext': 'mp4',
|
||||
'like_count': int,
|
||||
'uploader': 'Le Parisien',
|
||||
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
|
||||
'upload_date': '20240309',
|
||||
'view_count': int,
|
||||
'timestamp': 1709997866,
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'x32f7b',
|
||||
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
|
||||
'duration': 428.0,
|
||||
'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne',
|
||||
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
|
||||
},
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_COMMON_MEDIA_FIELDS = '''description
|
||||
@@ -232,16 +303,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
for mobj in re.finditer(
|
||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
player_url = url_or_none(attrs.get('src'))
|
||||
if not player_url:
|
||||
continue
|
||||
player_url = player_url.replace('.js', '.html')
|
||||
if player_url.startswith('//'):
|
||||
player_url = f'https:{player_url}'
|
||||
if video_id := attrs.get('data-video'):
|
||||
query_string = f'video={video_id}'
|
||||
elif playlist_id := attrs.get('data-playlist'):
|
||||
query_string = f'playlist={playlist_id}'
|
||||
else:
|
||||
continue
|
||||
yield update_url(player_url, query=query_string)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
video_id, playlist_id = self._match_valid_url(url).groups()
|
||||
video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
|
||||
|
||||
if playlist_id:
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self.url_result(
|
||||
'http://www.dailymotion.com/playlist/' + playlist_id,
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
if is_playlist: # We matched the playlist query param as video_id
|
||||
playlist_id = video_id
|
||||
video_id = None
|
||||
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self.url_result(
|
||||
f'http://www.dailymotion.com/playlist/{playlist_id}',
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
media = self._call_api(
|
||||
@@ -282,6 +372,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
title = metadata['title']
|
||||
is_live = media.get('isOnAir')
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
for quality, media_list in metadata['qualities'].items():
|
||||
for m in media_list:
|
||||
media_url = m.get('url')
|
||||
@@ -289,8 +381,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
if media_type == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
|
||||
fmt, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmt)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
@@ -310,20 +404,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if not f.get('fps') and f['format_id'].endswith('@60'):
|
||||
f['fps'] = 60
|
||||
|
||||
subtitles = {}
|
||||
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
|
||||
for subtitle_lang, subtitle in subtitles_data.items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'url': subtitle_url,
|
||||
} for subtitle_url in subtitle.get('urls', [])]
|
||||
|
||||
thumbnails = []
|
||||
for height, poster_url in metadata.get('posters', {}).items():
|
||||
thumbnails.append({
|
||||
'height': int_or_none(height),
|
||||
'id': height,
|
||||
'url': poster_url,
|
||||
})
|
||||
thumbnails = traverse_obj(metadata, (
|
||||
('posters', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]), {
|
||||
'height': (0, {int_or_none}),
|
||||
'id': (0, {str}),
|
||||
'url': 1,
|
||||
}))
|
||||
|
||||
owner = metadata.get('owner') or {}
|
||||
stats = media.get('stats') or {}
|
||||
@@ -447,7 +539,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search|crawler)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
|
||||
@@ -319,32 +319,6 @@ class DPlayIE(DPlayBaseIE):
|
||||
url, display_id, host, 'dplay' + country, country, domain)
|
||||
|
||||
|
||||
class HGTVDeIE(DPlayBaseIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
|
||||
'info_dict': {
|
||||
'id': '151205',
|
||||
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wer braucht schon eine Toilette',
|
||||
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
|
||||
'duration': 1177.024,
|
||||
'timestamp': 1595705400,
|
||||
'upload_date': '20200725',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Tiny House - klein, aber oho',
|
||||
'season_number': 3,
|
||||
'episode_number': 3,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||
|
||||
|
||||
class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
|
||||
|
||||
@@ -373,6 +347,45 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
|
||||
|
||||
|
||||
class HGTVDeIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
|
||||
'info_dict': {
|
||||
'id': '7332936',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
|
||||
'title': 'Vom Landleben ins Loft',
|
||||
'description': 'md5:e5f72c02c853970796dd3818f2e25745',
|
||||
'episode': 'Episode 7',
|
||||
'episode_number': 7,
|
||||
'season': 'Season 7',
|
||||
'season_number': 7,
|
||||
'series': 'Mein Kleinstadt-Traumhaus',
|
||||
'duration': 2645.0,
|
||||
'timestamp': 1725998100,
|
||||
'upload_date': '20240910',
|
||||
'creators': ['HGTV'],
|
||||
'tags': [],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/08/09/82a386b9-c688-32c7-b9ff-0b13865f0bae.jpeg',
|
||||
},
|
||||
}]
|
||||
|
||||
_PRODUCT = 'hgtv'
|
||||
_DISCO_API_PARAMS = {
|
||||
'disco_host': 'eu1-prod.disco-api.com',
|
||||
'realm': 'hgtv',
|
||||
'country': 'de',
|
||||
}
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
|
||||
class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
|
||||
@@ -6,8 +6,10 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,43 +38,58 @@ class DropboxIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
def _yield_decoded_parts(self, webpage):
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
yield base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
fn = urllib.parse.unquote(url_basename(url))
|
||||
title = os.path.splitext(fn)[0]
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
if (self._og_search_title(webpage) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if '/sm/password' in part:
|
||||
webpage = self._download_webpage(
|
||||
update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
|
||||
break
|
||||
|
||||
if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
if password:
|
||||
content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
|
||||
payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(),
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
|
||||
data=urlencode_postdata({
|
||||
'is_xhr': 'true',
|
||||
't': self._get_cookies('https://www.dropbox.com')['t'].value,
|
||||
'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
|
||||
'password': password,
|
||||
'url': url,
|
||||
}))
|
||||
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Authentication failed!', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
elif self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
else:
|
||||
raise ExtractorError('Invalid password', expected=True)
|
||||
elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats, subtitles, has_anonymous_download = [], {}, False
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
formats, subtitles = [], {}
|
||||
has_anonymous_download = False
|
||||
thumbnail = None
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if not has_anonymous_download:
|
||||
has_anonymous_download = self._search_regex(
|
||||
r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
|
||||
r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
|
||||
transcode_url = self._search_regex(
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
|
||||
if not transcode_url:
|
||||
continue
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
|
||||
thumbnail = self._search_regex(
|
||||
r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
|
||||
break
|
||||
|
||||
# downloads enabled we can get the original file
|
||||
@@ -89,4 +106,5 @@ class DropboxIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
@@ -139,12 +139,11 @@ class DRTVIE(InfoExtractor):
|
||||
return
|
||||
|
||||
token_response = self._download_json(
|
||||
'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||
'https://isl.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||
note='Downloading anonymous token', headers={
|
||||
'content-type': 'application/json',
|
||||
}, query={
|
||||
'device': 'web_browser',
|
||||
'ff': 'idp,ldp,rpt',
|
||||
'device': 'phone_android',
|
||||
'lang': 'da',
|
||||
'supportFallbackToken': 'true',
|
||||
}, data=json.dumps({
|
||||
|
||||
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ERTFlixBaseIE(InfoExtractor):
|
||||
@@ -74,29 +75,28 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
|
||||
|
||||
def _extract_formats_and_subs(self, video_id):
|
||||
media_info = self._call_api(video_id, codename=video_id)
|
||||
formats, subs = [], {}
|
||||
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
|
||||
for media in try_get(media_file, lambda x: x['Formats'], list) or []:
|
||||
fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
|
||||
if not fmt_url:
|
||||
continue
|
||||
ext = determine_ext(fmt_url)
|
||||
if ext == 'm3u8':
|
||||
formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
|
||||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
formats_, subs_ = self._extract_mpd_formats_and_subtitles(
|
||||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': str_or_none(media.get('Id')),
|
||||
})
|
||||
continue
|
||||
formats.extend(formats_)
|
||||
self._merge_subtitles(subs_, target=subs)
|
||||
formats, subtitles = [], {}
|
||||
for media in traverse_obj(media_info, (
|
||||
'MediaFiles', lambda _, v: v['RoleCodename'] == 'main',
|
||||
'Formats', lambda _, v: url_or_none(v['Url']))):
|
||||
fmt_url = media['Url']
|
||||
ext = determine_ext(fmt_url)
|
||||
if ext == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
fmt_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': str_or_none(media.get('Id')),
|
||||
})
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return formats, subs
|
||||
return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -294,37 +294,37 @@ class ESPNCricInfoIE(InfoExtractor):
|
||||
class WatchESPNIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'info_dict': {
|
||||
'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
|
||||
'id': '11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
|
||||
'ext': 'mp4',
|
||||
'title': 'Huddersfield vs. Burnley',
|
||||
'duration': 7500,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
'title': 'Abilene Chrstn vs. Texas Tech',
|
||||
'duration': 14166,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/11ce417a-6ac9-42b6-8a15-46aeb9ad5710/16x9.jpg?timestamp=202407252343&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'info_dict': {
|
||||
'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
|
||||
'id': '90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
|
||||
'duration': 8335,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'UC Davis vs. California',
|
||||
'duration': 9547,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'url': 'https://www.espn.com/watch/player/_/id/c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'info_dict': {
|
||||
'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
|
||||
'id': 'c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Wheel - Episode 10',
|
||||
'duration': 3352,
|
||||
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
|
||||
'title': 'The College Football Show',
|
||||
'duration': 3639,
|
||||
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/c4313bbe-95b5-4bb8-b251-ac143ea0fc54/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -353,6 +353,13 @@ class WatchESPNIE(AdobePassIE):
|
||||
if not cookie:
|
||||
self.raise_login_required(method='cookies')
|
||||
|
||||
jwt = self._search_regex(r'=([^|]+)\|', cookie.value, 'cookie jwt')
|
||||
id_token = self._download_json(
|
||||
'https://registerdisney.go.com/jgc/v6/client/ESPN-ONESITE.WEB-PROD/guest/refresh-auth',
|
||||
None, 'Refreshing token', headers={'Content-Type': 'application/json'}, data=json.dumps({
|
||||
'refreshToken': json.loads(base64.urlsafe_b64decode(f'{jwt}==='))['refresh_token'],
|
||||
}).encode())['data']['token']['id_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'devices', video_id,
|
||||
headers={'Content-Type': 'application/json; charset=UTF-8'},
|
||||
@@ -371,7 +378,7 @@ class WatchESPNIE(AdobePassIE):
|
||||
})['access_token']
|
||||
|
||||
assertion = self._call_bamgrid_api(
|
||||
'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
|
||||
'accounts/grant', video_id, payload={'id_token': id_token},
|
||||
headers={
|
||||
'Authorization': token,
|
||||
'Content-Type': 'application/json; charset=UTF-8',
|
||||
|
||||
@@ -3,7 +3,12 @@ from ..utils import traverse_obj
|
||||
|
||||
|
||||
class EurosportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:
|
||||
(?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
|
||||
eurosport\.tvn24\.pl
|
||||
)/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
|
||||
'info_dict': {
|
||||
@@ -70,6 +75,42 @@ class EurosportIE(InfoExtractor):
|
||||
'duration': 105.0,
|
||||
'upload_date': '20230518',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TOKEN = None
|
||||
@@ -77,6 +118,7 @@ class EurosportIE(InfoExtractor):
|
||||
# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
|
||||
# but this method require to get sha256 hash
|
||||
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_initialize(self):
|
||||
if EurosportIE._TOKEN is None:
|
||||
@@ -98,13 +140,13 @@ class EurosportIE(InfoExtractor):
|
||||
for stream_type in json_data['attributes']['streaming']:
|
||||
if stream_type == 'hls':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
|
||||
elif stream_type == 'dash':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
|
||||
elif stream_type == 'mss':
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
@@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1692346159,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': '100063551323670',
|
||||
'duration': 3132.184,
|
||||
'duration': 3133.583,
|
||||
'view_count': int,
|
||||
'concurrent_view_count': 0,
|
||||
},
|
||||
@@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
|
||||
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
|
||||
'duration': 131.03,
|
||||
'concurrent_view_count': int,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
@@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': 'ca63897a90c9452efee5f8c40d080e25',
|
||||
'md5': '1659aa21fb3dd1585874f668e81a72c8',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'uploader_id': '100059479812265',
|
||||
'concurrent_view_count': int,
|
||||
'duration': 44.478,
|
||||
'duration': 44.181,
|
||||
},
|
||||
}, {
|
||||
# FIXME: unable to extract uploader, no formats found
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||
@@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1511548260,
|
||||
'upload_date': '20171124',
|
||||
'uploader': 'Vickie Gentry',
|
||||
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
|
||||
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 148.435,
|
||||
'duration': 148.224,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
@@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
|
||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Lela Evans',
|
||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
||||
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
|
||||
'upload_date': '20231228',
|
||||
'timestamp': 1703804085,
|
||||
'duration': 394.347,
|
||||
@@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20180523',
|
||||
'uploader': 'ESL One Dota 2',
|
||||
'uploader_id': '100066514874195',
|
||||
'duration': 4524.212,
|
||||
'duration': 4524.001,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
@@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
|
||||
'title': 'Josef',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
|
||||
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
|
||||
'timestamp': 1549275572,
|
||||
'duration': 3.413,
|
||||
'duration': 3.283,
|
||||
'uploader': 'Josef Novak',
|
||||
'description': '',
|
||||
'upload_date': '20190204',
|
||||
@@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# FIXME: Cannot parse data error
|
||||
# data.event.cover_media_renderer.cover_video
|
||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||
'info_dict': {
|
||||
@@ -498,7 +501,8 @@ class FacebookIE(InfoExtractor):
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||
or get_first(post, ('event', 'event_creator', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
|
||||
uploader = uploader_data.get('name') or (
|
||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
or self._search_regex(
|
||||
@@ -524,6 +528,11 @@ class FacebookIE(InfoExtractor):
|
||||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
|
||||
'like_count': ('likers', 'count', {int}),
|
||||
'comment_count': ('total_comment_count', {int}),
|
||||
'repost_count': ('share_count_reduced', {parse_count}),
|
||||
}), get_all=False),
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
@@ -555,11 +564,12 @@ class FacebookIE(InfoExtractor):
|
||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
|
||||
dash_manifest = traverse_obj(
|
||||
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str)
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
mpd_url=video.get('dash_manifest_url')))
|
||||
mpd_url=url_or_none(video.get('dash_manifest_url'))))
|
||||
|
||||
def process_formats(info):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
@@ -609,12 +619,13 @@ class FacebookIE(InfoExtractor):
|
||||
video = video['creation_story']
|
||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||
video.update(reel_info)
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||
('browser_native_sd_url', 'sd')):
|
||||
playable_url = video.get(key)
|
||||
playable_url = fmt_data.get(key)
|
||||
if not playable_url:
|
||||
continue
|
||||
if determine_ext(playable_url) == 'mpd':
|
||||
@@ -626,7 +637,7 @@ class FacebookIE(InfoExtractor):
|
||||
'quality': q(format_id) - 3,
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
extract_dash_manifest(fmt_data, formats)
|
||||
if not formats:
|
||||
# Do not append false positive entry w/o any formats
|
||||
return
|
||||
@@ -932,18 +943,21 @@ class FacebookReelIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||
'md5': 'f13dd37f2633595982db5ed8765474d3',
|
||||
'md5': 'a53256d10fc2105441fe0c4212ed8cea',
|
||||
'info_dict': {
|
||||
'id': '1195289147628387',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
|
||||
'description': 'md5:22f03309b216ac84720183961441d8db',
|
||||
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
|
||||
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
|
||||
'uploader': 'Beast Camp Training',
|
||||
'uploader_id': '100040874179269',
|
||||
'duration': 9.579,
|
||||
'timestamp': 1637502609,
|
||||
'upload_date': '20211121',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -963,6 +977,7 @@ class FacebookAdsIE(InfoExtractor):
|
||||
'id': '899206155126718',
|
||||
'ext': 'mp4',
|
||||
'title': 'video by Kandao',
|
||||
'description': 'md5:0822724069e3aca97cbed5dabbab282e',
|
||||
'uploader': 'Kandao',
|
||||
'uploader_id': '774114102743284',
|
||||
'uploader_url': r're:^https?://.*',
|
||||
@@ -971,6 +986,22 @@ class FacebookAdsIE(InfoExtractor):
|
||||
'upload_date': '20231214',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# key 'watermarked_video_sd_url' missing
|
||||
'url': 'https://www.facebook.com/ads/library/?id=501152689226254',
|
||||
'info_dict': {
|
||||
'id': '501152689226254',
|
||||
'ext': 'mp4',
|
||||
'title': 'video by mat.nawrocki',
|
||||
'description': 'md5:02a446ace7ff8c3c37a2892922492490',
|
||||
'uploader': 'mat.nawrocki',
|
||||
'uploader_id': '148586968341456',
|
||||
'uploader_url': r're:^https?://.*',
|
||||
'timestamp': 1723452305,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'upload_date': '20240812',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
|
||||
'info_dict': {
|
||||
@@ -1017,34 +1048,42 @@ class FacebookAdsIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
post_data = [self._parse_json(j, video_id, fatal=False)
|
||||
for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
|
||||
data = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
|
||||
post_data = traverse_obj(
|
||||
re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads}))
|
||||
data = get_first(post_data, (
|
||||
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ...,
|
||||
'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict}))
|
||||
if not data:
|
||||
raise ExtractorError('Unable to extract ad data')
|
||||
|
||||
title = data.get('title')
|
||||
if not title or title == '{{product.name}}':
|
||||
title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)
|
||||
markup_id = traverse_obj(data, ('body', '__m', {str}))
|
||||
markup = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'markup', lambda _, v: v[0].startswith(markup_id),
|
||||
..., '__html', {clean_html}, {lambda x: not x.startswith('{{product.') and x}, any))
|
||||
|
||||
info_dict = traverse_obj(data, {
|
||||
'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
|
||||
info_dict = merge_dicts({
|
||||
'title': title,
|
||||
'description': markup or None,
|
||||
}, traverse_obj(data, {
|
||||
'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}),
|
||||
'uploader': ('page_name', {str}),
|
||||
'uploader_id': ('page_id', {str_or_none}),
|
||||
'uploader_url': ('page_profile_uri', {url_or_none}),
|
||||
'timestamp': ('creation_time', {int_or_none}),
|
||||
'like_count': ('page_like_count', {int_or_none}),
|
||||
})
|
||||
}))
|
||||
|
||||
entries = []
|
||||
for idx, entry in enumerate(traverse_obj(
|
||||
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v[f]) for f in self._FORMATS_MAP))), 1,
|
||||
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1,
|
||||
):
|
||||
entries.append({
|
||||
'id': f'{video_id}_{idx}',
|
||||
'title': entry.get('title') or title,
|
||||
'description': entry.get('link_description') or info_dict.get('description'),
|
||||
'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'),
|
||||
'thumbnail': url_or_none(entry.get('video_preview_image_url')),
|
||||
'formats': self._extract_formats(entry),
|
||||
})
|
||||
|
||||
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -3,7 +3,7 @@ from .nexx import NexxIE
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
|
||||
'md5': '8610449476156f338761a75391b0017d',
|
||||
@@ -27,6 +27,9 @@ class FunkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -8,6 +8,9 @@ from .common import InfoExtractor
|
||||
from .commonprotocols import RtmpIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
MEDIA_EXTENSIONS,
|
||||
@@ -2340,7 +2343,7 @@ class GenericIE(InfoExtractor):
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
if re.match(r'[^\s/]+\.[^\s/]+/', url):
|
||||
self.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
@@ -2373,6 +2376,11 @@ class GenericIE(InfoExtractor):
|
||||
else:
|
||||
video_id = self._generic_id(url)
|
||||
|
||||
# Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
|
||||
impersonate = self._configuration_arg('impersonate', ['false'])
|
||||
if 'false' in impersonate:
|
||||
impersonate = None
|
||||
|
||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
||||
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
|
||||
@@ -2381,10 +2389,29 @@ class GenericIE(InfoExtractor):
|
||||
# to accept raw bytes and being able to download only a chunk.
|
||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||
# after a HEAD request, but not sure if we can rely on this.
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}))
|
||||
try:
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}), impersonate=impersonate)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
|
||||
and e.cause.response.get_header('cf-mitigated') == 'challenge'
|
||||
and e.cause.response.extensions.get('impersonate') is None):
|
||||
raise
|
||||
cf_cookie_domain = traverse_obj(
|
||||
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
|
||||
('__cf_bm', 'domain'))
|
||||
if cf_cookie_domain:
|
||||
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
|
||||
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
|
||||
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
|
||||
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
|
||||
'how to install the required impersonation dependency, and ')
|
||||
raise ExtractorError(
|
||||
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
|
||||
|
||||
new_url = full_response.url
|
||||
if new_url != extract_basic_auth(url)[0]:
|
||||
self.report_following_redirect(new_url)
|
||||
@@ -2400,7 +2427,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = full_response.headers.get('Content-Type', '').lower()
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
self.report_detected('direct video link')
|
||||
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
||||
|
||||
91
yt_dlp/extractor/germanupa.py
Normal file
91
yt_dlp/extractor/germanupa.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GermanupaIE(InfoExtractor):
|
||||
IE_DESC = 'germanupa.de'
|
||||
_VALID_URL = r'https?://germanupa\.de/mediathek/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://germanupa.de/mediathek/4-figma-beratung-deine-sprechstunde-fuer-figma-fragen',
|
||||
'info_dict': {
|
||||
'id': '909179246',
|
||||
'title': 'Tutorial: #4 Figma Beratung - Deine Sprechstunde für Figma-Fragen',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'German UPA',
|
||||
'uploader_id': 'germanupa',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1792564420-7415283ccef8bf8702dab8c6b7515555ceeb7a1c11371ffcc133b8e887dbf70e-d_1280',
|
||||
'uploader_url': 'https://vimeo.com/germanupa',
|
||||
'duration': 3987,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'audio, uses GenericIE',
|
||||
'url': 'https://germanupa.de/mediathek/live-vom-ux-festival-neuigkeiten-von-figma-jobmarkt-agenturszene-interview-zu-sustainable',
|
||||
'info_dict': {
|
||||
'id': '1867346676',
|
||||
'title': 'Live vom UX Festival: Neuigkeiten von Figma, Jobmarkt, Agenturszene & Interview zu Sustainable UX',
|
||||
'ext': 'opus',
|
||||
'timestamp': 1720545088,
|
||||
'upload_date': '20240709',
|
||||
'duration': 3910.557,
|
||||
'like_count': int,
|
||||
'description': 'md5:db2aed5ff131e177a7b33901e9a8db05',
|
||||
'uploader': 'German UPA',
|
||||
'repost_count': int,
|
||||
'genres': ['Science'],
|
||||
'license': 'all-rights-reserved',
|
||||
'uploader_url': 'https://soundcloud.com/user-80097677',
|
||||
'uploader_id': '471579486',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-oCti2e9GhaZFWBqY-48ybGw-original.jpg',
|
||||
},
|
||||
}, {
|
||||
'note': 'Nur für Mitglieder/Just for members',
|
||||
'url': 'https://germanupa.de/mediathek/ux-festival-2024-usability-tests-und-ai',
|
||||
'info_dict': {
|
||||
'id': '986994430',
|
||||
'title': 'UX Festival 2024 "Usability Tests und AI" von Lennart Weber',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20240719',
|
||||
'uploader_url': 'https://vimeo.com/germanupa',
|
||||
'timestamp': 1721373980,
|
||||
'license': 'by-sa',
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1904187064-2a672630c30f9ad787bd390bff3f51d7506a3e8416763ba6dbf465732b165c5c-d_1280',
|
||||
'duration': 2146,
|
||||
'release_timestamp': 1721373980,
|
||||
'uploader': 'German UPA',
|
||||
'uploader_id': 'germanupa',
|
||||
'upload_date': '20240719',
|
||||
'comment_count': int,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
param_url = traverse_obj(
|
||||
self._search_regex(
|
||||
r'<iframe[^>]+data-src\s*?=\s*?([\'"])(?P<url>https://germanupa\.de/media/oembed\?url=(?:(?!\1).)+)\1',
|
||||
webpage, 'embedded video', default=None, group='url'),
|
||||
({parse_qs}, 'url', 0, {url_or_none}))
|
||||
|
||||
if not param_url:
|
||||
if self._search_regex(
|
||||
r'<div[^>]+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1',
|
||||
webpage, 'login wrapper', default=None):
|
||||
self.raise_login_required('This video is only available for members')
|
||||
return self.url_result(url, 'Generic') # Fall back to generic to extract audio
|
||||
|
||||
real_url = param_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/')
|
||||
return self.url_result(VimeoIE._smuggle_referrer(real_url, url), VimeoIE, video_id)
|
||||
@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
|
||||
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
|
||||
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://academymel.online/3video_1',
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GolemIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_TEST = {
|
||||
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class HRFernsehenIE(InfoExtractor):
|
||||
IE_NAME = 'hrfernsehen'
|
||||
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
|
||||
'md5': '5c4e0ba94677c516a2f65a84110fc536',
|
||||
|
||||
@@ -8,15 +8,19 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class HuyaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
IE_NAME = 'huya:live'
|
||||
IE_DESC = 'huya.com'
|
||||
TESTS = [{
|
||||
@@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '572329',
|
||||
'title': str,
|
||||
'ext': 'flv',
|
||||
'description': str,
|
||||
'is_live': True,
|
||||
'view_count': int,
|
||||
@@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor):
|
||||
fm = base64.b64decode(params['fm']).decode().split('_', 1)[0]
|
||||
ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
|
||||
return fm, ss
|
||||
|
||||
|
||||
class HuyaVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P<id>\d+)\.html'
|
||||
IE_NAME = 'huya:video'
|
||||
IE_DESC = '虎牙视频'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.huya.com/video/play/1002412640.html',
|
||||
'info_dict': {
|
||||
'id': '1002412640',
|
||||
'ext': 'mp4',
|
||||
'title': '8月3日',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 14,
|
||||
'uploader': '虎牙-ATS欧卡车队青木',
|
||||
'uploader_id': '1564376151',
|
||||
'upload_date': '20240803',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.huya.com/video/play/556054543.html',
|
||||
'info_dict': {
|
||||
'id': '556054543',
|
||||
'ext': 'mp4',
|
||||
'title': '我不挑事 也不怕事',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 1864,
|
||||
'uploader': '卡尔',
|
||||
'uploader_id': '367138632',
|
||||
'upload_date': '20210811',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url: str):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://liveapi.huya.com/moment/getMomentContent', video_id,
|
||||
query={'videoId': video_id})['data']['moment']['videoInfo']
|
||||
|
||||
formats = []
|
||||
for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append({
|
||||
'url': definition['url'],
|
||||
**traverse_obj(definition, {
|
||||
'format_id': ('defName', {str}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('videoCover', {url_or_none}),
|
||||
'duration': ('videoDuration', {parse_duration}),
|
||||
'uploader': ('nickName', {str}),
|
||||
'uploader_id': ('uid', {str_or_none}),
|
||||
'upload_date': ('videoUploadTime', {unified_strdate}),
|
||||
'view_count': ('videoPlayNum', {int_or_none}),
|
||||
'comment_count': ('videoCommentNum', {int_or_none}),
|
||||
'like_count': ('favorCount', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ class ImgurBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ImgurIE(ImgurBaseIE):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://imgur.com/A61SaA1',
|
||||
@@ -54,6 +54,22 @@ class ImgurIE(ImgurBaseIE):
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||
},
|
||||
}, {
|
||||
# Test with URL slug
|
||||
'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1',
|
||||
'info_dict': {
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 'MRW gifv is up and running without any bugs',
|
||||
'timestamp': 1416446068,
|
||||
'upload_date': '20141120',
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1416446068,
|
||||
'release_date': '20141120',
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
'only_matching': True,
|
||||
@@ -92,6 +108,7 @@ class ImgurIE(ImgurBaseIE):
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1710491255,
|
||||
'release_date': '20240315',
|
||||
'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -208,7 +225,10 @@ class ImgurIE(ImgurBaseIE):
|
||||
}), get_all=False),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'thumbnail': url_or_none(search('thumbnailUrl')),
|
||||
'thumbnails': [{
|
||||
'url': thumbnail_url,
|
||||
'http_headers': {'Accept': '*/*'},
|
||||
}] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None,
|
||||
'http_headers': {'Accept': '*/*'},
|
||||
}
|
||||
|
||||
@@ -252,17 +272,9 @@ class ImgurGalleryBaseIE(ImgurBaseIE):
|
||||
|
||||
class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
IE_NAME = 'imgur:gallery'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://imgur.com/gallery/Q95ko',
|
||||
'info_dict': {
|
||||
'id': 'Q95ko',
|
||||
'title': 'Adding faces make every GIF better',
|
||||
},
|
||||
'playlist_count': 25,
|
||||
'skip': 'Zoinks! You\'ve taken a wrong turn.',
|
||||
}, {
|
||||
# TODO: static images - replace with animated/video gallery
|
||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||
'only_matching': True,
|
||||
@@ -280,7 +292,27 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
'release_timestamp': 1358554297,
|
||||
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||
'release_date': '20130119',
|
||||
'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
|
||||
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# Test with slug
|
||||
'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx',
|
||||
'add_ies': ['Imgur'],
|
||||
'info_dict': {
|
||||
'id': 'YcAQlkx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||
'timestamp': 1358554297,
|
||||
'upload_date': '20130119',
|
||||
'uploader_id': '1648642',
|
||||
'uploader': 'wittyusernamehere',
|
||||
'release_timestamp': 1358554297,
|
||||
'release_date': '20130119',
|
||||
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
|
||||
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
@@ -317,6 +349,13 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
'title': 'Penguins !',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ',
|
||||
'info_dict': {
|
||||
'id': '6lAn9VQ',
|
||||
'title': 'Penguins !',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
|
||||
'add_ies': ['Imgur'],
|
||||
@@ -357,7 +396,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||
|
||||
class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||
IE_NAME = 'imgur:album'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
|
||||
_GALLERY = False
|
||||
_TESTS = [{
|
||||
# TODO: only static images - replace with animated/video gallery
|
||||
@@ -372,6 +411,14 @@ class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||
'title': 'enen-no-shouboutai',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# Test with URL slug
|
||||
'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX',
|
||||
'info_dict': {
|
||||
'id': 'iX265HX',
|
||||
'title': 'enen-no-shouboutai',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://imgur.com/a/8pih2Ed',
|
||||
'info_dict': {
|
||||
|
||||
@@ -48,7 +48,6 @@ class InstagramBaseIE(InfoExtractor):
|
||||
'X-IG-WWW-Claim': '0',
|
||||
'Origin': 'https://www.instagram.com',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@@ -435,10 +434,10 @@ class InstagramIE(InstagramBaseIE):
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'query_hash': '9f8827793ef34641b2fb195d4d41151c',
|
||||
'doc_id': '8845758582119845',
|
||||
'variables': json.dumps(variables, separators=(',', ':')),
|
||||
})
|
||||
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
|
||||
media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
|
||||
|
||||
if not general_info:
|
||||
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
||||
|
||||
@@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor):
|
||||
'id': 'p51388',
|
||||
'ext': 'mp4',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
|
||||
'upload_date': '20201103',
|
||||
'timestamp': 1604437480,
|
||||
'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
|
||||
'episode': 'Partička (92)',
|
||||
'season': 'Partička',
|
||||
'series': 'Prima Partička',
|
||||
'episode_number': 92,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
|
||||
'info_dict': {
|
||||
'id': 'p1412199',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 3,
|
||||
'episode': 'Tenerife: V říši ohně',
|
||||
'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
|
||||
'duration': 3111.0,
|
||||
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
|
||||
'title': 'Tenerife: V říši ohně',
|
||||
'timestamp': 1711825800,
|
||||
'upload_date': '20240330',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
@@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor):
|
||||
video_id = self._search_regex((
|
||||
r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
r'let\s+videos\s*=\s*([\'"])(?P<id>p\d+)\1',
|
||||
), webpage, 'real id', group='id', default=None)
|
||||
|
||||
if not video_id:
|
||||
@@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor):
|
||||
final_result = self._search_json_ld(webpage, video_id, default={})
|
||||
final_result.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': final_result.get('title') or title,
|
||||
'thumbnail': self._html_search_meta(
|
||||
['thumbnail', 'og:image', 'twitter:image'],
|
||||
webpage, 'thumbnail', default=None),
|
||||
|
||||
@@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
|
||||
|
||||
|
||||
class SangiinInstructionIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
_VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||
IE_DESC = False # this shouldn't be listed as a supported site
|
||||
|
||||
def _real_extract(self, url):
|
||||
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
|
||||
raise ExtractorError(
|
||||
'Copy the link from the button below the video description/player '
|
||||
'and use that link to download. If there is no button in the frame, '
|
||||
'get the URL of the frame showing the video.', expected=True)
|
||||
|
||||
|
||||
class SangiinIE(InfoExtractor):
|
||||
|
||||
@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
|
||||
https?://
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
|
||||
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
class KhanAcademyBaseIE(InfoExtractor):
|
||||
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||
|
||||
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70'
|
||||
_PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4'
|
||||
|
||||
def _parse_video(self, video):
|
||||
return {
|
||||
@@ -39,7 +39,7 @@ class KhanAcademyBaseIE(InfoExtractor):
|
||||
query={
|
||||
'fastly_cacheable': 'persist_until_publish',
|
||||
'pcv': self._PUBLISHED_CONTENT_VERSION,
|
||||
'hash': '1242644265',
|
||||
'hash': '3712657851',
|
||||
'variables': json.dumps({
|
||||
'path': display_id,
|
||||
'countryCode': 'US',
|
||||
|
||||
@@ -67,7 +67,7 @@ class KickIE(KickBaseIE):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if KickClipIE.suitable(url) else super().suitable(url)
|
||||
return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel = self._match_id(url)
|
||||
@@ -98,25 +98,25 @@ class KickIE(KickBaseIE):
|
||||
|
||||
class KickVODIE(KickBaseIE):
|
||||
IE_NAME = 'kick:vod'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/video/e74614f4-5270-4319-90ad-32179f19a45c',
|
||||
'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'md5': '3870f94153e40e7121a6e46c068b70cb',
|
||||
'info_dict': {
|
||||
'id': 'e74614f4-5270-4319-90ad-32179f19a45c',
|
||||
'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
|
||||
'ext': 'mp4',
|
||||
'title': r're:❎ MEGA DRAMA ❎ LIVE ❎ CLICK ❎ ULTIMATE SKILLS .+',
|
||||
'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
|
||||
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
|
||||
'channel': 'xqc',
|
||||
'channel_id': '668',
|
||||
'uploader': 'xQc',
|
||||
'uploader_id': '676',
|
||||
'upload_date': '20240724',
|
||||
'timestamp': 1721796562,
|
||||
'duration': 18566.0,
|
||||
'upload_date': '20240909',
|
||||
'timestamp': 1725919141,
|
||||
'duration': 10155.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'view_count': int,
|
||||
'categories': ['VALORANT'],
|
||||
'categories': ['Just Chatting'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
@@ -148,7 +148,7 @@ class KickVODIE(KickBaseIE):
|
||||
|
||||
class KickClipIE(KickBaseIE):
|
||||
IE_NAME = 'kick:clips'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/?\?(?:[^#]+&)?clip=(?P<id>clip_[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?P<id>clip_[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
|
||||
'info_dict': {
|
||||
@@ -189,6 +189,26 @@ class KickClipIE(KickBaseIE):
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5',
|
||||
'info_dict': {
|
||||
'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5',
|
||||
'ext': 'mp4',
|
||||
'title': 'KLJASLDJKLJKASDLJKDAS',
|
||||
'channel': 'spreen',
|
||||
'channel_id': '5312671',
|
||||
'uploader': 'AnormalBarraBaja',
|
||||
'uploader_id': '26518262',
|
||||
'duration': 43.0,
|
||||
'upload_date': '20240927',
|
||||
'timestamp': 1727399987,
|
||||
'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': ['Minecraft'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
126
yt_dlp/extractor/kika.py
Normal file
126
yt_dlp/extractor/kika.py
Normal file
@@ -0,0 +1,126 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class KikaIE(InfoExtractor):
|
||||
IE_DESC = 'KiKA.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P<id>[a-z-]+\d+)'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
|
||||
'md5': 'fbfc8da483719ef06f396e5e5b938c69',
|
||||
'info_dict': {
|
||||
'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20240831',
|
||||
'timestamp': 1725126600,
|
||||
'season_number': 2024,
|
||||
'modified_date': '20240831',
|
||||
'episode': 'Episode 476',
|
||||
'episode_number': 476,
|
||||
'season': 'Season 2024',
|
||||
'duration': 634,
|
||||
'title': 'logo! vom Samstag, 31. August 2024',
|
||||
'modified_timestamp': 1725129983,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.kika.de/kaltstart/videos/video92498',
|
||||
'md5': '710ece827e5055094afeb474beacb7aa',
|
||||
'info_dict': {
|
||||
'id': 'video92498',
|
||||
'ext': 'mp4',
|
||||
'title': '7. Wo ist Leo?',
|
||||
'description': 'md5:fb48396a5b75068bcac1df74f1524920',
|
||||
'duration': 436,
|
||||
'timestamp': 1702926876,
|
||||
'upload_date': '20231218',
|
||||
'episode_number': 7,
|
||||
'modified_date': '20240319',
|
||||
'modified_timestamp': 1710880610,
|
||||
'episode': 'Episode 7',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
|
||||
'md5': 'ffd1b700d7de0a6616a1d08544c77294',
|
||||
'info_dict': {
|
||||
'id': 'video90088',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20221102',
|
||||
'timestamp': 1667390580,
|
||||
'duration': 197,
|
||||
'modified_timestamp': 1711093771,
|
||||
'episode_number': 8,
|
||||
'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
|
||||
'modified_date': '20240322',
|
||||
'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 8',
|
||||
'season': 'Season 1',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
doc = self._download_json(f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id)
|
||||
video_assets = self._download_json(doc['assets']['url'], video_id)
|
||||
|
||||
subtitles = {}
|
||||
if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):
|
||||
subtitles['de'] = [{
|
||||
'url': ttml_resource,
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):
|
||||
subtitles.setdefault('de', []).append({
|
||||
'url': webvtt_resource,
|
||||
'ext': 'vtt',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': list(self._extract_formats(video_assets, video_id)),
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(doc, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('date', {parse_iso8601}),
|
||||
'modified_timestamp': ('modificationDate', {parse_iso8601}),
|
||||
'duration': ((
|
||||
('durationInSeconds', {int_or_none}),
|
||||
('duration', {parse_duration})), any),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
|
||||
for media in traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url']))):
|
||||
stream_url = media['url']
|
||||
ext = determine_ext(stream_url)
|
||||
if ext == 'm3u8':
|
||||
yield from self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
else:
|
||||
yield {
|
||||
'url': stream_url,
|
||||
'format_id': ext,
|
||||
**traverse_obj(media, {
|
||||
'width': ('frameWidth', {int_or_none}),
|
||||
'height': ('frameHeight', {int_or_none}),
|
||||
# NB: filesize is 0 if unknown, bitrate is -1 if unknown
|
||||
'filesize': ('fileSize', {int_or_none}, {lambda x: x or None}),
|
||||
'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
}),
|
||||
}
|
||||
@@ -136,6 +136,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
|
||||
class LBRYIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry'
|
||||
IE_DESC = 'odysee.com'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
|
||||
(?:\$/(?:download|embed)/)?
|
||||
(?P<id>
|
||||
@@ -364,6 +365,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
|
||||
class LBRYChannelIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:channel'
|
||||
IE_DESC = 'odysee.com channels'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||
@@ -391,6 +393,7 @@ class LBRYChannelIE(LBRYBaseIE):
|
||||
|
||||
class LBRYPlaylistIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:playlist'
|
||||
IE_DESC = 'odysee.com playlists'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
|
||||
|
||||
@@ -1,86 +1,11 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
|
||||
'info_dict': {
|
||||
'id': '10809',
|
||||
'ext': 'mp4',
|
||||
'title': "Put'ka: Trys Klausimai",
|
||||
'upload_date': '20161216',
|
||||
'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
|
||||
'age_limit': 18,
|
||||
'duration': 117,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1481904000,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # HLS download
|
||||
},
|
||||
}, {
|
||||
'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
|
||||
'info_dict': {
|
||||
'id': '10467',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nėrdas: Kompiuterio Valymas',
|
||||
'upload_date': '20150113',
|
||||
'description': 'md5:7352d113a242a808676ff17e69db6a69',
|
||||
'age_limit': 18,
|
||||
'duration': 346,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1421164800,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # HLS download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_AGE_LIMITS = {
|
||||
'N-7': 7,
|
||||
'N-14': 14,
|
||||
'S': 18,
|
||||
}
|
||||
_M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
video_info = self._download_json(
|
||||
'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, video_id or '0'),
|
||||
display_id)['videoConfig']['videoInfo']
|
||||
|
||||
video_id = str(video_info['id'])
|
||||
title = video_info['title']
|
||||
prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
|
||||
video_id, 'mp4', 'm3u8_native')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
|
||||
'duration': int_or_none(video_info.get('duration')),
|
||||
'description': clean_html(video_info.get('htmlDescription')),
|
||||
'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
|
||||
'timestamp': parse_iso8601(video_info.get('airDate')),
|
||||
'view_count': int_or_none(video_info.get('viewsCount')),
|
||||
}
|
||||
|
||||
|
||||
class LnkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'
|
||||
|
||||
@@ -92,9 +92,9 @@ class LoomIE(InfoExtractor):
|
||||
},
|
||||
'params': {'videopassword': 'seniorinfants2'},
|
||||
}, {
|
||||
# embed, transcoded-url endpoint sends empty JSON response
|
||||
# embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
|
||||
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'md5': '8488817242a0db1cb2ad0ea522553cf6',
|
||||
'md5': 'b321d261656848c184a94e3b93eae28d',
|
||||
'info_dict': {
|
||||
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'ext': 'mp4',
|
||||
@@ -104,6 +104,7 @@ class LoomIE(InfoExtractor):
|
||||
'timestamp': 1657216459,
|
||||
'duration': 181,
|
||||
},
|
||||
'params': {'format': 'bestvideo'}, # Test video-only fixup
|
||||
'expected_warnings': ['Failed to parse JSON'],
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -293,7 +294,11 @@ class LoomIE(InfoExtractor):
|
||||
format_url = format_url.replace('-split.m3u8', '.m3u8')
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
|
||||
# Sometimes only split video/audio formats are available, need to fixup video-only formats
|
||||
is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec'))
|
||||
for fmt in m3u8_formats:
|
||||
if is_not_premerged and fmt.get('vcodec') != 'none':
|
||||
fmt['acodec'] = 'none'
|
||||
yield {
|
||||
**fmt,
|
||||
'url': update_url(fmt['url'], query=query),
|
||||
|
||||
@@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
|
||||
video_data = None
|
||||
|
||||
# fix meta_url if missing the host address
|
||||
if re.match(r'^\/\+\/', meta_url):
|
||||
if re.match(r'\/\+\/', meta_url):
|
||||
meta_url = urljoin('https://my.mail.ru', meta_url)
|
||||
|
||||
if meta_url:
|
||||
|
||||
@@ -13,8 +13,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
IE_DESC = 'MDR.DE'
|
||||
_VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
@@ -34,30 +34,6 @@ class MDRIE(InfoExtractor):
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
|
||||
'md5': '4930515e36b06c111213e80d1e4aad0e',
|
||||
'info_dict': {
|
||||
'id': '19636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baumhaus vom 30. Oktober 2015',
|
||||
'duration': 134,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
|
||||
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
|
||||
'info_dict': {
|
||||
'id': '8182',
|
||||
'ext': 'mp4',
|
||||
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
||||
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
||||
'timestamp': 1482541200,
|
||||
'upload_date': '20161224',
|
||||
'duration': 4628,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
}, {
|
||||
# audio with alternative playerURL pattern
|
||||
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
|
||||
@@ -68,28 +44,7 @@ class MDRIE(InfoExtractor):
|
||||
'duration': 3239,
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
}, {
|
||||
# empty bitrateVideo and bitrateAudio
|
||||
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
|
||||
'info_dict': {
|
||||
'id': '128372',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der kleine Wichtel kehrt zurück',
|
||||
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
|
||||
'duration': 4876,
|
||||
'timestamp': 1607823300,
|
||||
'upload_date': '20201213',
|
||||
'uploader': 'ZDF',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
|
||||
'only_matching': True,
|
||||
'skip': '404 not found',
|
||||
}, {
|
||||
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -16,6 +16,15 @@ class MediaKlikkIE(InfoExtractor):
|
||||
(?P<id>[^/#?_]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/',
|
||||
'info_dict': {
|
||||
'id': '668177',
|
||||
'title': 'Az ajtó',
|
||||
'display_id': 'az-ajto',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg',
|
||||
},
|
||||
}, {
|
||||
# (old) mediaklikk. date in html.
|
||||
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
|
||||
'info_dict': {
|
||||
@@ -37,6 +46,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230903',
|
||||
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) m4sport
|
||||
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
|
||||
@@ -59,6 +69,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230908',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# m4sport with *video/ url and no date
|
||||
'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
|
||||
@@ -69,6 +80,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# (old) hirado
|
||||
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
|
||||
@@ -90,6 +102,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230911',
|
||||
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}, {
|
||||
# (old) petofilive
|
||||
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
|
||||
@@ -112,6 +125,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'upload_date': '20230909',
|
||||
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -143,14 +157,14 @@ class MediaKlikkIE(InfoExtractor):
|
||||
if not playlist_url:
|
||||
raise ExtractorError('Unable to extract playlist url')
|
||||
|
||||
formats = self._extract_wowza_formats(
|
||||
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
IE_NAME = 'MangoTV'
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ class TechTVMITIE(InfoExtractor):
|
||||
|
||||
class OCWMITIE(InfoExtractor):
|
||||
IE_NAME = 'ocw.mit.edu'
|
||||
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
_BASE_URL = 'http://ocw.mit.edu/'
|
||||
|
||||
_TESTS = [
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
from .telecinco import TelecincoIE
|
||||
from .telecinco import TelecincoBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
|
||||
class MiTeleIE(TelecincoBaseIE):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
|
||||
'info_dict': {
|
||||
@@ -27,6 +26,7 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1471209401,
|
||||
'upload_date': '20160814',
|
||||
},
|
||||
'skip': 'HTTP Error 404 Not Found',
|
||||
}, {
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
@@ -49,6 +49,26 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404 Not Found',
|
||||
}, {
|
||||
'url': 'https://www.mitele.es/programas-tv/horizonte/temporada-5/programa-171-40_013480051/player/',
|
||||
'info_dict': {
|
||||
'id': '7adbe22e-cd41-4787-afa4-36f3da7c2c6f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Horizonte Temporada 5 Programa 171',
|
||||
'description': 'md5:97f1fb712c5ac27e5693a8b3c5c0c6e3',
|
||||
'episode': 'Las Zonas de Bajas Emisiones, a debate',
|
||||
'episode_number': 171,
|
||||
'season': 'Season 5',
|
||||
'season_number': 5,
|
||||
'series': 'Horizonte',
|
||||
'duration': 7012,
|
||||
'upload_date': '20240927',
|
||||
'timestamp': 1727416450,
|
||||
'thumbnail': 'https://album.mediaset.es/eimg/2024/09/27/horizonte-171_9f02.jpg',
|
||||
'age_limit': 12,
|
||||
},
|
||||
'params': {'geo_bypass_country': 'ES'},
|
||||
}, {
|
||||
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
|
||||
'only_matching': True,
|
||||
|
||||
121
yt_dlp/extractor/mojevideo.py
Normal file
121
yt_dlp/extractor/mojevideo.py
Normal file
@@ -0,0 +1,121 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json, remove_end, update_url_query
|
||||
|
||||
|
||||
class MojevideoIE(InfoExtractor):
|
||||
IE_DESC = 'mojevideo.sk'
|
||||
_VALID_URL = r'https?://(?:www\.)?mojevideo\.sk/video/(?P<id>\w+)/(?P<display_id>[\w()]+?)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html',
|
||||
'md5': '384a4628bd2bbd261c5206cf77c38c17',
|
||||
'info_dict': {
|
||||
'id': '3d17c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chlapci dobetónovali sme, máme hotovo!',
|
||||
'display_id': 'chlapci_dobetonovali_sme_mame_hotovo',
|
||||
'description': 'md5:a0822126044050d304a9ef58c92ddb34',
|
||||
'thumbnail': 'https://fs5.mojevideo.sk/imgfb/250236.jpg',
|
||||
'duration': 21.0,
|
||||
'upload_date': '20230919',
|
||||
'timestamp': 1695129706,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# 720p
|
||||
'url': 'https://www.mojevideo.sk/video/14677/den_blbec.html',
|
||||
'md5': '517c3e111c53a67d10b429c1f344ba2f',
|
||||
'info_dict': {
|
||||
'id': '14677',
|
||||
'ext': 'mp4',
|
||||
'title': 'Deň blbec?',
|
||||
'display_id': 'den_blbec',
|
||||
'description': 'I maličkosť vám môže zmeniť celý deň. Nikdy nezahadzujte žuvačky na zem!',
|
||||
'thumbnail': 'https://fs5.mojevideo.sk/imgfb/83575.jpg',
|
||||
'duration': 100.0,
|
||||
'upload_date': '20120515',
|
||||
'timestamp': 1337076481,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# 1080p
|
||||
'url': 'https://www.mojevideo.sk/video/2feb2/band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd).html',
|
||||
'md5': '64599a23d3ac31cf2fe069e4353d8162',
|
||||
'info_dict': {
|
||||
'id': '2feb2',
|
||||
'ext': 'mp4',
|
||||
'title': 'BAND-MAID - onset (Instrumental) Live - Zepp Tokyo (Full HD)',
|
||||
'display_id': 'band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd)',
|
||||
'description': 'Výborná inštrumentálna skladba od skupiny BAND-MAID.',
|
||||
'thumbnail': 'https://fs5.mojevideo.sk/imgfb/196274.jpg',
|
||||
'duration': 240.0,
|
||||
'upload_date': '20190708',
|
||||
'timestamp': 1562576592,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# 720p
|
||||
'url': 'https://www.mojevideo.sk/video/358c8/dva_nissany_skyline_strielaju_v_londyne.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# 720p
|
||||
'url': 'https://www.mojevideo.sk/video/2455d/gopro_hero4_session_nova_sportova_vodotesna_kamera.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# 1080p
|
||||
'url': 'https://www.mojevideo.sk/video/352ee/amd_rx_6800_xt_vs_nvidia_rtx_3080_(test_v_9_hrach).html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# 1080p
|
||||
'url': 'https://www.mojevideo.sk/video/2cbeb/trailer_z_avengers_infinity_war.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = self._match_valid_url(url).groups()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id_dec = self._search_regex(
|
||||
r'\bvId\s*=\s*(\d+)', webpage, 'video id', fatal=False) or str(int(video_id, 16))
|
||||
video_exp = self._search_regex(r'\bvEx\s*=\s*["\'](\d+)', webpage, 'video expiry')
|
||||
video_hashes = self._search_json(
|
||||
r'\bvHash\s*=', webpage, 'video hashes', video_id,
|
||||
contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)
|
||||
|
||||
formats = []
|
||||
for video_hash, (suffix, quality, format_note) in zip(video_hashes, [
|
||||
('', 1, 'normálna kvalita'),
|
||||
('_lq', 0, 'nízka kvalita'),
|
||||
('_hd', 2, 'HD-720p'),
|
||||
('_fhd', 3, 'FULL HD-1080p'),
|
||||
('_2k', 4, '2K-1440p'),
|
||||
]):
|
||||
formats.append({
|
||||
'format_id': f'mp4-{quality}',
|
||||
'quality': quality,
|
||||
'format_note': format_note,
|
||||
'url': update_url_query(
|
||||
f'https://cache01.mojevideo.sk/securevideos69/{video_id_dec}{suffix}.mp4', {
|
||||
'md5': video_hash,
|
||||
'expires': video_exp,
|
||||
}),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'title': (self._og_search_title(webpage, default=None)
|
||||
or remove_end(self._html_extract_title(webpage, 'title'), ' - Mojevideo')),
|
||||
'description': self._og_search_description(webpage),
|
||||
**self._search_json_ld(webpage, video_id, default={}),
|
||||
}
|
||||
@@ -371,7 +371,7 @@ class NexxIE(InfoExtractor):
|
||||
# not all videos work via arc, e.g. nexx:741:1269984
|
||||
if not video:
|
||||
# Reverse engineered from JS code (see getDeviceID function)
|
||||
device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(1e4, 99999)}{random.randint(1, 9)}'
|
||||
device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(10000, 99999)}{random.randint(1, 9)}'
|
||||
|
||||
result = self._call_api(domain_id, 'session/init', video_id, data={
|
||||
'nxp_devh': device_id,
|
||||
|
||||
@@ -11,9 +11,12 @@ from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
get_element_by_class,
|
||||
traverse_obj,
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NFLBaseIE(InfoExtractor):
|
||||
@@ -75,22 +78,15 @@ class NFLBaseIE(InfoExtractor):
|
||||
'osVersion': '10.0',
|
||||
}, separators=(',', ':')).encode()).decode(),
|
||||
'networkType': 'other',
|
||||
'nflClaimGroupsToAdd': [],
|
||||
'nflClaimGroupsToRemove': [],
|
||||
'peacockUUID': 'undefined',
|
||||
}
|
||||
_ACCOUNT_INFO = {}
|
||||
_API_KEY = None
|
||||
_API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
|
||||
|
||||
_TOKEN = None
|
||||
_TOKEN_EXPIRY = 0
|
||||
|
||||
def _get_account_info(self, url, slug):
|
||||
if not self._API_KEY:
|
||||
webpage = self._download_webpage(url, slug, fatal=False) or ''
|
||||
self._API_KEY = self._search_regex(
|
||||
r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
|
||||
fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
|
||||
|
||||
def _get_account_info(self):
|
||||
cookies = self._get_cookies('https://auth-id.nfl.com/')
|
||||
login_token = traverse_obj(cookies, (
|
||||
(f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
|
||||
@@ -103,7 +99,7 @@ class NFLBaseIE(InfoExtractor):
|
||||
'or else try using --cookies-from-browser instead', expected=True)
|
||||
|
||||
account = self._download_json(
|
||||
'https://auth-id.nfl.com/accounts.getAccountInfo', slug,
|
||||
'https://auth-id.nfl.com/accounts.getAccountInfo', None,
|
||||
note='Downloading account info', data=urlencode_postdata({
|
||||
'include': 'profile,data',
|
||||
'lang': 'en',
|
||||
@@ -111,7 +107,7 @@ class NFLBaseIE(InfoExtractor):
|
||||
'sdk': 'js_latest',
|
||||
'login_token': login_token,
|
||||
'authMode': 'cookie',
|
||||
'pageURL': url,
|
||||
'pageURL': 'https://www.nfl.com/',
|
||||
'sdkBuild': traverse_obj(cookies, (
|
||||
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
|
||||
'format': 'json',
|
||||
@@ -126,55 +122,78 @@ class NFLBaseIE(InfoExtractor):
|
||||
if len(self._ACCOUNT_INFO) != 3:
|
||||
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
|
||||
|
||||
def _get_auth_token(self, url, slug):
|
||||
def _get_auth_token(self):
|
||||
if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
|
||||
return
|
||||
|
||||
if not self._ACCOUNT_INFO:
|
||||
self._get_account_info(url, slug)
|
||||
|
||||
token = self._download_json(
|
||||
'https://api.nfl.com/identity/v3/token%s' % (
|
||||
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
|
||||
slug, headers={'Content-Type': 'application/json'}, note='Downloading access token',
|
||||
None, headers={'Content-Type': 'application/json'}, note='Downloading access token',
|
||||
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
|
||||
|
||||
self._TOKEN = token['accessToken']
|
||||
self._TOKEN_EXPIRY = token['expiresIn']
|
||||
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
|
||||
|
||||
def _extract_video(self, mcp_id, is_live=False):
|
||||
self._get_auth_token()
|
||||
data = self._download_json(
|
||||
f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={
|
||||
'Authorization': f'Bearer {self._TOKEN}',
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps({'init': True, 'live': is_live}, separators=(',', ':')).encode())
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': mcp_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
'_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)],
|
||||
**traverse_obj(data, ('metadata', {
|
||||
'title': ('event', ('def_title', 'friendlyName'), {str}, any),
|
||||
'description': ('event', 'def_description', {str}),
|
||||
'duration': ('event', 'duration', {int_or_none}),
|
||||
'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}),
|
||||
})),
|
||||
}
|
||||
|
||||
def _parse_video_config(self, video_config, display_id):
|
||||
video_config = self._parse_json(video_config, display_id)
|
||||
is_live = traverse_obj(video_config, ('live', {bool})) or False
|
||||
item = video_config['playlist'][0]
|
||||
mcp_id = item.get('mcpID')
|
||||
if mcp_id:
|
||||
info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id)
|
||||
if mcp_id := item.get('mcpID'):
|
||||
return self._extract_video(mcp_id, is_live=is_live)
|
||||
|
||||
info = {'id': item.get('id') or item['entityId']}
|
||||
|
||||
item_url = item['url']
|
||||
ext = determine_ext(item_url)
|
||||
if ext == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(item_url, info['id'], 'mp4')
|
||||
else:
|
||||
media_id = item.get('id') or item['entityId']
|
||||
title = item.get('title')
|
||||
item_url = item['url']
|
||||
info = {'id': media_id}
|
||||
ext = determine_ext(item_url)
|
||||
if ext == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
|
||||
else:
|
||||
info['url'] = item_url
|
||||
if item.get('audio') is True:
|
||||
info['vcodec'] = 'none'
|
||||
is_live = video_config.get('live') is True
|
||||
thumbnails = None
|
||||
image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage'))
|
||||
if image_url:
|
||||
thumbnails = [{
|
||||
'url': image_url,
|
||||
'ext': determine_ext(image_url, 'jpg'),
|
||||
}]
|
||||
info.update({
|
||||
'title': title,
|
||||
'is_live': is_live,
|
||||
'description': clean_html(item.get('description')),
|
||||
'thumbnails': thumbnails,
|
||||
})
|
||||
info['url'] = item_url
|
||||
if item.get('audio') is True:
|
||||
info['vcodec'] = 'none'
|
||||
|
||||
thumbnails = None
|
||||
if image_url := traverse_obj(item, 'imageSrc', 'posterImage', expected_type=url_or_none):
|
||||
thumbnails = [{
|
||||
'url': image_url,
|
||||
'ext': determine_ext(image_url, 'jpg'),
|
||||
}]
|
||||
|
||||
info.update({
|
||||
**traverse_obj(item, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {clean_html}),
|
||||
}),
|
||||
'is_live': is_live,
|
||||
'thumbnails': thumbnails,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
@@ -188,24 +207,20 @@ class NFLIE(NFLBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'NFL',
|
||||
'tags': 'count:6',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'duration': 157,
|
||||
'categories': 'count:3',
|
||||
'_old_archive_ids': ['anvato 899441'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
|
||||
'md5': '6886b32c24b463038c760ceb55a34566',
|
||||
'md5': '92a517f05bd3eb50fe50244bc621aec8',
|
||||
'info_dict': {
|
||||
'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
|
||||
'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0',
|
||||
'ext': 'mp3',
|
||||
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
|
||||
'description': 'md5:12ada8ee70e6762658c30e223e095075',
|
||||
'thumbnail': 'https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/v1571153441/chiefs/rfljejccnyhhkpkfq855',
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
|
||||
'only_matching': True,
|
||||
@@ -236,13 +251,16 @@ class NFLArticleIE(NFLBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
entries = []
|
||||
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
|
||||
entries.append(self._parse_video_config(video_config, display_id))
|
||||
|
||||
def entries():
|
||||
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
|
||||
yield self._parse_video_config(video_config, display_id)
|
||||
|
||||
title = clean_html(get_element_by_class(
|
||||
'nfl-c-article__title', webpage)) or self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage)
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
|
||||
return self.playlist_result(entries(), display_id, title)
|
||||
|
||||
|
||||
class NFLPlusReplayIE(NFLBaseIE):
|
||||
@@ -307,6 +325,9 @@ class NFLPlusReplayIE(NFLBaseIE):
|
||||
'all_22': 'All-22',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._get_account_info()
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug, video_id = self._match_valid_url(url).group('slug', 'id')
|
||||
requested_types = self._configuration_arg('type', ['all'])
|
||||
@@ -315,7 +336,7 @@ class NFLPlusReplayIE(NFLBaseIE):
|
||||
requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
|
||||
|
||||
if not video_id:
|
||||
self._get_auth_token(url, slug)
|
||||
self._get_auth_token()
|
||||
headers = {'Authorization': f'Bearer {self._TOKEN}'}
|
||||
game_id = self._download_json(
|
||||
f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
|
||||
@@ -328,14 +349,13 @@ class NFLPlusReplayIE(NFLBaseIE):
|
||||
'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
|
||||
|
||||
if video_id:
|
||||
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
||||
return self._extract_video(video_id)
|
||||
|
||||
def entries():
|
||||
for replay in traverse_obj(
|
||||
replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
|
||||
):
|
||||
video_id = replay['mcpPlaybackId']
|
||||
yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
||||
yield self._extract_video(replay['mcpPlaybackId'])
|
||||
|
||||
return self.playlist_result(entries(), slug)
|
||||
|
||||
@@ -362,12 +382,15 @@ class NFLPlusEpisodeIE(NFLBaseIE):
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._get_account_info()
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug = self._match_id(url)
|
||||
self._get_auth_token(url, slug)
|
||||
self._get_auth_token()
|
||||
video_id = self._download_json(
|
||||
f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
|
||||
'Authorization': f'Bearer {self._TOKEN}',
|
||||
})['mcpPlaybackId']
|
||||
|
||||
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
||||
return self._extract_video(video_id)
|
||||
|
||||
@@ -420,7 +420,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'x-request-with': 'https://www.nicovideo.jp',
|
||||
})['data']['contentUrl']
|
||||
# Getting all audio formats results in duplicate video formats which we filter out later
|
||||
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)
|
||||
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')
|
||||
|
||||
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
|
||||
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
|
||||
@@ -432,7 +432,6 @@ class NiconicoIE(InfoExtractor):
|
||||
'asr': ('samplingRate', {int_or_none}),
|
||||
}), get_all=False),
|
||||
'acodec': 'aac',
|
||||
'ext': 'm4a',
|
||||
}
|
||||
|
||||
# Sort before removing dupes to keep the format dicts with the lowest tbr
|
||||
@@ -870,7 +869,7 @@ class NicovideoTagURLIE(NicovideoSearchBaseIE):
|
||||
|
||||
|
||||
class NiconicoUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)(?:/video)?/?(?:$|[#?])'
|
||||
_TEST = {
|
||||
'url': 'https://www.nicovideo.jp/user/419948',
|
||||
'info_dict': {
|
||||
@@ -878,7 +877,7 @@ class NiconicoUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 101,
|
||||
}
|
||||
_API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
|
||||
_API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
_API_HEADERS = {
|
||||
@@ -898,12 +897,13 @@ class NiconicoUserIE(InfoExtractor):
|
||||
total_count = int_or_none(json_parsed['data'].get('totalCount'))
|
||||
for entry in json_parsed['data']['items']:
|
||||
count += 1
|
||||
yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id']))
|
||||
yield self.url_result(
|
||||
f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE)
|
||||
page_num += 1
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
|
||||
return self.playlist_result(self._entries(list_id), list_id)
|
||||
|
||||
|
||||
class NiconicoLiveIE(InfoExtractor):
|
||||
|
||||
@@ -43,14 +43,8 @@ class NoodleMagazineIE(InfoExtractor):
|
||||
def build_url(url_or_path):
|
||||
return urljoin('https://adult.noodlemagazine.com', url_or_path)
|
||||
|
||||
headers = {'Referer': url}
|
||||
player_path = self._html_search_regex(
|
||||
r'<iframe[^>]+\bid="iplayer"[^>]+\bsrc="([^"]+)"', webpage, 'player path')
|
||||
player_iframe = self._download_webpage(
|
||||
build_url(player_path), video_id, 'Downloading iframe page', headers=headers)
|
||||
playlist_url = self._search_regex(
|
||||
r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url')
|
||||
playlist_info = self._download_json(build_url(playlist_url), video_id, headers=headers)
|
||||
playlist_info = self._search_json(
|
||||
r'window\.playlist\s*=', webpage, video_id, 'playlist info')
|
||||
|
||||
formats = []
|
||||
for source in traverse_obj(playlist_info, ('sources', lambda _, v: v['file'])):
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NZOnScreenIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
|
||||
'info_dict': {
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
|
||||
class NZZIE(InfoExtractor):
|
||||
@@ -22,19 +19,14 @@ class NZZIE(InfoExtractor):
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
|
||||
def _entries(self, webpage, page_id):
|
||||
for script in re.findall(r'(?s)<script[^>]* data-hid="jw-video-jw[^>]+>(.+?)</script>', webpage):
|
||||
settings = self._search_json(r'var\s+settings\s*=[^{]*', script, 'settings', page_id, fatal=False)
|
||||
if entry := self._parse_jwplayer_data(settings, page_id):
|
||||
yield entry
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
entries = []
|
||||
for player_element in re.findall(
|
||||
r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
|
||||
player_params = extract_attributes(player_element)
|
||||
if player_params.get('data-type') not in ('kaltura_singleArticle',):
|
||||
self.report_warning('Unsupported player type')
|
||||
continue
|
||||
entry_id = player_params['data-id']
|
||||
entries.append(self.url_result(
|
||||
'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
|
||||
|
||||
return self.playlist_result(entries, page_id)
|
||||
return self.playlist_result(self._entries(webpage, page_id), page_id)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import functools
|
||||
import itertools
|
||||
import urllib.parse
|
||||
|
||||
@@ -22,13 +23,19 @@ from ..utils import (
|
||||
|
||||
|
||||
class PatreonBaseIE(InfoExtractor):
|
||||
USER_AGENT = 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
|
||||
@functools.cached_property
|
||||
def patreon_user_agent(self):
|
||||
# Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection.
|
||||
# Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in
|
||||
if self._get_cookies('https://www.patreon.com/').get('session_id'):
|
||||
return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)'
|
||||
return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
|
||||
|
||||
def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
|
||||
if headers is None:
|
||||
headers = {}
|
||||
if 'User-Agent' not in headers:
|
||||
headers['User-Agent'] = self.USER_AGENT
|
||||
headers['User-Agent'] = self.patreon_user_agent
|
||||
if query:
|
||||
query.update({'json-api-version': 1.0})
|
||||
|
||||
@@ -48,6 +55,7 @@ class PatreonBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class PatreonIE(PatreonBaseIE):
|
||||
IE_NAME = 'patreon'
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.patreon.com/creation?hid=743933',
|
||||
@@ -111,6 +119,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
'comment_count': int,
|
||||
'channel_is_verified': True,
|
||||
'chapters': 'count:4',
|
||||
'timestamp': 1423689666,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
@@ -221,6 +230,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
'thumbnail': r're:^https?://.+',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# multiple attachments/embeds
|
||||
'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
|
||||
@@ -326,8 +336,13 @@ class PatreonIE(PatreonBaseIE):
|
||||
if embed_url and (urlh := self._request_webpage(
|
||||
embed_url, video_id, 'Checking embed URL', headers=headers,
|
||||
fatal=False, errnote=False, expected_status=403)):
|
||||
# Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need
|
||||
# to check for "Sorry, we couldn&rsquo;t find that page" in the meta description tag
|
||||
meta_description = clean_html(self._html_search_meta(
|
||||
'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None))
|
||||
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
|
||||
if urlh.status != 403 or VidsIoIE.suitable(embed_url):
|
||||
if ((urlh.status != 403 and meta_description != 'Sorry, we couldn’t find that page')
|
||||
or VidsIoIE.suitable(embed_url)):
|
||||
entries.append(self.url_result(smuggle_url(embed_url, headers)))
|
||||
|
||||
post_file = traverse_obj(attributes, ('post_file', {dict}))
|
||||
@@ -419,15 +434,19 @@ class PatreonIE(PatreonBaseIE):
|
||||
|
||||
|
||||
class PatreonCampaignIE(PatreonBaseIE):
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m/(?P<campaign_id>\d+))|(?P<vanity>[-\w]+))'
|
||||
IE_NAME = 'patreon:campaign'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?patreon\.com/(?:
|
||||
(?:m|api/campaigns)/(?P<campaign_id>\d+)|
|
||||
(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
|
||||
)(?:/posts)?/?(?:$|[?#])'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.patreon.com/dissonancepod/',
|
||||
'info_dict': {
|
||||
'title': 'Cognitive Dissonance Podcast',
|
||||
'channel_url': 'https://www.patreon.com/dissonancepod',
|
||||
'id': '80642',
|
||||
'description': 'md5:eb2fa8b83da7ab887adeac34da6b7af7',
|
||||
'description': r're:(?s).*We produce a weekly news podcast focusing on stories that deal with skepticism and religion.*',
|
||||
'channel_id': '80642',
|
||||
'channel': 'Cognitive Dissonance Podcast',
|
||||
'age_limit': 0,
|
||||
@@ -442,31 +461,46 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
'url': 'https://www.patreon.com/m/4767637/posts',
|
||||
'info_dict': {
|
||||
'title': 'Not Just Bikes',
|
||||
'channel_follower_count': int,
|
||||
'id': '4767637',
|
||||
'channel_id': '4767637',
|
||||
'channel_url': 'https://www.patreon.com/notjustbikes',
|
||||
'description': 'md5:595c6e7dca76ae615b1d38c298a287a1',
|
||||
'description': r're:(?s).*Not Just Bikes started as a way to explain why we chose to live in the Netherlands.*',
|
||||
'age_limit': 0,
|
||||
'channel': 'Not Just Bikes',
|
||||
'uploader_url': 'https://www.patreon.com/notjustbikes',
|
||||
'uploader': 'Not Just Bikes',
|
||||
'uploader': 'Jason',
|
||||
'uploader_id': '37306634',
|
||||
'thumbnail': r're:^https?://.*$',
|
||||
},
|
||||
'playlist_mincount': 71,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/api/campaigns/4243769/posts',
|
||||
'info_dict': {
|
||||
'title': 'Second Thought',
|
||||
'channel_follower_count': int,
|
||||
'id': '4243769',
|
||||
'channel_id': '4243769',
|
||||
'channel_url': 'https://www.patreon.com/secondthought',
|
||||
'description': r're:(?s).*Second Thought is an educational YouTube channel.*',
|
||||
'age_limit': 0,
|
||||
'channel': 'Second Thought',
|
||||
'uploader_url': 'https://www.patreon.com/secondthought',
|
||||
'uploader': 'JT Chapman',
|
||||
'uploader_id': '32718287',
|
||||
'thumbnail': r're:^https?://.*$',
|
||||
},
|
||||
'playlist_mincount': 201,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/dissonancepod/posts',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/m/5932659',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/api/campaigns/4243769',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if PatreonIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _entries(self, campaign_id):
|
||||
cursor = None
|
||||
params = {
|
||||
@@ -493,7 +527,7 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
|
||||
campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
|
||||
if campaign_id is None:
|
||||
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT})
|
||||
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
|
||||
campaign_id = self._search_nextjs_data(
|
||||
webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']
|
||||
|
||||
|
||||
@@ -109,7 +109,7 @@ class PinterestBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class PinterestIE(PinterestBaseIE):
|
||||
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?P<id>\d+)'
|
||||
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?:[\w-]+--)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# formats found in data['videos']
|
||||
'url': 'https://www.pinterest.com/pin/664281013778109217/',
|
||||
@@ -174,6 +174,25 @@ class PinterestIE(PinterestBaseIE):
|
||||
}, {
|
||||
'url': 'https://co.pinterest.com/pin/824721750502199491/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://pinterest.com/pin/dive-into-serenity-blue-lagoon-pedi-nails-for-a-tranquil-and-refreshing-spa-experience-video-in-2024--2885187256207927',
|
||||
'info_dict': {
|
||||
'id': '2885187256207927',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dive into Serenity: Blue Lagoon Pedi Nails for a Tranquil and Refreshing Spa Experience! 💙💅',
|
||||
'description': 'md5:5da41c767d2317e42e49b663b0b2150f',
|
||||
'uploader': 'Glamour Artistry |Everyday Outfits, Luxury Fashion & Nail Designs',
|
||||
'uploader_id': '1142999717836434688',
|
||||
'upload_date': '20240702',
|
||||
'timestamp': 1719939156,
|
||||
'duration': 7.967,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'categories': 'count:9',
|
||||
'tags': ['#BlueLagoonPediNails', '#SpaExperience'],
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import functools
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
|
||||
@@ -628,8 +628,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
if not page_entries:
|
||||
break
|
||||
for e in page_entries:
|
||||
yield e
|
||||
yield from page_entries
|
||||
if not self._has_more(webpage):
|
||||
break
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
join_nonempty,
|
||||
time_seconds,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
@@ -167,7 +168,7 @@ class RadikoBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class RadikoIE(RadikoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<timestring>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
# QRR (文化放送) station provides <desc>
|
||||
@@ -183,8 +184,9 @@ class RadikoIE(RadikoBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
station, video_id = self._match_valid_url(url).groups()
|
||||
vid_int = unified_timestamp(video_id, False)
|
||||
station, timestring = self._match_valid_url(url).group('station', 'timestring')
|
||||
video_id = join_nonempty(station, timestring)
|
||||
vid_int = unified_timestamp(timestring, False)
|
||||
prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
|
||||
|
||||
auth_token, area_id = self._auth_client()
|
||||
@@ -207,7 +209,7 @@ class RadikoIE(RadikoBaseIE):
|
||||
'ft': radio_begin,
|
||||
'end_at': radio_end,
|
||||
'to': radio_end,
|
||||
'seek': video_id,
|
||||
'seek': timestring,
|
||||
},
|
||||
),
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class RadioFranceIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
||||
IE_NAME = 'radiofrance'
|
||||
|
||||
_TEST = {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -17,7 +18,7 @@ from ..utils import (
|
||||
|
||||
class RedditIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'reddit'
|
||||
_VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?reddit(?:media)?\.com/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
||||
'info_dict': {
|
||||
@@ -251,15 +252,15 @@ class RedditIE(InfoExtractor):
|
||||
return {'en': [{'url': caption_url}]}
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
|
||||
slug, video_id = self._match_valid_url(url).group('slug', 'id')
|
||||
|
||||
data = self._download_json(
|
||||
f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403)
|
||||
if not data:
|
||||
fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com'
|
||||
self.to_screen(f'{host} request failed, retrying with {fallback_host}')
|
||||
try:
|
||||
data = self._download_json(
|
||||
f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403)
|
||||
f'https://www.reddit.com/{slug}/.json', video_id, expected_status=403)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, json.JSONDecodeError):
|
||||
self.raise_login_required('Account authentication is required')
|
||||
raise
|
||||
|
||||
if traverse_obj(data, 'error') == 403:
|
||||
reason = data.get('reason')
|
||||
|
||||
@@ -6,7 +6,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ReverbNationIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
||||
'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import js_to_json
|
||||
|
||||
|
||||
class RTPIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:(?:estudoemcasa|palco|zigzag)/)?p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
|
||||
'md5': 'e736ce0c665e459ddb818546220b4ef8',
|
||||
@@ -19,9 +19,25 @@ class RTPIE(InfoExtractor):
|
||||
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rtp.pt/play/zigzag/p13166/e757904/25-curiosidades-25-de-abril',
|
||||
'md5': '9a81ed53f2b2197cfa7ed455b12f8ade',
|
||||
'info_dict': {
|
||||
'id': 'e757904',
|
||||
'ext': 'mp4',
|
||||
'title': '25 Curiosidades, 25 de Abril',
|
||||
'description': 'Estudar ou não estudar - Em cada um dos episódios descobrimos uma curiosidade acerca de como era viver em Portugal antes da revolução do 25 de abr',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/portugues-1-ano',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rtp.pt/play/palco/p13785/l7nnon',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_RX_OBFUSCATION = re.compile(r'''(?xs)
|
||||
@@ -49,17 +65,17 @@ class RTPIE(InfoExtractor):
|
||||
|
||||
f, config = self._search_regex(
|
||||
r'''(?sx)
|
||||
var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
|
||||
(?:var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*)?
|
||||
var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
|
||||
''', webpage,
|
||||
'player config', group=('f', 'config'))
|
||||
|
||||
f = self._parse_json(
|
||||
f, video_id,
|
||||
lambda data: self.__unobfuscate(data, video_id=video_id))
|
||||
config = self._parse_json(
|
||||
config, video_id,
|
||||
lambda data: self.__unobfuscate(data, video_id=video_id))
|
||||
f = config['file'] if not f else self._parse_json(
|
||||
f, video_id,
|
||||
lambda data: self.__unobfuscate(data, video_id=video_id))
|
||||
|
||||
formats = []
|
||||
if isinstance(f, dict):
|
||||
|
||||
@@ -8,14 +8,17 @@ from ..utils import (
|
||||
UnsupportedError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
format_field,
|
||||
get_element_by_class,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_count,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -382,8 +385,10 @@ class RumbleChannelIE(InfoExtractor):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
break
|
||||
raise
|
||||
for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage):
|
||||
yield self.url_result('https://rumble.com' + video_url)
|
||||
for video_url in traverse_obj(
|
||||
get_elements_html_by_class('videostream__link', webpage), (..., {extract_attributes}, 'href'),
|
||||
):
|
||||
yield self.url_result(urljoin('https://rumble.com', video_url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, playlist_id = self._match_valid_url(url).groups()
|
||||
|
||||
@@ -6,6 +6,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
@@ -80,6 +81,8 @@ class RutubeBaseIE(InfoExtractor):
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
|
||||
formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False))
|
||||
return formats
|
||||
|
||||
def _download_and_extract_formats(self, video_id, query=None):
|
||||
@@ -90,7 +93,7 @@ class RutubeBaseIE(InfoExtractor):
|
||||
class RutubeIE(RutubeBaseIE):
|
||||
IE_NAME = 'rutube'
|
||||
IE_DESC = 'Rutube videos'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:(?:live/)?video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
|
||||
|
||||
_TESTS = [{
|
||||
@@ -164,6 +167,29 @@ class RutubeIE(RutubeBaseIE):
|
||||
'uploader': 'Стас Быков',
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m'],
|
||||
}, {
|
||||
'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
|
||||
'info_dict': {
|
||||
'id': 'c58f502c7bb34a8fcdd976b221fca292',
|
||||
'ext': 'mp4',
|
||||
'categories': ['Телепередачи'],
|
||||
'description': '',
|
||||
'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
|
||||
'live_status': 'is_live',
|
||||
'age_limit': 0,
|
||||
'uploader_id': '23460655',
|
||||
'timestamp': 1652972968,
|
||||
'view_count': int,
|
||||
'upload_date': '20220519',
|
||||
'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'uploader': 'Первый канал',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -36,7 +36,7 @@ class SampleFocusIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage = self._download_webpage(url, display_id, impersonate=True)
|
||||
|
||||
sample_id = self._search_regex(
|
||||
r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
|
||||
@@ -82,7 +82,15 @@ class SampleFocusIE(InfoExtractor):
|
||||
return {
|
||||
'id': sample_id,
|
||||
'title': title,
|
||||
'url': mp3_url,
|
||||
'formats': [{
|
||||
'url': mp3_url,
|
||||
'ext': 'mp3',
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp3',
|
||||
'http_headers': {
|
||||
'Referer': url,
|
||||
},
|
||||
}],
|
||||
'display_id': display_id,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
|
||||
33
yt_dlp/extractor/screenrec.py
Normal file
33
yt_dlp/extractor/screenrec.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ScreenRecIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?screenrec\.com/share/(?P<id>\w{10})'
|
||||
_TESTS = [{
|
||||
'url': 'https://screenrec.com/share/DasLtbknYo',
|
||||
'info_dict': {
|
||||
'id': 'DasLtbknYo',
|
||||
'ext': 'mp4',
|
||||
'title': '02.05.2024_03.01.25_REC',
|
||||
'description': 'Recorded with ScreenRec',
|
||||
'thumbnail': r're:^https?://.*\.gif$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'customUrl\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 URL', group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'),
|
||||
}
|
||||
36
yt_dlp/extractor/sen.py
Normal file
36
yt_dlp/extractor/sen.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sen\.com/video/(?P<id>[0-9a-f-]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.sen.com/video/eef46eb1-4d79-4e28-be9d-bd937767f8c4',
|
||||
'md5': 'ff615aca9691053c94f8f10d96cd7884',
|
||||
'info_dict': {
|
||||
'id': 'eef46eb1-4d79-4e28-be9d-bd937767f8c4',
|
||||
'ext': 'mp4',
|
||||
'description': 'Florida, 28 Sep 2022',
|
||||
'title': 'Hurricane Ian',
|
||||
'tags': ['North America', 'Storm', 'Weather'],
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
api_data = self._download_json(f'https://api.sen.com/content/public/video/{video_id}', video_id)
|
||||
m3u8_url = (traverse_obj(api_data, (
|
||||
'data', 'nodes', lambda _, v: v['id'] == 'player', 'video', 'url', {url_or_none}, any))
|
||||
or f'https://vod.sen.com/videos/{video_id}/manifest.m3u8')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'),
|
||||
**traverse_obj(api_data, ('data', 'nodes', lambda _, v: v['id'] == 'details', any, 'content', {
|
||||
'title': ('title', 'text', {str}),
|
||||
'description': ('descriptions', 0, 'text', {str}),
|
||||
'tags': ('badges', ..., 'text', {str}),
|
||||
})),
|
||||
}
|
||||
@@ -27,7 +27,7 @@ class ServusIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'AA-28BYCQNH92111',
|
||||
'ext': 'mp4',
|
||||
'title': 'Klettersteige in den Alpen',
|
||||
'title': 'Vie Ferrate - Klettersteige in den Alpen',
|
||||
'description': 'md5:25e47ddd83a009a0f9789ba18f2850ce',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2823,
|
||||
@@ -38,6 +38,7 @@ class ServusIE(InfoExtractor):
|
||||
'season_number': 11,
|
||||
'episode': 'Episode 8 - Vie Ferrate – Klettersteige in den Alpen',
|
||||
'episode_number': 8,
|
||||
'categories': ['Bergwelten'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
@@ -71,8 +72,11 @@ class ServusIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url).upper()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
next_data = self._search_nextjs_data(webpage, video_id, fatal=False)
|
||||
|
||||
video = self._download_json(
|
||||
'https://api-player.redbull.com/stv/servus-tv?timeZone=Europe/Berlin',
|
||||
'https://api-player.redbull.com/stv/servus-tv-playnet',
|
||||
video_id, 'Downloading video JSON', query={'videoId': video_id})
|
||||
if not video.get('videoUrl'):
|
||||
self._report_errors(video)
|
||||
@@ -89,7 +93,7 @@ class ServusIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video.get('title'),
|
||||
'description': self._get_description(video_id) or video.get('description'),
|
||||
'description': self._get_description(next_data) or video.get('description'),
|
||||
'thumbnail': video.get('poster'),
|
||||
'duration': float_or_none(video.get('duration')),
|
||||
'timestamp': unified_timestamp(video.get('currentSunrise')),
|
||||
@@ -100,16 +104,19 @@ class ServusIE(InfoExtractor):
|
||||
'episode_number': episode_number,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(next_data, ('props', 'pageProps', 'data', {
|
||||
'title': ('title', 'rendered', {str}),
|
||||
'timestamp': ('stv_date', 'raw', {int}),
|
||||
'duration': ('stv_duration', {float_or_none}),
|
||||
'categories': ('category_names', ..., {str}),
|
||||
})),
|
||||
}
|
||||
|
||||
def _get_description(self, video_id):
|
||||
info = self._download_json(
|
||||
f'https://backend.servustv.com/wp-json/rbmh/v2/media_asset/aa_id/{video_id}?fieldset=page',
|
||||
video_id, fatal=False)
|
||||
|
||||
return join_nonempty(*traverse_obj(info, (
|
||||
('stv_short_description', 'stv_long_description'),
|
||||
{lambda x: unescapeHTML(x.replace('\n\n', '\n'))})), delim='\n\n')
|
||||
def _get_description(self, next_data):
|
||||
return join_nonempty(*traverse_obj(next_data, (
|
||||
'props', 'pageProps', 'data',
|
||||
('stv_short_description', 'stv_long_description'), {str},
|
||||
{lambda x: x.replace('\n\n', '\n')}, {unescapeHTML})), delim='\n\n')
|
||||
|
||||
def _report_errors(self, video):
|
||||
playability_errors = traverse_obj(video, ('playabilityErrors', ...))
|
||||
|
||||
76
yt_dlp/extractor/snapchat.py
Normal file
76
yt_dlp/extractor/snapchat.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SnapchatSpotlightIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?snapchat\.com/spotlight/(?P<id>\w+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.snapchat.com/spotlight/W7_EDlXWTBiXAEEniNoMPwAAYYWtidGhudGZpAX1TKn0JAX1TKnXJAAAAAA',
|
||||
'md5': '46c580f63592d0cbb76e974d2f9f0fcc',
|
||||
'info_dict': {
|
||||
'id': 'W7_EDlXWTBiXAEEniNoMPwAAYYWtidGhudGZpAX1TKn0JAX1TKnXJAAAAAA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Views 💕',
|
||||
'description': '',
|
||||
'thumbnail': r're:https://cf-st\.sc-cdn\.net/d/kKJHIR1QAznRKK9jgYYDq\.256\.IRZXSOY',
|
||||
'duration': 4.665,
|
||||
'timestamp': 1637777831.369,
|
||||
'upload_date': '20211124',
|
||||
'repost_count': int,
|
||||
'uploader': 'shreypatel57',
|
||||
'uploader_url': 'https://www.snapchat.com/add/shreypatel57',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.snapchat.com/spotlight/W7_EDlXWTBiXAEEniNoMPwAAYcnVjYWdwcGV1AZEaIYn5AZEaIYnrAAAAAQ',
|
||||
'md5': '4cd9626458c1a0e3e6dbe72c544a9ec2',
|
||||
'info_dict': {
|
||||
'id': 'W7_EDlXWTBiXAEEniNoMPwAAYcnVjYWdwcGV1AZEaIYn5AZEaIYnrAAAAAQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Spotlight Snap',
|
||||
'description': 'How he flirt her teacher🤭🤭🤩😍 #kdrama#cdrama #dramaclips #dramaspotlight',
|
||||
'thumbnail': r're:https://cf-st\.sc-cdn\.net/i/ztfr6xFs0FOcFhwVczWfj\.256\.IRZXSOY',
|
||||
'duration': 10.91,
|
||||
'timestamp': 1722720291.307,
|
||||
'upload_date': '20240803',
|
||||
'view_count': int,
|
||||
'repost_count': int,
|
||||
'uploader': 'ganda0535',
|
||||
'uploader_url': 'https://www.snapchat.com/add/ganda0535',
|
||||
'tags': ['#dramaspotlight', '#dramaclips', '#cdrama', '#kdrama'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
page_props = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
|
||||
video_data = traverse_obj(page_props, (
|
||||
'spotlightFeed', 'spotlightStories',
|
||||
lambda _, v: v['story']['storyId']['value'] == video_id, 'metadata', any), None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
**traverse_obj(video_data, ('videoMetadata', {
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('uploadDateMs', {lambda x: float_or_none(x, 1000)}),
|
||||
'view_count': ('viewCount', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
'repost_count': ('shareCount', {int_or_none}),
|
||||
'url': ('contentUrl', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'duration': ('durationMs', {lambda x: float_or_none(x, 1000)}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
'uploader': ('creator', 'personCreator', 'username', {str}),
|
||||
'uploader_url': ('creator', 'personCreator', 'url', {url_or_none}),
|
||||
})),
|
||||
**traverse_obj(video_data, {
|
||||
'description': ('description', {str}),
|
||||
'tags': ('hashtags', ..., {str}),
|
||||
'view_count': ('engagementStats', 'viewCount', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
'repost_count': ('engagementStats', 'shareCount', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
@@ -208,7 +208,6 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
|
||||
def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
|
||||
track_id = str(info['id'])
|
||||
title = info['title']
|
||||
|
||||
format_urls = set()
|
||||
formats = []
|
||||
@@ -367,7 +366,7 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
|
||||
'uploader_url': user.get('permalink_url'),
|
||||
'timestamp': unified_timestamp(info.get('created_at')),
|
||||
'title': title,
|
||||
'title': info.get('title'),
|
||||
'description': info.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': float_or_none(info.get('duration'), 1000),
|
||||
@@ -377,7 +376,8 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
'like_count': extract_count('favoritings') or extract_count('likes'),
|
||||
'comment_count': extract_count('comment'),
|
||||
'repost_count': extract_count('reposts'),
|
||||
'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
|
||||
'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
|
||||
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
|
||||
'formats': formats if not extract_flat else None,
|
||||
}
|
||||
|
||||
@@ -429,7 +429,6 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'repost_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
|
||||
'uploader_url': 'https://soundcloud.com/ethmusic',
|
||||
'genres': [],
|
||||
},
|
||||
},
|
||||
# geo-restricted
|
||||
@@ -453,6 +452,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'uploader_url': 'https://soundcloud.com/the-concept-band',
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
|
||||
'genres': ['Alternative'],
|
||||
'artists': ['The Royal Concept'],
|
||||
},
|
||||
},
|
||||
# private link
|
||||
@@ -525,6 +525,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'repost_count': int,
|
||||
'view_count': int,
|
||||
'genres': ['Dance & EDM'],
|
||||
'artists': ['80M'],
|
||||
},
|
||||
},
|
||||
# private link, downloadable format
|
||||
@@ -549,6 +550,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
|
||||
'uploader_url': 'https://soundcloud.com/oriuplift',
|
||||
'genres': ['Trance'],
|
||||
'artists': ['Ori Uplift'],
|
||||
},
|
||||
},
|
||||
# no album art, use avatar pic for thumbnail
|
||||
@@ -572,7 +574,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'uploader_url': 'https://soundcloud.com/garyvee',
|
||||
'genres': [],
|
||||
'artists': ['MadReal'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
||||
@@ -2,7 +2,13 @@ import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json, str_or_none, traverse_obj
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
str_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SubstackIE(InfoExtractor):
|
||||
@@ -43,6 +49,19 @@ class SubstackIE(InfoExtractor):
|
||||
'uploader': "Andrew Zimmern's Spilled Milk ",
|
||||
'uploader_id': '577659',
|
||||
},
|
||||
}, {
|
||||
# Podcast that needs its file extension resolved to mp3
|
||||
'url': 'https://persuasion1.substack.com/p/summers',
|
||||
'md5': '1456a755d46084744facdfac9edf900f',
|
||||
'info_dict': {
|
||||
'id': '141970405',
|
||||
'ext': 'mp3',
|
||||
'title': 'Larry Summers on What Went Wrong on Campus',
|
||||
'description': 'Yascha Mounk and Larry Summers also discuss the promise and perils of artificial intelligence.',
|
||||
'thumbnail': r're:https://substackcdn\.com/image/.+\.jpeg',
|
||||
'uploader': 'Persuasion',
|
||||
'uploader_id': '61579',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -89,7 +108,15 @@ class SubstackIE(InfoExtractor):
|
||||
post_type = webpage_info['post']['type']
|
||||
formats, subtitles = [], {}
|
||||
if post_type == 'podcast':
|
||||
formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
|
||||
fmt = {'url': webpage_info['post']['podcast_url']}
|
||||
if not determine_ext(fmt['url'], default_ext=None):
|
||||
# The redirected format URL expires but the original URL doesn't,
|
||||
# so we only want to extract the extension from this request
|
||||
fmt['ext'] = determine_ext(self._request_webpage(
|
||||
HEADRequest(fmt['url']), display_id,
|
||||
'Resolving podcast file extension',
|
||||
'Podcast URL is invalid').url)
|
||||
formats.append(fmt)
|
||||
elif post_type == 'video':
|
||||
formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url)
|
||||
else:
|
||||
|
||||
@@ -472,7 +472,7 @@ class SVTPageIE(SVTBaseIE):
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
urql_state = self._search_json(
|
||||
r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)
|
||||
r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
|
||||
|
||||
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class Tele13IE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
||||
|
||||
@@ -2,15 +2,69 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
str_or_none,
|
||||
try_get,
|
||||
traverse_obj,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TelecincoIE(InfoExtractor):
|
||||
class TelecincoBaseIE(InfoExtractor):
|
||||
def _parse_content(self, content, url):
|
||||
video_id = content['dataMediaId']
|
||||
config = self._download_json(
|
||||
content['dataConfig'], video_id, 'Downloading config JSON')
|
||||
services = config['services']
|
||||
caronte = self._download_json(services['caronte'], video_id)
|
||||
if traverse_obj(caronte, ('dls', 0, 'drm', {bool})):
|
||||
self.report_drm(video_id)
|
||||
|
||||
stream = caronte['dls'][0]['stream']
|
||||
headers = {
|
||||
'Referer': url,
|
||||
'Origin': re.match(r'https?://[^/]+', url).group(0),
|
||||
}
|
||||
geo_headers = {**headers, **self.geo_verification_headers()}
|
||||
|
||||
try:
|
||||
cdn = self._download_json(
|
||||
caronte['cerbero'], video_id, data=json.dumps({
|
||||
'bbx': caronte['bbx'],
|
||||
'gbx': self._download_json(services['gbx'], video_id)['gbx'],
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
**geo_headers,
|
||||
})['tokens']['1']['cdn']
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
|
||||
error_code = traverse_obj(
|
||||
self._webpage_read_content(error.cause.response, caronte['cerbero'], video_id, fatal=False),
|
||||
({json.loads}, 'code', {int}))
|
||||
if error_code == 4038:
|
||||
self.raise_geo_restricted(countries=['ES'])
|
||||
raise
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
update_url(stream, query=cdn), video_id, 'mp4', m3u8_id='hls', headers=geo_headers)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': traverse_obj(config, ('info', 'title', {str})),
|
||||
'formats': formats,
|
||||
'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none}))
|
||||
or traverse_obj(config, 'poster', 'imageUrl', expected_type=url_or_none)),
|
||||
'duration': traverse_obj(content, ('dataDuration', {int_or_none})),
|
||||
'http_headers': headers,
|
||||
}
|
||||
|
||||
|
||||
class TelecincoIE(TelecincoBaseIE):
|
||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
|
||||
@@ -30,6 +84,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'duration': 662,
|
||||
},
|
||||
}],
|
||||
'skip': 'HTTP Error 410 Gone',
|
||||
}, {
|
||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||
'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
|
||||
@@ -40,23 +95,24 @@ class TelecincoIE(InfoExtractor):
|
||||
'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
|
||||
'duration': 79,
|
||||
},
|
||||
'skip': 'Redirects to main page',
|
||||
}, {
|
||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||
'md5': 'eddb50291df704ce23c74821b995bcac',
|
||||
'md5': '5ce057f43f30b634fbaf0f18c71a140a',
|
||||
'info_dict': {
|
||||
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
||||
'ext': 'mp4',
|
||||
'title': '#DOYLACARA. Con la trata no hay trato',
|
||||
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
|
||||
'duration': 50,
|
||||
'thumbnail': 'https://album.mediaset.es/eimg/2017/11/02/1tlQLO5Q3mtKT24f3EaC24.jpg',
|
||||
},
|
||||
}, {
|
||||
# video in opening's content
|
||||
'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
|
||||
'info_dict': {
|
||||
'id': '2907195140',
|
||||
'id': '1691427',
|
||||
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
|
||||
'description': 'md5:73f340a7320143d37ab895375b2bf13a',
|
||||
'description': r're:Fiorella, la sobrina de Edmundo Arrocet, concedió .{727}',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||
@@ -65,6 +121,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
|
||||
'duration': 1015,
|
||||
'thumbnail': 'https://album.mediaset.es/eimg/2020/02/29/5opaC37lUhKlZ7FoDhiVC.jpg',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
@@ -81,66 +138,29 @@ class TelecincoIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_content(self, content, url):
|
||||
video_id = content['dataMediaId']
|
||||
config = self._download_json(
|
||||
content['dataConfig'], video_id, 'Downloading config JSON')
|
||||
title = config['info']['title']
|
||||
services = config['services']
|
||||
caronte = self._download_json(services['caronte'], video_id)
|
||||
stream = caronte['dls'][0]['stream']
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'application/json;charset=UTF-8',
|
||||
'Origin': re.match(r'https?://[^/]+', url).group(0),
|
||||
})
|
||||
cdn = self._download_json(
|
||||
caronte['cerbero'], video_id, data=json.dumps({
|
||||
'bbx': caronte['bbx'],
|
||||
'gbx': self._download_json(services['gbx'], video_id)['gbx'],
|
||||
}).encode(), headers=headers)['tokens']['1']['cdn']
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
|
||||
'duration': int_or_none(content.get('dataDuration')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
article = self._parse_json(self._search_regex(
|
||||
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
|
||||
webpage, 'article'), display_id)['article']
|
||||
title = article.get('title')
|
||||
description = clean_html(article.get('leadParagraph')) or ''
|
||||
article = self._search_json(
|
||||
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
|
||||
webpage, 'article', display_id)['article']
|
||||
description = traverse_obj(article, ('leadParagraph', {clean_html}, filter))
|
||||
|
||||
if article.get('editorialType') != 'VID':
|
||||
entries = []
|
||||
body = [article.get('opening')]
|
||||
body.extend(try_get(article, lambda x: x['body'], list) or [])
|
||||
for p in body:
|
||||
if not isinstance(p, dict):
|
||||
continue
|
||||
content = p.get('content')
|
||||
if not content:
|
||||
continue
|
||||
|
||||
for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])):
|
||||
content = p['content']
|
||||
type_ = p.get('type')
|
||||
if type_ == 'paragraph':
|
||||
content_str = str_or_none(content)
|
||||
if content_str:
|
||||
description += content_str
|
||||
continue
|
||||
if type_ == 'video' and isinstance(content, dict):
|
||||
if type_ == 'paragraph' and isinstance(content, str):
|
||||
description = join_nonempty(description, content, delim='')
|
||||
elif type_ == 'video' and isinstance(content, dict):
|
||||
entries.append(self._parse_content(content, url))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, str_or_none(article.get('id')), title, description)
|
||||
content = article['opening']['content']
|
||||
info = self._parse_content(content, url)
|
||||
info.update({
|
||||
'description': description,
|
||||
})
|
||||
entries, str_or_none(article.get('id')),
|
||||
traverse_obj(article, ('title', {str})), clean_html(description))
|
||||
|
||||
info = self._parse_content(article['opening']['content'], url)
|
||||
info['description'] = description
|
||||
return info
|
||||
|
||||
@@ -1,33 +1,31 @@
|
||||
import base64
|
||||
import datetime as dt
|
||||
import functools
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none, traverse_obj, urlencode_postdata, urljoin
|
||||
from ..utils import int_or_none, traverse_obj, url_or_none, urljoin
|
||||
|
||||
|
||||
class TenPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
|
||||
_NETRC_MACHINE = '10play'
|
||||
_TESTS = [{
|
||||
'url': 'https://10play.com.au/neighbours/web-extras/season-39/nathan-borg-is-the-first-aussie-actor-with-a-cochlear-implant-to-join-neighbours/tpv210128qupwd',
|
||||
'url': 'https://10play.com.au/neighbours/web-extras/season-41/heres-a-first-look-at-mischa-bartons-neighbours-debut/tpv230911hyxnz',
|
||||
'info_dict': {
|
||||
'id': '6226844312001',
|
||||
'id': '6336940246112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
|
||||
'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
|
||||
'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43',
|
||||
'duration': 186,
|
||||
'season': 'Season 39',
|
||||
'season_number': 39,
|
||||
'title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
|
||||
'alt_title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
|
||||
'description': 'Neighbours Premieres Monday, September 18 At 4:30pm On 10 And 10 Play And 6:30pm On 10 Peach',
|
||||
'duration': 74,
|
||||
'season': 'Season 41',
|
||||
'season_number': 41,
|
||||
'series': 'Neighbours',
|
||||
'thumbnail': r're:https://.*\.jpg',
|
||||
'uploader': 'Channel 10',
|
||||
'age_limit': 15,
|
||||
'timestamp': 1611810000,
|
||||
'upload_date': '20210128',
|
||||
'timestamp': 1694386800,
|
||||
'upload_date': '20230910',
|
||||
'uploader_id': '2199827728001',
|
||||
},
|
||||
'params': {
|
||||
@@ -35,21 +33,30 @@ class TenPlayIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Only available in Australia',
|
||||
}, {
|
||||
'url': 'https://10play.com.au/todd-sampsons-body-hack/episodes/season-4/episode-7/tpv200921kvngh',
|
||||
'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp',
|
||||
'info_dict': {
|
||||
'id': '6192880312001',
|
||||
'id': '9000000000091177',
|
||||
'ext': 'mp4',
|
||||
'title': "Todd Sampson's Body Hack - S4 Ep. 2",
|
||||
'description': 'md5:fa278820ad90f08ea187f9458316ac74',
|
||||
'title': 'Neighbours - S42 Ep. 9107',
|
||||
'alt_title': 'Thu 05 Sep',
|
||||
'description': 'md5:37a1f4271be34b9ee2b533426a5fbaef',
|
||||
'duration': 1388,
|
||||
'episode': 'Episode 9107',
|
||||
'episode_number': 9107,
|
||||
'season': 'Season 42',
|
||||
'season_number': 42,
|
||||
'series': 'Neighbours',
|
||||
'thumbnail': r're:https://.*\.jpg',
|
||||
'age_limit': 15,
|
||||
'timestamp': 1600770600,
|
||||
'upload_date': '20200922',
|
||||
'timestamp': 1725517860,
|
||||
'upload_date': '20240905',
|
||||
'uploader': 'Channel 10',
|
||||
'uploader_id': '2199827728001',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only available in Australia',
|
||||
}, {
|
||||
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
|
||||
'only_matching': True,
|
||||
@@ -66,55 +73,42 @@ class TenPlayIE(InfoExtractor):
|
||||
'X': 18,
|
||||
}
|
||||
|
||||
def _get_bearer_token(self, video_id):
|
||||
username, password = self._get_login_info()
|
||||
if username is None or password is None:
|
||||
self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
|
||||
_timestamp = dt.datetime.now().strftime('%Y%m%d000000')
|
||||
_auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
|
||||
data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
|
||||
'X-Network-Ten-Auth': _auth_header,
|
||||
}, data=urlencode_postdata({
|
||||
'email': username,
|
||||
'password': password,
|
||||
}))
|
||||
return 'Bearer ' + data['jwt']['accessToken']
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://10play.com.au/api/v1/videos/' + content_id, content_id)
|
||||
headers = {}
|
||||
|
||||
if data.get('memberGated') is True:
|
||||
_token = self._get_bearer_token(content_id)
|
||||
headers = {'Authorization': _token}
|
||||
|
||||
_video_url = self._download_json(
|
||||
data.get('playbackApiEndpoint'), content_id, 'Downloading video JSON',
|
||||
headers=headers).get('source')
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
_video_url), content_id).url
|
||||
video_data = self._download_json(
|
||||
f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}',
|
||||
content_id, 'Downloading video JSON')
|
||||
m3u8_url = self._request_webpage(
|
||||
HEADRequest(video_data['items'][0]['HLSURL']),
|
||||
content_id, 'Checking stream URL').url
|
||||
if '10play-not-in-oz' in m3u8_url:
|
||||
self.raise_geo_restricted(countries=['AU'])
|
||||
# Attempt to get a higher quality stream
|
||||
m3u8_url = m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
'formats': formats,
|
||||
'subtitles': {'en': [{'url': data.get('captionUrl')}]} if data.get('captionUrl') else None,
|
||||
'id': data.get('altId') or content_id,
|
||||
'duration': data.get('duration'),
|
||||
'title': data.get('subtitle'),
|
||||
'alt_title': data.get('title'),
|
||||
'description': data.get('description'),
|
||||
'age_limit': self._AUS_AGES.get(data.get('classification')),
|
||||
'series': data.get('tvShow'),
|
||||
'season_number': int_or_none(data.get('season')),
|
||||
'episode_number': int_or_none(data.get('episode')),
|
||||
'timestamp': data.get('published'),
|
||||
'thumbnail': data.get('imageUrl'),
|
||||
'subtitles': {'en': [{'url': data['captionUrl']}]} if url_or_none(data.get('captionUrl')) else None,
|
||||
'uploader': 'Channel 10',
|
||||
'uploader_id': '2199827728001',
|
||||
**traverse_obj(data, {
|
||||
'id': ('altId', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'title': ('subtitle', {str}),
|
||||
'alt_title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'age_limit': ('classification', {self._AUS_AGES.get}),
|
||||
'series': ('tvShow', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'timestamp': ('published', {int_or_none}),
|
||||
'thumbnail': ('imageUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -542,16 +542,12 @@ class TikTokBaseIE(InfoExtractor):
|
||||
**COMMON_FORMAT_INFO,
|
||||
'format_id': 'download',
|
||||
'url': self._proto_relative_url(download_url),
|
||||
'format_note': 'watermarked',
|
||||
'preference': -2,
|
||||
})
|
||||
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']):
|
||||
f.update({
|
||||
'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '),
|
||||
'preference': f.get('preference') or -2,
|
||||
})
|
||||
|
||||
# Is it a slideshow with only audio for download?
|
||||
if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})):
|
||||
audio_url = aweme_detail['music']['playUrl']
|
||||
@@ -565,7 +561,8 @@ class TikTokBaseIE(InfoExtractor):
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
return formats
|
||||
# Filter out broken formats, see https://github.com/yt-dlp/yt-dlp/issues/11034
|
||||
return [f for f in formats if urllib.parse.urlparse(f['url']).hostname != 'www.tiktok.com']
|
||||
|
||||
def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False):
|
||||
author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), {
|
||||
|
||||
@@ -236,7 +236,7 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
|
||||
},
|
||||
},
|
||||
],
|
||||
'min_playlist_count': 4,
|
||||
'playlist_mincount': 4,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -6,6 +6,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
@@ -132,12 +133,12 @@ class TubiTvIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': strip_or_none(title),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'season_number': int_or_none(season_number),
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'episode': episode_title,
|
||||
'episode': strip_or_none(episode_title),
|
||||
**traverse_obj(video_data, {
|
||||
'description': ('description', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
|
||||
@@ -6,11 +6,12 @@ from ..utils import (
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class TVerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'skip': 'videos are only available for 7 days',
|
||||
'url': 'https://tver.jp/episodes/ep83nf3w4p',
|
||||
@@ -21,80 +22,115 @@ class TVerIE(InfoExtractor):
|
||||
'episode': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
|
||||
'alt_title': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
|
||||
'channel': 'テレビ朝日',
|
||||
'id': 'ep83nf3w4p',
|
||||
'ext': 'mp4',
|
||||
'onair_label': '5月3日(火)放送分',
|
||||
'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着! テレビ朝日 5月3日(火)放送分',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/',
|
||||
'info_dict': {
|
||||
'id': '6359578055112',
|
||||
'ext': 'mp4',
|
||||
'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」',
|
||||
'timestamp': 1722279928,
|
||||
'upload_date': '20240729',
|
||||
'tags': ['20240729', 'japanese', 'japanmedal', 'paris'],
|
||||
'uploader_id': '4774017240001',
|
||||
'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg',
|
||||
'duration': 670.571,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://tver.jp/corner/f0103888',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tver.jp/lp/f0033031',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tver.jp/series/srtxft431v',
|
||||
'info_dict': {
|
||||
'id': 'srtxft431v',
|
||||
'title': '名探偵コナン',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '779ffd97493ed59b0a6277ea726b389e',
|
||||
'info_dict': {
|
||||
'id': 'ref:conan-1137-241005',
|
||||
'ext': 'mp4',
|
||||
'title': '名探偵コナン #1137「行列店、味変の秘密」',
|
||||
'uploader_id': '5330942432001',
|
||||
'tags': [],
|
||||
'channel': '読売テレビ',
|
||||
'series': '名探偵コナン',
|
||||
'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
|
||||
'episode': '#1137「行列店、味変の秘密」',
|
||||
'duration': 1469.077,
|
||||
'timestamp': 1728030405,
|
||||
'upload_date': '20241004',
|
||||
'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'https://tver.jp/series/sru35hwdd2',
|
||||
'info_dict': {
|
||||
'id': 'sru35hwdd2',
|
||||
'title': '神回だけ見せます!',
|
||||
},
|
||||
'playlist_count': 11,
|
||||
}, {
|
||||
'url': 'https://tver.jp/series/srkq2shp9d',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||
_PLATFORM_UID = None
|
||||
_PLATFORM_TOKEN = None
|
||||
_HEADERS = {'x-tver-platform-type': 'web'}
|
||||
_PLATFORM_QUERY = {}
|
||||
|
||||
def _real_initialize(self):
|
||||
create_response = self._download_json(
|
||||
'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None,
|
||||
note='Creating session', data=b'device_type=pc', headers={
|
||||
'Origin': 'https://s.tver.jp',
|
||||
'Referer': 'https://s.tver.jp/',
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
session_info = self._download_json(
|
||||
'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
|
||||
None, 'Creating session', data=b'device_type=pc')
|
||||
self._PLATFORM_QUERY = traverse_obj(session_info, ('result', {
|
||||
'platform_uid': 'platform_uid',
|
||||
'platform_token': 'platform_token',
|
||||
}))
|
||||
|
||||
def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
|
||||
return self._download_json(
|
||||
f'https://platform-api.tver.jp/service/api/{path}', video_id, note,
|
||||
fatal=fatal, headers=self._HEADERS, query={
|
||||
**self._PLATFORM_QUERY,
|
||||
**(query or {}),
|
||||
})
|
||||
self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid'))
|
||||
self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token'))
|
||||
|
||||
def _yield_episode_ids_for_series(self, series_id):
|
||||
seasons_info = self._download_json(
|
||||
f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}',
|
||||
series_id, 'Downloading seasons info', headers=self._HEADERS)
|
||||
for season_id in traverse_obj(
|
||||
seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})):
|
||||
episodes_info = self._call_platform_api(
|
||||
f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info')
|
||||
yield from traverse_obj(episodes_info, (
|
||||
'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, video_type = self._match_valid_url(url).group('id', 'type')
|
||||
|
||||
if video_type == 'olympic/paris2024/video':
|
||||
# Player ID is taken from .content.brightcove.E200.pro.pc.account_id:
|
||||
# https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d=
|
||||
return self.url_result(smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id),
|
||||
{'geo_countries': ['JP']}), 'BrightcoveNew')
|
||||
if video_type == 'series':
|
||||
series_info = self._call_platform_api(
|
||||
f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
|
||||
return self.playlist_from_matches(
|
||||
self._yield_episode_ids_for_series(video_id), video_id,
|
||||
traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})),
|
||||
ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}')
|
||||
|
||||
elif video_type not in {'series', 'episodes'}:
|
||||
if video_type != 'episodes':
|
||||
webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
|
||||
video_id = self._match_id(self._search_regex(
|
||||
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
|
||||
webpage, 'url regex'))
|
||||
|
||||
episode_info = self._download_json(
|
||||
f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
|
||||
video_id, fatal=False,
|
||||
query={
|
||||
'platform_uid': self._PLATFORM_UID,
|
||||
'platform_token': self._PLATFORM_TOKEN,
|
||||
}, headers={
|
||||
'x-tver-platform-type': 'web',
|
||||
episode_info = self._call_platform_api(
|
||||
f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={
|
||||
'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
|
||||
})
|
||||
episode_content = traverse_obj(
|
||||
episode_info, ('result', 'episode', 'content')) or {}
|
||||
|
||||
version = traverse_obj(episode_content, ('version', {str_or_none}), default='5')
|
||||
video_info = self._download_json(
|
||||
f'https://statics.tver.jp/content/episode/{video_id}.json', video_id,
|
||||
query={
|
||||
'v': str_or_none(episode_content.get('version')) or '5',
|
||||
}, headers={
|
||||
'Origin': 'https://tver.jp',
|
||||
'Referer': 'https://tver.jp/',
|
||||
})
|
||||
f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info',
|
||||
query={'v': version}, headers={'Referer': 'https://tver.jp/'})
|
||||
p_id = video_info['video']['accountID']
|
||||
r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False)
|
||||
if not r_id:
|
||||
@@ -110,6 +146,23 @@ class TVerIE(InfoExtractor):
|
||||
provider = str_or_none(episode_content.get('productionProviderName'))
|
||||
onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
|
||||
|
||||
thumbnails = [
|
||||
{
|
||||
'id': quality,
|
||||
'url': update_url_query(
|
||||
f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg',
|
||||
{'v': version}),
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
for quality, width, height in [
|
||||
('small', 480, 270),
|
||||
('medium', 640, 360),
|
||||
('large', 960, 540),
|
||||
('xlarge', 1280, 720),
|
||||
]
|
||||
]
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'title': title,
|
||||
@@ -119,6 +172,7 @@ class TVerIE(InfoExtractor):
|
||||
'alt_title': join_nonempty(title, provider, onair_label, delim=' '),
|
||||
'channel': provider,
|
||||
'description': str_or_none(video_info.get('description')),
|
||||
'thumbnails': thumbnails,
|
||||
'url': smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
class TVN24IE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:(?!eurosport)[^/]+\.)?tvn24(?:bis)?\.pl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
|
||||
'md5': 'fbdec753d7bc29d96036808275f2130c',
|
||||
|
||||
@@ -270,7 +270,7 @@ class TwitCastingLiveIE(InfoExtractor):
|
||||
|
||||
|
||||
class TwitCastingUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)'
|
||||
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
|
||||
'info_dict': {
|
||||
|
||||
@@ -150,14 +150,6 @@ class TwitterBaseIE(InfoExtractor):
|
||||
def is_logged_in(self):
|
||||
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
||||
|
||||
# XXX: Temporary workaround until twitter.com => x.com migration is completed
|
||||
def _real_initialize(self):
|
||||
if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
|
||||
return
|
||||
# User has not yet been migrated to x.com and has passed twitter.com cookies
|
||||
TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
|
||||
TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
||||
|
||||
@functools.cached_property
|
||||
def _selected_api(self):
|
||||
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
|
||||
@@ -934,14 +926,13 @@ class TwitterIE(TwitterBaseIE):
|
||||
'uploader_id': 'MoniqueCamarra',
|
||||
'live_status': 'was_live',
|
||||
'release_timestamp': 1658417414,
|
||||
'description': 'md5:acce559345fd49f129c20dbcda3f1201',
|
||||
'description': r're:Twitter Space participated by Sergej Sumlenny.+',
|
||||
'timestamp': 1658407771,
|
||||
'release_date': '20220721',
|
||||
'upload_date': '20220721',
|
||||
},
|
||||
'add_ie': ['TwitterSpaces'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'Requires authentication',
|
||||
}, {
|
||||
# URL specifies video number but --yes-playlist
|
||||
'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
|
||||
@@ -1764,7 +1755,7 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'release_timestamp': 1659904215,
|
||||
'release_date': '20220807',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# post_live/TimedOut but downloadable
|
||||
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
|
||||
@@ -1780,6 +1771,8 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'upload_date': '20230413',
|
||||
'release_timestamp': 1681839000,
|
||||
'release_date': '20230418',
|
||||
'protocol': 'm3u8', # ffmpeg is forced
|
||||
'container': 'm4a_dash', # audio-only format fixup is applied
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
@@ -1790,11 +1783,31 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'ext': 'm4a',
|
||||
'title': 'あ',
|
||||
'description': 'Twitter Space participated by nobody yet',
|
||||
'uploader': '息根とめる🔪Twitchで復活',
|
||||
'uploader': '息根とめる',
|
||||
'uploader_id': 'tomeru_ikinone',
|
||||
'live_status': 'was_live',
|
||||
'timestamp': 1685617198,
|
||||
'upload_date': '20230601',
|
||||
'protocol': 'm3u8', # ffmpeg is forced
|
||||
'container': 'm4a_dash', # audio-only format fixup is applied
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Video Space
|
||||
'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
|
||||
'info_dict': {
|
||||
'id': '1DXGydznBYWKM',
|
||||
'ext': 'mp4',
|
||||
'title': 'America and Israel’s “special relationship”',
|
||||
'description': 'Twitter Space participated by nobody yet',
|
||||
'uploader': 'Candace Owens',
|
||||
'uploader_id': 'RealCandaceO',
|
||||
'live_status': 'was_live',
|
||||
'timestamp': 1723931351,
|
||||
'upload_date': '20240817',
|
||||
'release_timestamp': 1723932000,
|
||||
'release_date': '20240817',
|
||||
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
@@ -1834,8 +1847,6 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
space_id = self._match_id(url)
|
||||
if not self.is_logged_in:
|
||||
self.raise_login_required('Twitter Spaces require authentication')
|
||||
space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
|
||||
if not space_data:
|
||||
raise ExtractorError('Twitter Space not found', expected=True)
|
||||
@@ -1854,13 +1865,17 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
source = traverse_obj(
|
||||
self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
|
||||
('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
|
||||
formats = self._extract_m3u8_formats( # XXX: Some Spaces need ffmpeg as downloader
|
||||
source, metadata['media_key'], 'm4a', entry_protocol='m3u8', live=is_live,
|
||||
headers=headers, fatal=False) if source else []
|
||||
for fmt in formats:
|
||||
fmt.update({'vcodec': 'none', 'acodec': 'aac'})
|
||||
if not is_live:
|
||||
fmt['container'] = 'm4a_dash'
|
||||
is_audio_space = source and 'audio-space' in source
|
||||
formats = self._extract_m3u8_formats(
|
||||
source, metadata['media_key'], 'm4a' if is_audio_space else 'mp4',
|
||||
# XXX: Some audio-only Spaces need ffmpeg as downloader
|
||||
entry_protocol='m3u8' if is_audio_space else 'm3u8_native',
|
||||
live=is_live, headers=headers, fatal=False) if source else []
|
||||
if is_audio_space:
|
||||
for fmt in formats:
|
||||
fmt.update({'vcodec': 'none', 'acodec': 'aac'})
|
||||
if not is_live:
|
||||
fmt['container'] = 'm4a_dash'
|
||||
|
||||
participants = ', '.join(traverse_obj(
|
||||
space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
|
||||
|
||||
@@ -73,7 +73,7 @@ class UstreamIE(InfoExtractor):
|
||||
def num_to_hex(n):
|
||||
return hex(n)[2:]
|
||||
|
||||
rnd = random.randrange
|
||||
rnd = lambda x: random.randrange(int(x))
|
||||
|
||||
if not extra_note:
|
||||
extra_note = ''
|
||||
|
||||
@@ -8,7 +8,8 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
qualities,
|
||||
try_get,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -108,7 +109,7 @@ class VeohIE(InfoExtractor):
|
||||
|
||||
categories = metadata.get('categoryPath')
|
||||
if not categories:
|
||||
category = try_get(video, lambda x: x['category'].strip().removeprefix('category_'))
|
||||
category = remove_start(strip_or_none(video.get('category')), 'category_')
|
||||
categories = [category] if category else None
|
||||
tags = video.get('tags')
|
||||
|
||||
|
||||
148
yt_dlp/extractor/vidflex.py
Normal file
148
yt_dlp/extractor/vidflex.py
Normal file
@@ -0,0 +1,148 @@
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
mimetype2ext,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class VidflexIE(InfoExtractor):
|
||||
_DOMAINS_RE = [
|
||||
r'[^.]+\.vidflex\.tv',
|
||||
r'(?:www\.)?acactv\.ca',
|
||||
r'(?:www\.)?albertalacrossetv\.com',
|
||||
r'(?:www\.)?cjfltv\.com',
|
||||
r'(?:www\.)?figureitoutbaseball\.com',
|
||||
r'(?:www\.)?ocaalive\.com',
|
||||
r'(?:www\.)?pegasussports\.tv',
|
||||
r'(?:www\.)?praxisseries\.ca',
|
||||
r'(?:www\.)?silenticetv\.com',
|
||||
r'(?:www\.)?tuffhedemantv\.com',
|
||||
r'(?:www\.)?watchfuntv\.com',
|
||||
r'live\.ofsaa\.on\.ca',
|
||||
r'tv\.procoro\.ca',
|
||||
r'tv\.realcastmedia\.net',
|
||||
r'tv\.fringetheatre\.ca',
|
||||
r'video\.haisla\.ca',
|
||||
r'video\.hockeycanada\.ca',
|
||||
r'video\.huuayaht\.org',
|
||||
r'video\.turningpointensemble\.ca',
|
||||
r'videos\.livingworks\.net',
|
||||
r'videos\.telusworldofscienceedmonton\.ca',
|
||||
r'watch\.binghamtonbulldogs\.com',
|
||||
r'watch\.rekindle\.tv',
|
||||
r'watch\.wpca\.com',
|
||||
]
|
||||
_VALID_URL = rf'https?://(?:{"|".join(_DOMAINS_RE)})/[a-z]{{2}}(?:-[a-z]{{2}})?/c/[\w-]+\.(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://video.hockeycanada.ca/en/c/nwt-micd-up-with-jamie-lee-rattray.107486',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# m3u8 + https
|
||||
'url': 'https://video.hockeycanada.ca/en-us/c/nwt-micd-up-with-jamie-lee-rattray.107486',
|
||||
'info_dict': {
|
||||
'id': '107486',
|
||||
'title': 'NWT: Mic’d up with Jamie Lee Rattray',
|
||||
'ext': 'mp4',
|
||||
'duration': 115,
|
||||
'timestamp': 1634310409,
|
||||
'upload_date': '20211015',
|
||||
'tags': ['English', '2021', "National Women's Team"],
|
||||
'description': 'md5:efb1cf6165b48cc3f5555c4262dd5b23',
|
||||
'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'url': 'https://video.hockeycanada.ca/en/c/mwc-remembering-the-wild-ride-in-riga.112307',
|
||||
'info_dict': {
|
||||
'id': '112307',
|
||||
'title': 'MWC: Remembering the wild ride in Riga',
|
||||
'ext': 'mp4',
|
||||
'duration': 322,
|
||||
'timestamp': 1716235607,
|
||||
'upload_date': '20240520',
|
||||
'tags': ['English', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
|
||||
'description': r're:.+Canada’s National Men’s Team.+',
|
||||
'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
# the same video in French
|
||||
'url': 'https://video.hockeycanada.ca/fr/c/cmm-retour-sur-un-parcours-endiable-a-riga.112304',
|
||||
'info_dict': {
|
||||
'id': '112304',
|
||||
'title': 'CMM : Retour sur un parcours endiablé à Riga',
|
||||
'ext': 'mp4',
|
||||
'duration': 322,
|
||||
'timestamp': 1716235545,
|
||||
'upload_date': '20240520',
|
||||
'tags': ['French', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
|
||||
'description': 'md5:cf825222882a3dab1cd62cffcf3b4d1f',
|
||||
'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'url': 'https://myfbcgreenville.vidflex.tv/en/c/may-12th-2024.658',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.figureitoutbaseball.com/en/c/fiob-podcast-14-dan-bertolini-ncaa-d1-head-coach-recorded-11-29-2018.1367',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://videos.telusworldofscienceedmonton.ca/en/c/the-aurora-project-timelapse-4.577',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tuffhedemantv.com/en/c/2022-tuff-hedeman-tour-hobbs-nm-january-22.227',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.albertalacrossetv.com/en/c/up-floor-ground-balls-one-more.3449',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.silenticetv.com/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://jphl.vidflex.tv/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_url = self._html_search_regex(
|
||||
r'content_api:\s*(["\'])(?P<url>https?://(?:(?!\1).)+)\1', webpage, 'content api url', group='url')
|
||||
media_config = traverse_obj(
|
||||
self._download_json(data_url, video_id),
|
||||
('config', {base64.b64decode}, {bytes.decode}, {json.loads}, {dict}))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': list(self._yield_formats(media_config, video_id)),
|
||||
**self._search_json_ld(
|
||||
webpage.replace('/*<![CDATA[*/', '').replace('/*]]>*/', ''), video_id),
|
||||
}
|
||||
|
||||
def _yield_formats(self, media_config, video_id):
|
||||
for media_source in traverse_obj(media_config, ('media', 'source', lambda _, v: url_or_none(v['src']))):
|
||||
media_url = media_source['src']
|
||||
media_type = mimetype2ext(media_source.get('type'))
|
||||
|
||||
if media_type == 'm3u8':
|
||||
yield from self._extract_m3u8_formats(media_url, video_id, fatal=False, m3u8_id='hls')
|
||||
elif media_type == 'mp4':
|
||||
bitrate = self._search_regex(r'_(\d+)k\.mp4', media_url, 'bitrate', default=None)
|
||||
yield {
|
||||
'format_id': join_nonempty('http', bitrate),
|
||||
'url': media_url,
|
||||
'ext': 'mp4',
|
||||
'tbr': int_or_none(bitrate),
|
||||
}
|
||||
else:
|
||||
yield {
|
||||
'url': media_url,
|
||||
'ext': media_type,
|
||||
}
|
||||
@@ -21,6 +21,7 @@ from ..utils import (
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
qualities,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
@@ -146,6 +147,8 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
})
|
||||
|
||||
# TODO: fix handling of 308 status code returned for live archive manifest requests
|
||||
QUALITIES = ('low', 'medium', 'high')
|
||||
quality = qualities(QUALITIES)
|
||||
sep_pattern = r'/sep/video/'
|
||||
for files_type in ('hls', 'dash'):
|
||||
for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
|
||||
@@ -166,6 +169,11 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id,
|
||||
note=f'Downloading {cdn_name} m3u8 information',
|
||||
fatal=False)
|
||||
# m3u8 doesn't give audio bitrates; need to prioritize based on GROUP-ID
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/10854
|
||||
for f in fmts:
|
||||
if mobj := re.search(rf'audio-({"|".join(QUALITIES)})', f['format_id']):
|
||||
f['quality'] = quality(mobj.group(1))
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif files_type == 'dash':
|
||||
@@ -234,13 +242,30 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
|
||||
}
|
||||
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None):
|
||||
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None, **kwargs):
|
||||
return self._download_json(
|
||||
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
|
||||
video_id, 'Downloading API JSON', headers={
|
||||
'Authorization': f'jwt {jwt_token}',
|
||||
'Accept': 'application/json',
|
||||
}, query={
|
||||
'fields': ','.join((
|
||||
'config_url', 'created_time', 'description', 'download', 'license',
|
||||
'metadata.connections.comments.total', 'metadata.connections.likes.total',
|
||||
'release_time', 'stats.plays')),
|
||||
}, **kwargs)
|
||||
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None, api_data=None):
|
||||
# Original/source formats are only available when logged in
|
||||
if not self._get_cookies('https://vimeo.com/').get('vimeo'):
|
||||
return
|
||||
|
||||
query = {'action': 'load_download_config'}
|
||||
if unlisted_hash:
|
||||
query['unlisted_hash'] = unlisted_hash
|
||||
download_data = self._download_json(
|
||||
url, video_id, fatal=False, query=query,
|
||||
headers={'X-Requested-With': 'XMLHttpRequest'},
|
||||
url, video_id, 'Loading download config JSON', fatal=False,
|
||||
query=query, headers={'X-Requested-With': 'XMLHttpRequest'},
|
||||
expected_status=(403, 404)) or {}
|
||||
source_file = download_data.get('source_file')
|
||||
download_url = try_get(source_file, lambda x: x['download_url'])
|
||||
@@ -261,15 +286,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'quality': 1,
|
||||
}
|
||||
|
||||
jwt_response = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
|
||||
if not jwt_response.get('jwt'):
|
||||
jwt = jwt or traverse_obj(self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id, 'Downloading jwt token', fatal=False), ('jwt', {str}))
|
||||
if not jwt:
|
||||
return
|
||||
headers = {'Authorization': 'jwt {}'.format(jwt_response['jwt']), 'Accept': 'application/json'}
|
||||
original_response = self._download_json(
|
||||
f'https://api.vimeo.com/videos/{video_id}', video_id,
|
||||
headers=headers, fatal=False, expected_status=(403, 404)) or {}
|
||||
for download_data in original_response.get('download') or []:
|
||||
original_response = api_data or self._call_videos_api(
|
||||
video_id, jwt, unlisted_hash, fatal=False, expected_status=(403, 404))
|
||||
for download_data in traverse_obj(original_response, ('download', ..., {dict})):
|
||||
download_url = download_data.get('link')
|
||||
if not download_url or download_data.get('quality') != 'source':
|
||||
continue
|
||||
@@ -354,7 +377,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'skip': 'No longer available',
|
||||
},
|
||||
{
|
||||
'url': 'http://player.vimeo.com/video/54469442',
|
||||
'url': 'https://player.vimeo.com/video/54469442',
|
||||
'md5': '619b811a4417aa4abe78dc653becf511',
|
||||
'note': 'Videos that embed the url in the player page',
|
||||
'info_dict': {
|
||||
@@ -370,6 +393,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/68375962',
|
||||
@@ -379,22 +403,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'id': '68375962',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl password protected test video',
|
||||
'timestamp': 1371200155,
|
||||
'timestamp': 1371214555,
|
||||
'upload_date': '20130614',
|
||||
'release_timestamp': 1371214555,
|
||||
'release_date': '20130614',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
|
||||
'uploader_id': 'user18948128',
|
||||
'uploader': 'Jaime Marquínez Ferrándiz',
|
||||
'duration': 10,
|
||||
'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
'videopassword': 'youtube-dl',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/channels/keypeele/75629013',
|
||||
@@ -418,29 +443,38 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {'format': 'http-1080p'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/76979871',
|
||||
'note': 'Video with subtitles',
|
||||
'info_dict': {
|
||||
'id': '76979871',
|
||||
'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'title': 'The New Vimeo Player (You Know, For Videos)',
|
||||
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
|
||||
'timestamp': 1381846109,
|
||||
'description': str, # FIXME: Dynamic SEO spam description
|
||||
'timestamp': 1381860509,
|
||||
'upload_date': '20131015',
|
||||
'release_timestamp': 1381860509,
|
||||
'release_date': '20131015',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
|
||||
'uploader_id': 'staff',
|
||||
'uploader': 'Vimeo Staff',
|
||||
'uploader': 'Vimeo',
|
||||
'duration': 62,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/452001751-8216e0571c251a09d7a8387550942d89f7f86f6398f8ed886e639b0dd50d3c90-d_1280',
|
||||
'subtitles': {
|
||||
'de': [{'ext': 'vtt'}],
|
||||
'en': [{'ext': 'vtt'}],
|
||||
'es': [{'ext': 'vtt'}],
|
||||
'fr': [{'ext': 'vtt'}],
|
||||
'de': 'count:3',
|
||||
'en': 'count:3',
|
||||
'es': 'count:3',
|
||||
'fr': 'count:3',
|
||||
},
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'expected_warnings': [
|
||||
'Ignoring subtitle tracks found in the HLS manifest',
|
||||
'Failed to parse XML: not well-formed',
|
||||
],
|
||||
},
|
||||
{
|
||||
# from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
|
||||
@@ -456,11 +490,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'duration': 118,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# contains original format
|
||||
# contains Original format
|
||||
'url': 'https://vimeo.com/33951933',
|
||||
'md5': '53c688fa95a55bf4b7293d37a89c5c53',
|
||||
# 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
|
||||
'info_dict': {
|
||||
'id': '33951933',
|
||||
'ext': 'mp4',
|
||||
@@ -476,15 +511,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280',
|
||||
'like_count': int,
|
||||
'tags': 'count:11',
|
||||
},
|
||||
# 'params': {'format': 'Original'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'note': 'Contains original format not accessible in webpage',
|
||||
'note': 'Contains source format not accessible in webpage',
|
||||
'url': 'https://vimeo.com/393756517',
|
||||
'md5': 'c464af248b592190a5ffbb5d33f382b0',
|
||||
# 'md5': 'c464af248b592190a5ffbb5d33f382b0',
|
||||
'info_dict': {
|
||||
'id': '393756517',
|
||||
'ext': 'mov',
|
||||
# 'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1582642091,
|
||||
'uploader_id': 'frameworkla',
|
||||
'title': 'Straight To Hell - Sabrina: Netflix',
|
||||
@@ -495,6 +534,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280',
|
||||
'uploader_url': 'https://vimeo.com/frameworkla',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# only available via https://vimeo.com/channels/tributes/6213729 and
|
||||
@@ -511,16 +552,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'channel_id': 'tributes',
|
||||
'timestamp': 1250886430,
|
||||
'upload_date': '20090821',
|
||||
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
|
||||
'description': str, # FIXME: Dynamic SEO spam description
|
||||
'duration': 321,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280',
|
||||
'like_count': int,
|
||||
'tags': ['[the shining', 'vimeohq', 'cv', 'vimeo tribute]'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# redirects to ondemand extractor and should be passed through it
|
||||
@@ -543,28 +586,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'skip': 'this page is no longer available.',
|
||||
},
|
||||
{
|
||||
'url': 'http://player.vimeo.com/video/68375962',
|
||||
'url': 'https://player.vimeo.com/video/68375962',
|
||||
'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
|
||||
'info_dict': {
|
||||
'id': '68375962',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl password protected test video',
|
||||
'timestamp': 1371200155,
|
||||
'upload_date': '20130614',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
|
||||
'uploader_id': 'user18948128',
|
||||
'uploader': 'Jaime Marquínez Ferrándiz',
|
||||
'duration': 10,
|
||||
'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
'videopassword': 'youtube-dl',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
|
||||
@@ -592,7 +630,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
|
||||
'uploader': 'Philipp Hagemeister',
|
||||
'uploader_id': 'user20132939',
|
||||
'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b',
|
||||
'description': str, # FIXME: Dynamic SEO spam description
|
||||
'upload_date': '20150209',
|
||||
'timestamp': 1423518307,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/default_1280',
|
||||
@@ -606,6 +644,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'format': 'best[protocol=https]',
|
||||
'videopassword': 'youtube-dl',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# source file returns 403: Forbidden
|
||||
@@ -633,11 +672,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'release_date': '20160329',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'https://vimeo.com/138909882',
|
||||
'info_dict': {
|
||||
'id': '138909882',
|
||||
# 'ext': 'm4v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
|
||||
'description': 'md5:5967e090768a831488f6e74b7821b3c1',
|
||||
@@ -645,11 +686,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'Firework Champions',
|
||||
'upload_date': '20150910',
|
||||
'timestamp': 1441901895,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d_1280',
|
||||
'uploader_url': 'https://vimeo.com/fireworkchampions',
|
||||
'tags': 'count:6',
|
||||
'duration': 229,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'format': 'Original',
|
||||
# 'format': 'source',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
'url': 'https://vimeo.com/channels/staffpicks/143603739',
|
||||
@@ -670,8 +719,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/karimhd',
|
||||
'channel_url': 'https://vimeo.com/channels/staffpicks',
|
||||
'tags': 'count:6',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# requires passing unlisted_hash(a52724358e) to load_download_config request
|
||||
@@ -701,6 +752,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# chapters must be sorted, see: https://github.com/yt-dlp/yt-dlp/issues/5308
|
||||
@@ -735,6 +787,48 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# vimeo.com URL with unlisted hash and Original format
|
||||
'url': 'https://vimeo.com/144579403/ec02229140',
|
||||
# 'md5': '6b662c2884e0373183fbde2a0d15cb78',
|
||||
'info_dict': {
|
||||
'id': '144579403',
|
||||
'ext': 'mp4',
|
||||
'title': 'SALESMANSHIP',
|
||||
'description': 'md5:4338302f347a1ff8841b4a3aecaa09f0',
|
||||
'uploader': 'Off the Picture Pictures',
|
||||
'uploader_id': 'offthepicturepictures',
|
||||
'uploader_url': 'https://vimeo.com/offthepicturepictures',
|
||||
'duration': 669,
|
||||
'upload_date': '20151104',
|
||||
'timestamp': 1446607180,
|
||||
'release_date': '20151104',
|
||||
'release_timestamp': 1446607180,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1018638656-[\da-f]+-d_1280',
|
||||
},
|
||||
# 'params': {'format': 'Original'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# player.vimeo.com URL with source format
|
||||
'url': 'https://player.vimeo.com/video/859028877',
|
||||
# 'md5': '19ca3d2463441dee2d2f0671ac2916a2',
|
||||
'info_dict': {
|
||||
'id': '859028877',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ariana Grande - Honeymoon Avenue (Live from London)',
|
||||
'uploader': 'Raja Virdi',
|
||||
'uploader_id': 'rajavirdi',
|
||||
'uploader_url': 'https://vimeo.com/rajavirdi',
|
||||
'duration': 309,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d_1280',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
{
|
||||
# user playlist alias -> https://vimeo.com/258705797
|
||||
'url': 'https://vimeo.com/user26785108/newspiritualguide',
|
||||
@@ -768,16 +862,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
raise ExtractorError('Wrong video password', expected=True)
|
||||
return checked
|
||||
|
||||
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None):
|
||||
return self._download_json(
|
||||
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
|
||||
video_id, 'Downloading API JSON', headers={
|
||||
'Authorization': f'jwt {jwt_token}',
|
||||
'Accept': 'application/json',
|
||||
}, query={
|
||||
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
|
||||
})
|
||||
|
||||
def _extract_from_api(self, video_id, unlisted_hash=None):
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
|
||||
@@ -785,11 +869,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
for retry in (False, True):
|
||||
try:
|
||||
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
|
||||
and 'password' in traverse_obj(
|
||||
e.cause.response.read(),
|
||||
({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'),
|
||||
self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
|
||||
({json.loads}, 'invalid_parameters', ..., 'field'),
|
||||
)):
|
||||
self._verify_video_password(
|
||||
video_id, self._get_video_password(), viewer['xsrft'])
|
||||
@@ -798,6 +883,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
|
||||
info = self._parse_config(self._download_json(
|
||||
video['config_url'], video_id), video_id)
|
||||
source_format = self._extract_original_format(
|
||||
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, jwt=viewer['jwt'], api_data=video)
|
||||
if source_format:
|
||||
info['formats'].append(source_format)
|
||||
|
||||
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
|
||||
info.update({
|
||||
'description': video.get('description'),
|
||||
@@ -899,7 +989,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if config.get('view') == 4:
|
||||
config = self._verify_player_video_password(
|
||||
redirect_url, video_id, headers)
|
||||
return self._parse_config(config, video_id)
|
||||
info = self._parse_config(config, video_id)
|
||||
source_format = self._extract_original_format(
|
||||
f'https://vimeo.com/{video_id}', video_id, unlisted_hash)
|
||||
if source_format:
|
||||
info['formats'].append(source_format)
|
||||
return info
|
||||
|
||||
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
|
||||
if vimeo_config:
|
||||
@@ -1269,6 +1364,20 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
IE_DESC = 'Review pages on vimeo'
|
||||
_VALID_URL = r'https?://vimeo\.com/(?P<user>[^/?#]+)/review/(?P<id>\d+)/(?P<hash>[\da-f]{10})'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/user170863801/review/996447483/a316d6ed8d',
|
||||
'info_dict': {
|
||||
'id': '996447483',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rodeo day 1-_2',
|
||||
'uploader': 'BROADKAST',
|
||||
'uploader_id': 'user170863801',
|
||||
'uploader_url': 'https://vimeo.com/user170863801',
|
||||
'duration': 30,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1912612821-09a43bd2e75c203d503aed89de7534f28fc4474a48f59c51999716931a246af5-d_1280',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML'],
|
||||
}, {
|
||||
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
|
||||
'md5': 'c507a72f780cacc12b2248bb4006d253',
|
||||
'info_dict': {
|
||||
@@ -1282,6 +1391,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/450115033-43303819d9ebe24c2630352e18b7056d25197d09b3ae901abdac4c4f1d68de71-d_1280',
|
||||
'uploader_url': 'https://vimeo.com/user21297594',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'note': 'video player needs Referer',
|
||||
'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
|
||||
@@ -1316,6 +1426,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash')
|
||||
data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}'
|
||||
data = self._download_json(data_url, video_id)
|
||||
viewer = {}
|
||||
if data.get('isLocked') is True:
|
||||
video_password = self._get_video_password()
|
||||
viewer = self._download_json(
|
||||
@@ -1327,8 +1438,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
config = self._download_json(config_url, video_id)
|
||||
info_dict = self._parse_config(config, video_id)
|
||||
source_format = self._extract_original_format(
|
||||
f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action', video_id,
|
||||
unlisted_hash=traverse_obj(config_url, ({parse_qs}, 'h', -1)))
|
||||
f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action',
|
||||
video_id, unlisted_hash=clip_data.get('unlistedHash'), jwt=viewer.get('jwt'))
|
||||
if source_format:
|
||||
info_dict['formats'].append(source_format)
|
||||
info_dict['description'] = clean_html(clip_data.get('description'))
|
||||
|
||||
@@ -90,7 +90,7 @@ class ViuIE(ViuBaseIE):
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
|
||||
|
||||
for key, value in video_data.items():
|
||||
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
||||
mobj = re.match(r'subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
||||
if not mobj:
|
||||
continue
|
||||
subtitles.setdefault(mobj.group('lang'), []).append({
|
||||
|
||||
@@ -27,8 +27,9 @@ from ..utils import (
|
||||
|
||||
class WeverseBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'weverse'
|
||||
_ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api/v2'
|
||||
_ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api'
|
||||
_API_HEADERS = {
|
||||
'Accept': 'application/json',
|
||||
'Referer': 'https://weverse.io/',
|
||||
'WEV-device-Id': str(uuid.uuid4()),
|
||||
}
|
||||
@@ -39,14 +40,14 @@ class WeverseBaseIE(InfoExtractor):
|
||||
|
||||
headers = {
|
||||
'x-acc-app-secret': '5419526f1c624b38b10787e5c10b2a7a',
|
||||
'x-acc-app-version': '2.2.6',
|
||||
'x-acc-app-version': '3.3.6',
|
||||
'x-acc-language': 'en',
|
||||
'x-acc-service-id': 'weverse',
|
||||
'x-acc-trace-id': str(uuid.uuid4()),
|
||||
'x-clog-user-device-id': str(uuid.uuid4()),
|
||||
}
|
||||
valid_username = traverse_obj(self._download_json(
|
||||
f'{self._ACCOUNT_API_BASE}/signup/email/status', None, note='Checking username',
|
||||
f'{self._ACCOUNT_API_BASE}/v2/signup/email/status', None, note='Checking username',
|
||||
query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword')
|
||||
if not valid_username:
|
||||
raise ExtractorError('Invalid username provided', expected=True)
|
||||
@@ -54,8 +55,9 @@ class WeverseBaseIE(InfoExtractor):
|
||||
headers['content-type'] = 'application/json'
|
||||
try:
|
||||
auth = self._download_json(
|
||||
f'{self._ACCOUNT_API_BASE}/auth/token/by-credentials', None, data=json.dumps({
|
||||
f'{self._ACCOUNT_API_BASE}/v3/auth/token/by-credentials', None, data=json.dumps({
|
||||
'email': username,
|
||||
'otpSessionId': 'BY_PASS',
|
||||
'password': password,
|
||||
}, separators=(',', ':')).encode(), headers=headers, note='Logging in')
|
||||
except ExtractorError as e:
|
||||
@@ -78,8 +80,10 @@ class WeverseBaseIE(InfoExtractor):
|
||||
# From https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/main.e206f7c1.js:
|
||||
key = b'1b9cb6378d959b45714bec49971ade22e6e24e42'
|
||||
api_path = update_url_query(ep, {
|
||||
# 'gcc': 'US',
|
||||
'appId': 'be4d79eb8fc7bd008ee82c8ec4ff6fd4',
|
||||
'language': 'en',
|
||||
'os': 'WEB',
|
||||
'platform': 'WEB',
|
||||
'wpf': 'pc',
|
||||
})
|
||||
@@ -152,7 +156,7 @@ class WeverseBaseIE(InfoExtractor):
|
||||
'description': ((('extension', 'mediaInfo', 'body'), 'body'), {str}),
|
||||
'uploader': ('author', 'profileName', {str}),
|
||||
'uploader_id': ('author', 'memberId', {str}),
|
||||
'creator': ('community', 'communityName', {str}),
|
||||
'creators': ('community', 'communityName', {str}, all),
|
||||
'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
|
||||
'duration': ('extension', 'video', 'playTime', {float_or_none}),
|
||||
'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),
|
||||
@@ -196,7 +200,7 @@ class WeverseIE(WeverseBaseIE):
|
||||
'channel': 'billlie',
|
||||
'channel_id': '72',
|
||||
'channel_url': 'https://weverse.io/billlie',
|
||||
'creator': 'Billlie',
|
||||
'creators': ['Billlie'],
|
||||
'timestamp': 1666262062,
|
||||
'upload_date': '20221020',
|
||||
'release_timestamp': 1666262058,
|
||||
@@ -222,7 +226,7 @@ class WeverseIE(WeverseBaseIE):
|
||||
'channel': 'lesserafim',
|
||||
'channel_id': '47',
|
||||
'channel_url': 'https://weverse.io/lesserafim',
|
||||
'creator': 'LE SSERAFIM',
|
||||
'creators': ['LE SSERAFIM'],
|
||||
'timestamp': 1659353400,
|
||||
'upload_date': '20220801',
|
||||
'release_timestamp': 1659353400,
|
||||
@@ -286,7 +290,7 @@ class WeverseIE(WeverseBaseIE):
|
||||
|
||||
elif live_status == 'is_live':
|
||||
video_info = self._call_api(
|
||||
f'/video/v1.0/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
|
||||
f'/video/v1.2/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
|
||||
video_id, note='Downloading live JSON')
|
||||
playback = self._parse_json(video_info['lipPlayback'], video_id)
|
||||
m3u8_url = traverse_obj(playback, (
|
||||
@@ -302,7 +306,7 @@ class WeverseIE(WeverseBaseIE):
|
||||
else:
|
||||
infra_video_id = post['extension']['video']['infraVideoId']
|
||||
in_key = self._call_api(
|
||||
f'/video/v1.0/vod/{api_video_id}/inKey?preview=false', video_id,
|
||||
f'/video/v1.1/vod/{api_video_id}/inKey?preview=false', video_id,
|
||||
data=b'{}', note='Downloading VOD API key')['inKey']
|
||||
|
||||
video_info = self._download_json(
|
||||
@@ -347,7 +351,6 @@ class WeverseMediaIE(WeverseBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<artist>[^/?#]+)/media/(?P<id>[\d-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://weverse.io/billlie/media/4-116372884',
|
||||
'md5': '8efc9cfd61b2f25209eb1a5326314d28',
|
||||
'info_dict': {
|
||||
'id': 'e-C9wLSQs6o',
|
||||
'ext': 'mp4',
|
||||
@@ -358,8 +361,9 @@ class WeverseMediaIE(WeverseBaseIE):
|
||||
'channel_url': 'https://www.youtube.com/channel/UCyc9sUCxELTDK9vELO5Fzeg',
|
||||
'uploader': 'Billlie',
|
||||
'uploader_id': '@Billlie',
|
||||
'uploader_url': 'http://www.youtube.com/@Billlie',
|
||||
'uploader_url': 'https://www.youtube.com/@Billlie',
|
||||
'upload_date': '20230403',
|
||||
'timestamp': 1680533992,
|
||||
'duration': 211,
|
||||
'age_limit': 0,
|
||||
'playable_in_embed': True,
|
||||
@@ -372,6 +376,8 @@ class WeverseMediaIE(WeverseBaseIE):
|
||||
'thumbnail': 'https://i.ytimg.com/vi/e-C9wLSQs6o/maxresdefault.jpg',
|
||||
'categories': ['Entertainment'],
|
||||
'tags': 'count:7',
|
||||
'channel_is_verified': True,
|
||||
'heatmap': 'count:100',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://weverse.io/billlie/media/3-102914520',
|
||||
@@ -386,7 +392,7 @@ class WeverseMediaIE(WeverseBaseIE):
|
||||
'channel': 'billlie',
|
||||
'channel_id': '72',
|
||||
'channel_url': 'https://weverse.io/billlie',
|
||||
'creator': 'Billlie',
|
||||
'creators': ['Billlie'],
|
||||
'timestamp': 1662174000,
|
||||
'upload_date': '20220903',
|
||||
'release_timestamp': 1662174000,
|
||||
@@ -432,7 +438,7 @@ class WeverseMomentIE(WeverseBaseIE):
|
||||
'uploader_id': '66a07e164b56a696ee71c99315ffe27b',
|
||||
'channel': 'secretnumber',
|
||||
'channel_id': '56',
|
||||
'creator': 'SECRET NUMBER',
|
||||
'creators': ['SECRET NUMBER'],
|
||||
'duration': 10,
|
||||
'upload_date': '20230405',
|
||||
'timestamp': 1680653968,
|
||||
@@ -441,7 +447,6 @@ class WeverseMomentIE(WeverseBaseIE):
|
||||
'comment_count': int,
|
||||
'availability': 'needs_auth',
|
||||
},
|
||||
'skip': 'Moment has expired',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -571,7 +576,7 @@ class WeverseLiveIE(WeverseBaseIE):
|
||||
'channel': 'purplekiss',
|
||||
'channel_id': '35',
|
||||
'channel_url': 'https://weverse.io/purplekiss',
|
||||
'creator': 'PURPLE KISS',
|
||||
'creators': ['PURPLE KISS'],
|
||||
'timestamp': 1680780892,
|
||||
'upload_date': '20230406',
|
||||
'release_timestamp': 1680780883,
|
||||
@@ -584,6 +589,31 @@ class WeverseLiveIE(WeverseBaseIE):
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'skip': 'Livestream has ended',
|
||||
}, {
|
||||
'url': 'https://weverse.io/lesserafim',
|
||||
'info_dict': {
|
||||
'id': '4-181521628',
|
||||
'ext': 'mp4',
|
||||
'title': r're:심심해서요',
|
||||
'description': '',
|
||||
'uploader': '채채🤎',
|
||||
'uploader_id': 'd49b8b06f3cc1d92d655b25ab27ac2e7',
|
||||
'channel': 'lesserafim',
|
||||
'channel_id': '47',
|
||||
'creators': ['LE SSERAFIM'],
|
||||
'channel_url': 'https://weverse.io/lesserafim',
|
||||
'timestamp': 1728570273,
|
||||
'upload_date': '20241010',
|
||||
'release_timestamp': 1728570264,
|
||||
'release_date': '20241010',
|
||||
'thumbnail': r're:https://phinf\.wevpstatic\.net/.+\.png',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'availability': 'needs_auth',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'skip': 'Livestream has ended',
|
||||
}, {
|
||||
'url': 'https://weverse.io/billlie/',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -8,6 +8,7 @@ from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
@@ -25,16 +26,25 @@ class WistiaBaseIE(InfoExtractor):
|
||||
|
||||
def _download_embed_config(self, config_type, config_id, referer):
|
||||
base_url = self._EMBED_BASE_URL + f'{config_type}/{config_id}'
|
||||
video_password = self.get_param('videopassword')
|
||||
embed_config = self._download_json(
|
||||
base_url + '.json', config_id, headers={
|
||||
'Referer': referer if referer.startswith('http') else base_url, # Some videos require this.
|
||||
})
|
||||
}, query=filter_dict({'password': video_password}))
|
||||
|
||||
error = traverse_obj(embed_config, 'error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
f'Error while getting the playlist: {error}', expected=True)
|
||||
|
||||
if traverse_obj(embed_config, (
|
||||
'media', ('embed_options', 'embedOptions'), 'plugin',
|
||||
'passwordProtectedVideo', 'on', any)) == 'true':
|
||||
if video_password:
|
||||
raise ExtractorError('Invalid video password', expected=True)
|
||||
raise ExtractorError(
|
||||
'This content is password-protected. Use the --video-password option', expected=True)
|
||||
|
||||
return embed_config
|
||||
|
||||
def _get_real_ext(self, url):
|
||||
|
||||
@@ -1,7 +1,17 @@
|
||||
import base64
|
||||
import math
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import InAdvancePagedList, str_or_none, traverse_obj, try_call
|
||||
from .videa import VideaIE
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class XimalayaBaseIE(InfoExtractor):
|
||||
@@ -11,7 +21,7 @@ class XimalayaBaseIE(InfoExtractor):
|
||||
class XimalayaIE(XimalayaBaseIE):
|
||||
IE_NAME = 'ximalaya'
|
||||
IE_DESC = '喜马拉雅FM'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ximalaya.com/sound/47740352/',
|
||||
@@ -71,23 +81,92 @@ class XimalayaIE(XimalayaBaseIE):
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
# VIP-restricted audio
|
||||
'url': 'https://www.ximalaya.com/sound/562111701',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _decrypt_filename(file_id, seed):
|
||||
cgstr = ''
|
||||
key = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890'
|
||||
for _ in key:
|
||||
seed = float(int(211 * seed + 30031) % 65536)
|
||||
r = int(seed / 65536 * len(key))
|
||||
cgstr += key[r]
|
||||
key = key.replace(key[r], '')
|
||||
parts = file_id.split('*')
|
||||
filename = ''.join(cgstr[int(part)] for part in parts if part.isdecimal())
|
||||
if not filename.startswith('/'):
|
||||
filename = '/' + filename
|
||||
return filename
|
||||
|
||||
@staticmethod
|
||||
def _decrypt_url_params(encrypted_params):
|
||||
params = VideaIE.rc4(
|
||||
base64.b64decode(encrypted_params), 'xkt3a41psizxrh9l').split('-')
|
||||
# sign, token, timestamp
|
||||
return params[1], params[2], params[3]
|
||||
|
||||
def _real_extract(self, url):
|
||||
scheme = 'https' if url.startswith('https') else 'http'
|
||||
|
||||
audio_id = self._match_id(url)
|
||||
audio_info_file = f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json'
|
||||
audio_info = self._download_json(
|
||||
audio_info_file, audio_id,
|
||||
f'Downloading info json {audio_info_file}', 'Unable to download info file')
|
||||
f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json', audio_id,
|
||||
'Downloading info json', 'Unable to download info file')
|
||||
|
||||
formats = [{
|
||||
formats = []
|
||||
# NOTE: VIP-restricted audio
|
||||
if audio_info.get('is_paid'):
|
||||
ts = int(time.time())
|
||||
vip_info = self._download_json(
|
||||
f'{scheme}://mpay.ximalaya.com/mobile/track/pay/{audio_id}/{ts}',
|
||||
audio_id, 'Downloading VIP info json', 'Unable to download VIP info file',
|
||||
query={'device': 'pc', 'isBackend': 'true', '_': ts})
|
||||
filename = self._decrypt_filename(vip_info['fileId'], vip_info['seed'])
|
||||
sign, token, timestamp = self._decrypt_url_params(vip_info['ep'])
|
||||
vip_url = update_url_query(
|
||||
f'{vip_info["domain"]}/download/{vip_info["apiVersion"]}{filename}', {
|
||||
'sign': sign,
|
||||
'token': token,
|
||||
'timestamp': timestamp,
|
||||
'buy_key': vip_info['buyKey'],
|
||||
'duration': vip_info['duration'],
|
||||
})
|
||||
fmt = {
|
||||
'format_id': 'vip',
|
||||
'url': vip_url,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
if '_preview_' in vip_url:
|
||||
self.report_warning(
|
||||
f'This tracks requires a VIP account. Using a sample instead. {self._login_hint()}')
|
||||
fmt.update({
|
||||
'format_note': 'Sample',
|
||||
'preference': -10,
|
||||
**traverse_obj(vip_info, {
|
||||
'filesize': ('sampleLength', {int_or_none}),
|
||||
'duration': ('sampleDuration', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
else:
|
||||
fmt.update(traverse_obj(vip_info, {
|
||||
'filesize': ('totalLength', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}))
|
||||
|
||||
fmt['abr'] = try_call(lambda: fmt['filesize'] * 8 / fmt['duration'] / 1024)
|
||||
formats.append(fmt)
|
||||
|
||||
formats.extend([{
|
||||
'format_id': f'{bps}k',
|
||||
'url': audio_info[k],
|
||||
'abr': bps,
|
||||
'vcodec': 'none',
|
||||
} for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)]
|
||||
} for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)])
|
||||
|
||||
thumbnails = []
|
||||
for k in audio_info:
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user