1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-12-24 00:49:06 +00:00

Merge branch 'master' into jsi

This commit is contained in:
c-basalt
2024-12-26 19:59:46 -05:00
160 changed files with 5370 additions and 2004 deletions

View File

@@ -217,6 +217,7 @@ from .bbc import (
BBCCoUkIPlayerGroupIE,
BBCCoUkPlaylistIE,
)
from .beacon import BeaconTvIE
from .beatbump import (
BeatBumpPlaylistIE,
BeatBumpVideoIE,
@@ -277,6 +278,7 @@ from .bleacherreport import (
from .blerp import BlerpIE
from .blogger import BloggerIE
from .bloomberg import BloombergIE
from .bluesky import BlueskyIE
from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE
from .boosty import BoostyIE
@@ -362,7 +364,10 @@ from .ccc import (
)
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .cda import (
CDAIE,
CDAFolderIE,
)
from .cellebrite import CellebriteIE
from .ceskatelevize import CeskaTelevizeIE
from .cgtn import CGTNIE
@@ -397,8 +402,6 @@ from .cmt import CMTIE
from .cnbc import CNBCVideoIE
from .cnn import (
CNNIE,
CNNArticleIE,
CNNBlogsIE,
CNNIndonesiaIE,
)
from .comedycentral import (
@@ -729,6 +732,7 @@ from .genius import (
GeniusIE,
GeniusLyricsIE,
)
from .germanupa import GermanupaIE
from .getcourseru import (
GetCourseRuIE,
GetCourseRuPlayerIE,
@@ -822,7 +826,10 @@ from .hungama import (
HungamaIE,
HungamaSongIE,
)
from .huya import HuyaLiveIE
from .huya import (
HuyaLiveIE,
HuyaVideoIE,
)
from .hypem import HypemIE
from .hypergryph import MonsterSirenHypergryphMusicIE
from .hytale import HytaleIE
@@ -945,6 +952,7 @@ from .kick import (
)
from .kicker import KickerIE
from .kickstarter import KickStarterIE
from .kika import KikaIE
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE
@@ -1036,10 +1044,7 @@ from .livestream import (
LivestreamShortenerIE,
)
from .livestreamfails import LivestreamfailsIE
from .lnkgo import (
LnkGoIE,
LnkIE,
)
from .lnk import LnkIE
from .loom import (
LoomFolderIE,
LoomIE,
@@ -1164,6 +1169,7 @@ from .mlb import (
)
from .mlssoccer import MLSSoccerIE
from .mocha import MochaVideoIE
from .mojevideo import MojevideoIE
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
from .motherless import (
@@ -1810,6 +1816,7 @@ from .screen9 import Screen9IE
from .screencast import ScreencastIE
from .screencastify import ScreencastifyIE
from .screencastomatic import ScreencastOMaticIE
from .screenrec import ScreenRecIE
from .scrippsnetworks import (
ScrippsNetworksIE,
ScrippsNetworksWatchIE,
@@ -1820,6 +1827,7 @@ from .scte import (
SCTECourseIE,
)
from .sejmpl import SejmIE
from .sen import SenIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import (
SenateGovIE,
@@ -1875,6 +1883,7 @@ from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
from .smotrim import SmotrimIE
from .snapchat import SnapchatSpotlightIE
from .snotr import SnotrIE
from .sohu import (
SohuIE,
@@ -2311,6 +2320,7 @@ from .videomore import (
VideomoreVideoIE,
)
from .videopress import VideoPressIE
from .vidflex import VidflexIE
from .vidio import (
VidioIE,
VidioLiveIE,

View File

@@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
},
'playlist_count': 15,
'skip': 'This program is not currently available in ABC iview',
}, {
'url': 'https://iview.abc.net.au/show/inbestigators',
'info_dict': {
'id': '175343-1',
'title': 'Series 1',
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
'series': 'The Inbestigators',
'season': 'Series 1',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
},
'playlist_count': 17,
}]
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id)
webpage_data = self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
webpage, 'initial state')
video_data = self._parse_json(
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
video_data = video_data['route']['pageData']['_embedded']
video_data = self._search_json(
r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
transform_source=lambda x: x.encode().decode('unicode_escape'),
end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):

View File

@@ -4,7 +4,7 @@ from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
IE_NAME = 'AcademicEarth:Course'
_TEST = {
'url': 'http://academicearth.org/playlists/laws-of-nature/',

View File

@@ -49,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
class ADNIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir',
'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir',
'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': {
'id': '9841',
@@ -71,10 +71,7 @@ class ADNIE(ADNBaseIE):
},
'skip': 'Only available in French and German speaking Europe',
}, {
'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'only_matching': True,
}, {
'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1',
'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1',
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
'info_dict': {
'id': '23550',
@@ -167,7 +164,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
'username': username,
})) or {}).get('accessToken')
if access_token:
self._HEADERS = {'authorization': 'Bearer ' + access_token}
self._HEADERS['Authorization'] = f'Bearer {access_token}'
except ExtractorError as e:
message = None
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
@@ -178,6 +175,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
def _real_extract(self, url):
lang, video_id = self._match_valid_url(url).group('lang', 'id')
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
player = self._download_json(
video_base_url + 'configuration', video_id,
@@ -218,7 +216,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
links_data = self._download_json(
links_url, video_id, 'Downloading links JSON metadata', headers={
'X-Player-Token': authorization,
'X-Target-Distribution': lang or 'fr',
**self._HEADERS,
}, query={
'freeWithAds': 'true',
@@ -257,6 +254,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
load_balancer_data = self._download_json(
load_balancer_url, video_id,
f'Downloading {format_id} {quality} JSON metadata',
headers=self._HEADERS,
fatal=False) or {}
m3u8_url = load_balancer_data.get('location')
if not m3u8_url:
@@ -277,7 +275,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
video = (self._download_json(
self._API_BASE_URL + f'video/{video_id}', video_id,
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {}
show = video.get('show') or {}
return {
@@ -299,9 +297,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
class ADNSeasonIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>[^/?#]+)/?(?:$|[#?])'
_VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])'
_TESTS = [{
'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new',
'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new',
'playlist_count': 12,
'info_dict': {
'id': '911',
@@ -312,16 +310,14 @@ class ADNSeasonIE(ADNBaseIE):
def _real_extract(self, url):
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
show = self._download_json(
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
'Downloading show JSON metadata', headers=self._HEADERS)['show']
show_id = str(show['id'])
episodes = self._download_json(
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
'Downloading episode list', headers={
'X-Target-Distribution': lang or 'fr',
**self._HEADERS,
}, query={
'Downloading episode list', headers=self._HEADERS, query={
'order': 'asc',
'limit': '-1',
})

View File

@@ -1355,6 +1355,7 @@ MSO_INFO = {
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
_MVPD_CACHE = 'ap-mvpd'
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
@@ -1454,7 +1455,11 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
'no_iframe': 'false',
'domain_name': 'adobe.com',
'redirect_url': url,
})
}, headers={
# yt-dlp's default user-agent is usually too old for Comcast_SSO
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
'User-Agent': self._MODERN_USER_AGENT,
} if mso_id == 'Comcast_SSO' else None)
elif not self._cookies_passed:
raise_mvpd_required()

View File

@@ -33,21 +33,21 @@ class AfreecaTVBaseIE(InfoExtractor):
}
response = self._download_json(
'https://login.afreecatv.com/app/LoginAction.php', None,
'https://login.sooplive.co.kr/app/LoginAction.php', None,
'Logging in', data=urlencode_postdata(login_form))
_ERRORS = {
-4: 'Your account has been suspended due to a violation of our terms and policies.',
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
-6: 'https://login.afreecatv.com/membership/changeMember.php',
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
-12: 'https://member.afreecatv.com/app/user_security.php',
-5: 'https://member.sooplive.co.kr/app/user_delete_progress.php',
-6: 'https://login.sooplive.co.kr/membership/changeMember.php',
-8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
-9: 'https://member.sooplive.co.kr/app/pop_login_block.php',
-11: 'https://login.sooplive.co.kr/afreeca/second_login.php',
-12: 'https://member.sooplive.co.kr/app/user_security.php',
0: 'The username does not exist or you have entered the wrong password.',
-1: 'The username does not exist or you have entered the wrong password.',
-3: 'You have entered your username/password incorrectly.',
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
-7: 'You cannot use your Global Soop account to access Korean Soop.',
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
-32008: 'You have failed to log in. Please contact our Help Center.',
}
@@ -61,76 +61,40 @@ class AfreecaTVBaseIE(InfoExtractor):
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
return self._download_json(Request(
f'https://api.m.afreecatv.com/{endpoint}',
f'https://api.m.sooplive.co.kr/{endpoint}',
data=data, headers=headers, query=query,
extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON')
class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv'
IE_DESC = 'afreecatv.com'
_VALID_URL = r'''(?x)
https?://
(?:
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
(?:
/app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html)
)\?.*?\bnTitleNo=|
vod\.afreecatv\.com/(PLAYER/STATION|player)/
)
(?P<id>\d+)/?(?:$|[?#&])
'''
IE_NAME = 'soop'
IE_DESC = 'sooplive.co.kr'
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])'
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
'url': 'https://vod.sooplive.co.kr/player/96753363',
'info_dict': {
'id': '36164052',
'id': '20230108_9FF5BEE1_244432674_1',
'ext': 'mp4',
'title': '데일리 에이프릴 요정들의 시상식!',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160503',
'uploader_id': 'rlantnghks',
'uploader': '페이즈으',
'duration': 10840,
'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+',
'upload_date': '20230108',
'timestamp': 1673218805,
'title': '젠지 페이즈',
},
'skip': 'Video is gone',
}, {
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
'info_dict': {
'id': '36153164',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'params': {
'skip_download': True,
},
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '36153164_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'upload_date': '20160502',
},
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '36153164_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'upload_date': '20160502',
},
}],
'skip': 'Video is gone',
}, {
# non standard key
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605',
'info_dict': {
'id': '20170411_BE689A0E_190960999_1_2_h',
'ext': 'mp4',
'title': '혼자사는여자집',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
'uploader': '♥이슬이',
'uploader_id': 'dasl8121',
'upload_date': '20170411',
@@ -142,12 +106,12 @@ class AfreecaTVIE(AfreecaTVBaseIE):
},
}, {
# adult content
'url': 'https://vod.afreecatv.com/player/97267690',
'url': 'https://vod.sooplive.co.kr/player/97267690',
'info_dict': {
'id': '20180327_27901457_202289533_1',
'ext': 'mp4',
'title': '[생]빨개요♥ (part 1)',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
'uploader': '[SA]서아',
'uploader_id': 'bjdyrksu',
'upload_date': '20180327',
@@ -157,36 +121,17 @@ class AfreecaTVIE(AfreecaTVBaseIE):
'skip_download': True,
},
'skip': 'The VOD does not exist',
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
}, {
'url': 'https://vod.afreecatv.com/player/96753363',
'info_dict': {
'id': '20230108_9FF5BEE1_244432674_1',
'ext': 'mp4',
'uploader_id': 'rlantnghks',
'uploader': '페이즈으',
'duration': 10840,
'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
'upload_date': '20230108',
'timestamp': 1673218805,
'title': '젠지 페이즈',
},
'params': {
'skip_download': True,
},
}, {
# adult content
'url': 'https://vod.afreecatv.com/player/70395877',
'url': 'https://vod.sooplive.co.kr/player/70395877',
'only_matching': True,
}, {
# subscribers only
'url': 'https://vod.afreecatv.com/player/104647403',
'url': 'https://vod.sooplive.co.kr/player/104647403',
'only_matching': True,
}, {
# private
'url': 'https://vod.afreecatv.com/player/81669846',
'url': 'https://vod.sooplive.co.kr/player/81669846',
'only_matching': True,
}]
@@ -262,11 +207,11 @@ class AfreecaTVIE(AfreecaTVBaseIE):
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:catchstory'
IE_DESC = 'afreecatv.com catch story'
_VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
IE_NAME = 'soop:catchstory'
IE_DESC = 'sooplive.co.kr catch story'
_VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory'
_TESTS = [{
'url': 'https://vod.afreecatv.com/player/103247/catchstory',
'url': 'https://vod.sooplive.co.kr/player/103247/catchstory',
'info_dict': {
'id': '103247',
},
@@ -299,11 +244,11 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
class AfreecaTVLiveIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:live'
IE_DESC = 'afreecatv.com livestreams'
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
IE_NAME = 'soop:live'
IE_DESC = 'sooplive.co.kr livestreams'
_VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?'
_TESTS = [{
'url': 'https://play.afreecatv.com/pyh3646/237852185',
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
'info_dict': {
'id': '237852185',
'ext': 'mp4',
@@ -315,30 +260,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
},
'skip': 'Livestream has ended',
}, {
'url': 'https://play.afreecatv.com/pyh3646/237852185',
'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
'only_matching': True,
}, {
'url': 'https://play.afreecatv.com/pyh3646',
'url': 'https://play.sooplive.co.kr/pyh3646',
'only_matching': True,
}]
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
_LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php'
_WORKING_CDNS = [
'gcp_cdn', # live-global-cdn-v02.afreecatv.com
'gs_cdn_pc_app', # pc-app.stream.afreecatv.com
'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com
'gs_cdn_pc_web', # pc-web.stream.afreecatv.com
'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr
'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr
'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr
'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr
]
_BAD_CDNS = [
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400)
'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve)
'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve)
'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400)
'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400)
'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve)
'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve)
'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400)
]
def _extract_formats(self, channel_info, broadcast_no, aid):
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr'
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
default_cdn_ids = orderedSet([
@@ -358,7 +303,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
try:
return self._extract_m3u8_formats(
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
headers={'Referer': 'https://play.afreecatv.com/'})
headers={'Referer': 'https://play.sooplive.co.kr/'})
except ExtractorError as e:
if attempt == len(cdn_ids):
raise
@@ -374,7 +319,13 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
broadcaster_id = channel_info.get('BJID') or broadcaster_id
broadcast_no = channel_info.get('BNO') or broadcast_no
if not broadcast_no:
raise UserNotLive(video_id=broadcaster_id)
result = channel_info.get('RESULT')
if result == 0:
raise UserNotLive(video_id=broadcaster_id)
elif result == -6:
self.raise_login_required(
'This channel is streaming for subscribers only', method='password')
raise ExtractorError('Unable to extract broadcast number')
password = self.get_param('videopassword')
if channel_info.get('BPWD') == 'Y' and password is None:
@@ -403,7 +354,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
formats = self._extract_formats(channel_info, broadcast_no, aid)
station_info = traverse_obj(self._download_json(
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no,
'Downloading channel metadata', 'Unable to download channel metadata',
query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
@@ -419,11 +370,11 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
}
class AfreecaTVUserIE(InfoExtractor):
IE_NAME = 'afreecatv:user'
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
class AfreecaTVUserIE(AfreecaTVBaseIE):
IE_NAME = 'soop:user'
_VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?'
_TESTS = [{
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review',
'info_dict': {
'_type': 'playlist',
'id': 'ryuryu24',
@@ -431,7 +382,7 @@ class AfreecaTVUserIE(InfoExtractor):
},
'playlist_count': 218,
}, {
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight',
'info_dict': {
'_type': 'playlist',
'id': 'parang1995',
@@ -439,7 +390,7 @@ class AfreecaTVUserIE(InfoExtractor):
},
'playlist_count': 997,
}, {
'url': 'https://bj.afreecatv.com/ryuryu24/vods',
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods',
'info_dict': {
'_type': 'playlist',
'id': 'ryuryu24',
@@ -447,7 +398,7 @@ class AfreecaTVUserIE(InfoExtractor):
},
'playlist_count': 221,
}, {
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip',
'info_dict': {
'_type': 'playlist',
'id': 'ryuryu24',
@@ -459,12 +410,12 @@ class AfreecaTVUserIE(InfoExtractor):
def _fetch_page(self, user_id, user_type, page):
page += 1
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id,
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
note=f'Downloading {user_type} video page {page}')
for item in info['data']:
yield self.url_result(
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
def _real_extract(self, url):
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')

View File

@@ -33,24 +33,6 @@ class AnvatoIE(InfoExtractor):
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
_TESTS = [{
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
'md5': '921919dab3cd0b849ff3d624831ae3e2',
'info_dict': {
'id': '899441',
'ext': 'mp4',
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
'description': 'md5:85e05a3cc163f8c344340f220521136d',
'upload_date': '20201215',
'timestamp': 1608009755,
'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'NFL',
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
'Player Highlights', 'Cleveland Browns', 'league'],
'duration': 157,
'categories': ['Entertainment', 'Game', 'Highlights'],
},
}, {
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
'md5': '837718bcfb3a7778d022f857f7a9b19e',
@@ -241,31 +223,6 @@ class AnvatoIE(InfoExtractor):
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
}
def _generate_nfl_token(self, anvack, mcp_id):
reroute = self._download_json(
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
headers={'X-Domain-Id': 100}, note='Fetching token info')
token_type = reroute.get('token_type') or 'Bearer'
auth_token = f'{token_type} {reroute["access_token"]}'
response = self._download_json(
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
'query': '''{
viewer {
mediaToken(anvack: "%s", id: %s) {
token
}
}
}''' % (anvack, mcp_id), # noqa: UP031
}).encode(), headers={
'Authorization': auth_token,
'Content-Type': 'application/json',
}, note='Fetching NFL API token')
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
_TOKEN_GENERATORS = {
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
}
def _server_time(self, access_key, video_id):
return int_or_none(traverse_obj(self._download_json(
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
@@ -290,8 +247,6 @@ class AnvatoIE(InfoExtractor):
}
if extracted_token is not None:
api['anvstk2'] = extracted_token
elif self._TOKEN_GENERATORS.get(access_key) is not None:
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
elif self._ANVACK_TABLE.get(access_key) is not None:
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
else:

View File

@@ -1,27 +1,42 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
clean_podcast_url,
get_element_by_class,
int_or_none,
parse_iso8601,
try_get,
)
from ..utils.traversal import traverse_obj
class ApplePodcastsIE(InfoExtractor):
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
_TESTS = [{
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
'info_dict': {
'id': '1000665010654',
'ext': 'mp3',
'title': 'Ferreck Dawn - To The Break of Dawn 117',
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
'upload_date': '20240812',
'timestamp': 1723449600,
'duration': 3596,
'series': 'Ferreck Dawn - To The Break of Dawn',
'thumbnail': 're:.+[.](png|jpe?g|webp)',
},
}, {
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
'md5': '41dc31cd650143e530d9423b6b5a344f',
'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
'info_dict': {
'id': '1000482637777',
'ext': 'mp3',
'title': '207 - Whitney Webb Returns',
'episode': '207 - Whitney Webb Returns',
'episode_number': 207,
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
'upload_date': '20200705',
'timestamp': 1593932400,
'duration': 6454,
'duration': 5369,
'series': 'The Tim Dillon Show',
'thumbnail': 're:.+[.](png|jpe?g|webp)',
},
@@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
def _real_extract(self, url):
episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id)
episode_data = {}
ember_data = {}
# new page type 2021-11
amp_data = self._parse_json(self._search_regex(
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
amp_data = try_get(amp_data,
lambda a: self._parse_json(
next(a[x] for x in iter(a) if episode_id in x),
episode_id),
dict) or {}
amp_data = amp_data.get('d') or []
episode_data = try_get(
amp_data,
lambda a: next(x for x in a
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
dict)
if not episode_data:
# try pre 2021-11 page type: TODO: consider deleting if no longer used
ember_data = self._parse_json(self._search_regex(
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
webpage, 'ember data'), episode_id) or {}
ember_data = ember_data.get(episode_id) or ember_data
episode_data = try_get(ember_data, lambda x: x['data'], dict)
episode = episode_data['attributes']
description = episode.get('description') or {}
series = None
for inc in (amp_data or ember_data.get('included') or []):
if inc.get('type') == 'media/podcast':
series = try_get(inc, lambda x: x['attributes']['name'])
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
server_data = self._search_json(
r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
model_data = traverse_obj(server_data, (
'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
'model', {dict}, any))
return {
'id': episode_id,
'title': episode.get('name'),
'url': clean_podcast_url(episode['assetUrl']),
'description': description.get('standard') or description.get('short'),
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
'series': series,
**self._json_ld(
traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
**traverse_obj(model_data, {
'title': ('title', {str}),
'url': ('streamUrl', {clean_podcast_url}),
'timestamp': ('releaseDate', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
}),
'thumbnail': self._og_search_thumbnail(webpage),
'vcodec': 'none',
}

View File

@@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):
class ARDBetaMediathekIE(InfoExtractor):
IE_NAME = 'ARDMediathek'
_VALID_URL = r'''(?x)https://
_VALID_URL = r'''(?x)https?://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/]+/)?
(?:player|live|video)/
@@ -299,7 +299,7 @@ class ARDBetaMediathekIE(InfoExtractor):
'info_dict': {
'id': '94834686',
'ext': 'mp4',
'duration': 2700,
'duration': 2670,
'episode': '7 Tage ... unter harten Jungs',
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
'upload_date': '20231005',
@@ -307,10 +307,28 @@ class ARDBetaMediathekIE(InfoExtractor):
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
'series': '7 Tage ...',
'channel': 'HR',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
'title': '7 Tage ... unter harten Jungs',
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
},
}, {
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
'info_dict': {
'id': '13847165',
'chapters': 'count:8',
'ext': 'mp4',
'channel': 'WDR',
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
'series': 'Lokalzeit aus Düsseldorf',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
'upload_date': '20241031',
'timestamp': 1730399400,
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
'duration': 1759,
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
},
}, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
'only_matching': True,
@@ -455,6 +473,12 @@ class ARDBetaMediathekIE(InfoExtractor):
'subtitles': subtitles,
'is_live': is_live,
'age_limit': age_limit,
**traverse_obj(media_data, {
'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
'start_time': ('chapterTime', {int_or_none}),
'title': ('chapterTitle', {str}),
}),
}),
**traverse_obj(media_data, ('meta', {
'title': 'title',
'description': 'synopsis',
@@ -470,7 +494,7 @@ class ARDBetaMediathekIE(InfoExtractor):
class ARDMediathekCollectionIE(InfoExtractor):
_VALID_URL = r'''(?x)https://
_VALID_URL = r'''(?x)https?://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/?#]+/)?
(?P<playlist>sendung|serie|sammlung)/

View File

@@ -101,9 +101,10 @@ class AsobiStageIE(InfoExtractor):
self._HEADERS['Authorization'] = f'Bearer {token}'
def _real_extract(self, url):
video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
webpage = self._download_webpage(url, video_id)
event_data = traverse_obj(
self._search_nextjs_data(webpage, video_id, default={}),
('props', 'pageProps', 'eventCMSData', {

View File

@@ -1,3 +1,5 @@
import functools
import json
import random
import re
import time
@@ -6,7 +8,9 @@ from .common import InfoExtractor
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
extract_attributes,
float_or_none,
get_element_html_by_id,
int_or_none,
parse_filesize,
str_or_none,
@@ -17,6 +21,7 @@ from ..utils import (
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class BandcampIE(InfoExtractor):
@@ -459,7 +464,7 @@ class BandcampUserIE(InfoExtractor):
},
}, {
'url': 'https://coldworldofficial.bandcamp.com/music',
'playlist_mincount': 10,
'playlist_mincount': 7,
'info_dict': {
'id': 'coldworldofficial',
'title': 'Discography of coldworldofficial',
@@ -473,12 +478,19 @@ class BandcampUserIE(InfoExtractor):
},
}]
def _yield_items(self, webpage):
yield from (
re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
yield from traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
'data-client-items', {json.loads}, ..., 'page_url', {str}))
def _real_extract(self, url):
uploader = self._match_id(url)
webpage = self._download_webpage(url, uploader)
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
return self.playlist_from_matches(
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
self._yield_items(webpage), uploader, f'Discography of {uploader}',
getter=functools.partial(urljoin, url))

View File

@@ -0,0 +1,68 @@
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_iso8601,
traverse_obj,
)
class BeaconTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?beacon\.tv/content/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://beacon.tv/content/welcome-to-beacon',
'md5': 'b3f5932d437f288e662f10f3bfc5bd04',
'info_dict': {
'id': 'welcome-to-beacon',
'ext': 'mp4',
'upload_date': '20240509',
'description': 'md5:ea2bd32e71acf3f9fca6937412cc3563',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/I4CkkEvN/poster.jpg?width=720',
'title': 'Your home for Critical Role!',
'timestamp': 1715227200,
'duration': 105.494,
},
}, {
'url': 'https://beacon.tv/content/re-slayers-take-trailer',
'md5': 'd879b091485dbed2245094c8152afd89',
'info_dict': {
'id': 're-slayers-take-trailer',
'ext': 'mp4',
'title': 'The Re-Slayers Take | Official Trailer',
'timestamp': 1715189040,
'upload_date': '20240508',
'duration': 53.249,
'thumbnail': 'https://cdn.jwplayer.com/v2/media/PW5ApIw3/poster.jpg?width=720',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
content_data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
'props', 'pageProps', '__APOLLO_STATE__',
lambda k, v: k.startswith('Content:') and v['slug'] == video_id, any))
if not content_data:
raise ExtractorError('Failed to extract content data')
jwplayer_data = traverse_obj(content_data, (
(('contentVideo', 'video', 'videoData'),
('contentPodcast', 'podcast', 'audioData')), {json.loads}, {dict}, any))
if not jwplayer_data:
if content_data.get('contentType') not in ('videoPodcast', 'video', 'podcast'):
raise ExtractorError('Content is not a video/podcast', expected=True)
if traverse_obj(content_data, ('contentTier', '__ref')) != 'MemberTier:65b258d178f89be87b4dc0a4':
self.raise_login_required('This video/podcast is for members only')
raise ExtractorError('Failed to extract content')
return {
**self._parse_jwplayer_data(jwplayer_data, video_id),
**traverse_obj(content_data, {
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('publishedAt', {parse_iso8601}),
}),
}

View File

@@ -1,18 +1,33 @@
import re
from .common import InfoExtractor
from ..utils import extract_attributes
from ..utils import ExtractorError, extract_attributes
class BFMTVBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)'
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>.*?</div>)'
_VIDEO_ELEMENT_REGEX = r'(<video-js[^>]+>)'
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _brightcove_url_result(self, video_id, video_block):
account_id = video_block.get('accountid') or '876450612001'
player_id = video_block.get('playerid') or 'I2qBTln4u'
def _extract_video(self, video_block):
video_element = self._search_regex(
self._VIDEO_ELEMENT_REGEX, video_block, 'video element', default=None)
if video_element:
video_element_attrs = extract_attributes(video_element)
video_id = video_element_attrs.get('data-video-id')
if not video_id:
return
account_id = video_element_attrs.get('data-account') or '876450610001'
player_id = video_element_attrs.get('adjustplayer') or '19dszYXgm'
else:
video_block_attrs = extract_attributes(video_block)
video_id = video_block_attrs.get('videoid')
if not video_id:
return
account_id = video_block_attrs.get('accountid') or '876630703001'
player_id = video_block_attrs.get('playerid') or 'KbPwEbuHx'
return self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
'BrightcoveNew', video_id)
@@ -40,23 +55,25 @@ class BFMTVIE(BFMTVBaseIE):
def _real_extract(self, url):
bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id)
video_block = extract_attributes(self._search_regex(
video = self._extract_video(self._search_regex(
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
return self._brightcove_url_result(video_block['videoid'], video_block)
if not video:
raise ExtractorError('Failed to extract video')
return video
class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
class BFMTVLiveIE(BFMTVBaseIE):
IE_NAME = 'bfmtv:live'
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
_TESTS = [{
'url': 'https://www.bfmtv.com/en-direct/',
'info_dict': {
'id': '5615950982001',
'id': '6346069778112',
'ext': 'mp4',
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'title': r're:^Le Live BFM TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'uploader_id': '876450610001',
'upload_date': '20220926',
'timestamp': 1664207191,
'upload_date': '20240202',
'timestamp': 1706887572,
'live_status': 'is_live',
'thumbnail': r're:https://.+/image\.jpg',
'tags': [],
@@ -69,6 +86,15 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
'only_matching': True,
}]
def _real_extract(self, url):
bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id)
video = self._extract_video(self._search_regex(
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
if not video:
raise ExtractorError('Failed to extract video')
return video
class BFMTVArticleIE(BFMTVBaseIE):
IE_NAME = 'bfmtv:article'
@@ -102,18 +128,16 @@ class BFMTVArticleIE(BFMTVBaseIE):
},
}]
def _entries(self, webpage):
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
video = self._extract_video(video_block_el)
if video:
yield video
def _real_extract(self, url):
bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id)
entries = []
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
video_block = extract_attributes(video_block_el)
video_id = video_block.get('videoid')
if not video_id:
continue
entries.append(self._brightcove_url_result(video_id, video_block))
return self.playlist_result(
entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
self._entries(webpage), bfmtv_id, self._og_search_title(webpage, fatal=False),
self._html_search_meta(['og:description', 'description'], webpage))

View File

@@ -46,6 +46,7 @@ from ..utils import (
class BilibiliBaseIE(InfoExtractor):
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
_wbi_key_cache = {}
@@ -192,7 +193,7 @@ class BilibiliBaseIE(InfoExtractor):
video_info = self._download_json(
'https://api.bilibili.com/x/player/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
note=f'Extracting subtitle info {cid}')
note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
self.report_warning(
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
@@ -207,7 +208,7 @@ class BilibiliBaseIE(InfoExtractor):
def _get_chapters(self, aid, cid):
chapters = aid and cid and self._download_json(
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
note='Extracting chapters', fatal=False)
note='Extracting chapters', fatal=False, headers=self._HEADERS)
return traverse_obj(chapters, ('data', 'view_points', ..., {
'title': 'content',
'start_time': 'from',
@@ -298,7 +299,7 @@ class BilibiliBaseIE(InfoExtractor):
class BiliBiliIE(BilibiliBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bilibili.com/video/BV13x41117TL',
@@ -622,6 +623,10 @@ class BiliBiliIE(BilibiliBaseIE):
'ext': 'mp4',
},
'skip': 'geo-restricted',
}, {
'note': 'has - in the last path segment of the url',
'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -1017,8 +1022,6 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
class BilibiliCheeseBaseIE(BilibiliBaseIE):
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
def _extract_episode(self, season_info, ep_id):
episode_info = traverse_obj(season_info, (
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
@@ -1848,7 +1851,7 @@ class BiliBiliPlayerIE(InfoExtractor):
class BiliIntlBaseIE(InfoExtractor):
_API_URL = 'https://api.bilibili.tv/intl/gateway'
_NETRC_MACHINE = 'biliintl'
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
_HEADERS = {'Referer': 'https://www.bilibili.tv/'}
def _call_api(self, endpoint, *args, **kwargs):
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)

388
yt_dlp/extractor/bluesky.py Normal file
View File

@@ -0,0 +1,388 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
format_field,
int_or_none,
mimetype2ext,
orderedSet,
parse_iso8601,
truncate_string,
update_url_query,
url_basename,
url_or_none,
variadic,
)
from ..utils.traversal import traverse_obj
class BlueskyIE(InfoExtractor):
_VALID_URL = [
r'https?://(?:www\.)?(?:bsky\.app|main\.bsky\.dev)/profile/(?P<handle>[\w.:%-]+)/post/(?P<id>\w+)',
r'at://(?P<handle>[\w.:%-]+)/app\.bsky\.feed\.post/(?P<id>\w+)',
]
_TESTS = [{
'url': 'https://bsky.app/profile/blu3blue.bsky.social/post/3l4omssdl632g',
'md5': '375539c1930ab05d15585ed772ab54fd',
'info_dict': {
'id': '3l4omssdl632g',
'ext': 'mp4',
'uploader': 'Blu3Blu3Lilith',
'uploader_id': 'blu3blue.bsky.social',
'uploader_url': 'https://bsky.app/profile/blu3blue.bsky.social',
'channel_id': 'did:plc:pzdr5ylumf7vmvwasrpr5bf2',
'channel_url': 'https://bsky.app/profile/did:plc:pzdr5ylumf7vmvwasrpr5bf2',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'OMG WE HAVE VIDEOS NOW',
'description': 'OMG WE HAVE VIDEOS NOW',
'upload_date': '20240921',
'timestamp': 1726940605,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
},
}, {
'url': 'https://bsky.app/profile/bsky.app/post/3l3vgf77uco2g',
'md5': 'b9e344fdbce9f2852c668a97efefb105',
'info_dict': {
'id': '3l3vgf77uco2g',
'ext': 'mp4',
'uploader': 'Bluesky',
'uploader_id': 'bsky.app',
'uploader_url': 'https://bsky.app/profile/bsky.app',
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'Bluesky now has video! Update your app to versi...',
'alt_title': 'Bluesky video feature announcement',
'description': r're:(?s)Bluesky now has video! .{239}',
'upload_date': '20240911',
'timestamp': 1726074716,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
'subtitles': {
'en': 'mincount:1',
},
},
}, {
'url': 'https://main.bsky.dev/profile/souris.moe/post/3l4qhp7bcs52c',
'md5': '5f2df8c200b5633eb7fb2c984d29772f',
'info_dict': {
'id': '3l4qhp7bcs52c',
'ext': 'mp4',
'uploader': 'souris',
'uploader_id': 'souris.moe',
'uploader_url': 'https://bsky.app/profile/souris.moe',
'channel_id': 'did:plc:tj7g244gl5v6ai6cm4f4wlqp',
'channel_url': 'https://bsky.app/profile/did:plc:tj7g244gl5v6ai6cm4f4wlqp',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'Bluesky video #3l4qhp7bcs52c',
'upload_date': '20240922',
'timestamp': 1727003838,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
},
}, {
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
'md5': '1af9c7fda061cf7593bbffca89e43d1c',
'info_dict': {
'id': '3l3w4tnezek2e',
'ext': 'mp4',
'uploader': 'clean',
'uploader_id': 'de1.pds.tentacle.expert',
'uploader_url': 'https://bsky.app/profile/de1.pds.tentacle.expert',
'channel_id': 'did:web:de1.tentacle.expert',
'channel_url': 'https://bsky.app/profile/did:web:de1.tentacle.expert',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'Bluesky video #3l3w4tnezek2e',
'upload_date': '20240911',
'timestamp': 1726098823,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
},
}, {
'url': 'https://bsky.app/profile/yunayuispink.bsky.social/post/3l7gqcfes742o',
'info_dict': {
'id': 'XxK3t_5V3ao',
'ext': 'mp4',
'uploader': 'yunayu',
'uploader_id': '@yunayuispink',
'uploader_url': 'https://www.youtube.com/@yunayuispink',
'channel': 'yunayu',
'channel_id': 'UCPLvXnHa7lTyNoR_dGsU14w',
'channel_url': 'https://www.youtube.com/channel/UCPLvXnHa7lTyNoR_dGsU14w',
'thumbnail': 'https://i.ytimg.com/vi_webp/XxK3t_5V3ao/maxresdefault.webp',
'description': r're:Have a good goodx10000day',
'title': '5min vs 5hours drawing',
'availability': 'public',
'live_status': 'not_live',
'playable_in_embed': True,
'upload_date': '20241026',
'timestamp': 1729967784,
'duration': 321,
'age_limit': 0,
'like_count': int,
'view_count': int,
'comment_count': int,
'channel_follower_count': int,
'categories': ['Entertainment'],
'tags': [],
},
'add_ie': ['Youtube'],
}, {
'url': 'https://bsky.app/profile/endshark.bsky.social/post/3jzxjkcemae2m',
'info_dict': {
'id': '222792849',
'ext': 'mp3',
'uploader': 'LASERBAT',
'uploader_id': 'laserbatx',
'uploader_url': 'https://laserbatx.bandcamp.com',
'artists': ['LASERBAT'],
'album_artists': ['LASERBAT'],
'album': 'Hari Nezumi [EP]',
'track': 'Forward to the End',
'title': 'LASERBAT - Forward to the End',
'thumbnail': 'https://f4.bcbits.com/img/a2507705510_5.jpg',
'duration': 228.571,
'track_id': '222792849',
'release_date': '20230423',
'upload_date': '20230423',
'timestamp': 1682276040.0,
'release_timestamp': 1682276040.0,
'track_number': 1,
},
'add_ie': ['Bandcamp'],
}, {
'url': 'https://bsky.app/profile/dannybhoix.bsky.social/post/3l6oe5mtr2c2j',
'md5': 'b9e344fdbce9f2852c668a97efefb105',
'info_dict': {
'id': '3l3vgf77uco2g',
'ext': 'mp4',
'uploader': 'Bluesky',
'uploader_id': 'bsky.app',
'uploader_url': 'https://bsky.app/profile/bsky.app',
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'Bluesky now has video! Update your app to versi...',
'alt_title': 'Bluesky video feature announcement',
'description': r're:(?s)Bluesky now has video! .{239}',
'upload_date': '20240911',
'timestamp': 1726074716,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
'subtitles': {
'en': 'mincount:1',
},
},
}, {
'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
'info_dict': {
'id': '3l7rdfxhyds2f',
'ext': 'mp4',
'uploader': 'cinnamon',
'uploader_id': 'alt.bun.how',
'uploader_url': 'https://bsky.app/profile/alt.bun.how',
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'crazy that i look like this tbh',
'description': 'crazy that i look like this tbh',
'upload_date': '20241030',
'timestamp': 1730332128,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': ['sexual'],
'age_limit': 18,
},
}, {
'url': 'at://did:plc:ia76kvnndjutgedggx2ibrem/app.bsky.feed.post/3l6zrz6zyl2dr',
'md5': '71b0eb6d85d03145e6af6642c7fc6d78',
'info_dict': {
'id': '3l6zrz6zyl2dr',
'ext': 'mp4',
'uploader': 'mary🐇',
'uploader_id': 'mary.my.id',
'uploader_url': 'https://bsky.app/profile/mary.my.id',
'channel_id': 'did:plc:ia76kvnndjutgedggx2ibrem',
'channel_url': 'https://bsky.app/profile/did:plc:ia76kvnndjutgedggx2ibrem',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'Bluesky video #3l6zrz6zyl2dr',
'upload_date': '20241021',
'timestamp': 1729523172,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
},
}, {
'url': 'https://bsky.app/profile/purpleicetea.bsky.social/post/3l7gv55dc2o2w',
'info_dict': {
'id': '3l7gv55dc2o2w',
},
'playlist': [{
'info_dict': {
'id': '3l7gv55dc2o2w',
'ext': 'mp4',
'upload_date': '20241026',
'description': 'One of my favorite videos',
'comment_count': int,
'uploader_url': 'https://bsky.app/profile/purpleicetea.bsky.social',
'uploader': 'Purple.Ice.Tea',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'channel_url': 'https://bsky.app/profile/did:plc:bjh5ffwya5f53dfy47dezuwx',
'like_count': int,
'channel_id': 'did:plc:bjh5ffwya5f53dfy47dezuwx',
'repost_count': int,
'timestamp': 1729973202,
'tags': [],
'uploader_id': 'purpleicetea.bsky.social',
'title': 'One of my favorite videos',
},
}, {
'info_dict': {
'id': '3l77u64l7le2e',
'ext': 'mp4',
'title': 'hearing people on twitter say that bluesky isn\'...',
'like_count': int,
'uploader_id': 'thafnine.net',
'uploader_url': 'https://bsky.app/profile/thafnine.net',
'upload_date': '20241024',
'channel_url': 'https://bsky.app/profile/did:plc:6ttyq36rhiyed7wu3ws7dmqj',
'description': r're:(?s)hearing people on twitter say that bluesky .{93}',
'tags': [],
'alt_title': 'md5:9b1ee1937fb3d1a81e932f9ec14d560e',
'uploader': 'T9',
'channel_id': 'did:plc:6ttyq36rhiyed7wu3ws7dmqj',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'timestamp': 1729731642,
'comment_count': int,
'repost_count': int,
},
}],
}]
_BLOB_URL_TMPL = '{}/xrpc/com.atproto.sync.getBlob'
def _get_service_endpoint(self, did, video_id):
if did.startswith('did:web:'):
url = f'https://{did[8:]}/.well-known/did.json'
else:
url = f'https://plc.directory/{did}'
services = self._download_json(
url, video_id, 'Fetching service endpoint', 'Falling back to bsky.social', fatal=False)
return traverse_obj(
services, ('service', lambda _, x: x['type'] == 'AtprotoPersonalDataServer',
'serviceEndpoint', {url_or_none}, any)) or 'https://bsky.social'
def _real_extract(self, url):
handle, video_id = self._match_valid_url(url).group('handle', 'id')
post = self._download_json(
'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
video_id, query={
'uri': f'at://{handle}/app.bsky.feed.post/{video_id}',
'depth': 0,
'parentHeight': 0,
})['thread']['post']
entries = []
# app.bsky.embed.video.view/app.bsky.embed.external.view
entries.extend(self._extract_videos(post, video_id))
# app.bsky.embed.recordWithMedia.view
entries.extend(self._extract_videos(
post, video_id, embed_path=('embed', 'media'), record_subpath=('embed', 'media')))
# app.bsky.embed.record.view
if nested_post := traverse_obj(post, ('embed', 'record', ('record', None), {dict}, any)):
entries.extend(self._extract_videos(
nested_post, video_id, embed_path=('embeds', 0), record_path='value'))
if not entries:
raise ExtractorError('No video could be found in this post', expected=True)
if len(entries) == 1:
return entries[0]
return self.playlist_result(entries, video_id)
@staticmethod
def _build_profile_url(path):
return format_field(path, None, 'https://bsky.app/profile/%s', default=None)
def _extract_videos(self, root, video_id, embed_path='embed', record_path='record', record_subpath='embed'):
embed_path = variadic(embed_path, (str, bytes, dict, set))
record_path = variadic(record_path, (str, bytes, dict, set))
record_subpath = variadic(record_subpath, (str, bytes, dict, set))
entries = []
if external_uri := traverse_obj(root, (
((*record_path, *record_subpath), embed_path), 'external', 'uri', {url_or_none}, any)):
entries.append(self.url_result(external_uri))
if playlist := traverse_obj(root, (*embed_path, 'playlist', {url_or_none})):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
playlist, video_id, 'mp4', m3u8_id='hls', fatal=False)
else:
return entries
video_cid = traverse_obj(
root, (*embed_path, 'cid', {str}),
(*record_path, *record_subpath, 'video', 'ref', '$link', {str}))
did = traverse_obj(root, ('author', 'did', {str}))
if did and video_cid:
endpoint = self._get_service_endpoint(did, video_id)
formats.append({
'format_id': 'blob',
'url': update_url_query(
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': video_cid}),
**traverse_obj(root, (*embed_path, 'aspectRatio', {
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
})),
**traverse_obj(root, (*record_path, *record_subpath, 'video', {
'filesize': ('size', {int_or_none}),
'ext': ('mimeType', {mimetype2ext}),
})),
})
for sub_data in traverse_obj(root, (
*record_path, *record_subpath, 'captions', lambda _, v: v['file']['ref']['$link'])):
subtitles.setdefault(sub_data.get('lang') or 'und', []).append({
'url': update_url_query(
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': sub_data['file']['ref']['$link']}),
'ext': traverse_obj(sub_data, ('file', 'mimeType', {mimetype2ext})),
})
entries.append({
'id': video_id,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(root, {
'id': ('uri', {url_basename}),
'thumbnail': (*embed_path, 'thumbnail', {url_or_none}),
'alt_title': (*embed_path, 'alt', {str}, filter),
'uploader': ('author', 'displayName', {str}),
'uploader_id': ('author', 'handle', {str}),
'uploader_url': ('author', 'handle', {self._build_profile_url}),
'channel_id': ('author', 'did', {str}),
'channel_url': ('author', 'did', {self._build_profile_url}),
'like_count': ('likeCount', {int_or_none}),
'repost_count': ('repostCount', {int_or_none}),
'comment_count': ('replyCount', {int_or_none}),
'timestamp': ('indexedAt', {parse_iso8601}),
'tags': ('labels', ..., 'val', {str}, all, {orderedSet}),
'age_limit': (
'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
'description': (*record_path, 'text', {str}, filter),
'title': (*record_path, 'text', {lambda x: x.replace('\n', '')}, {truncate_string(left=50)}),
}),
})
return entries

View File

@@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
class CallinIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
_TESTS = [{
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
'info_dict': {

View File

@@ -4,7 +4,6 @@ import json
import re
import time
import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor
from ..networking import HEADRequest
@@ -12,7 +11,6 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
join_nonempty,
js_to_json,
mimetype2ext,
orderedSet,
@@ -524,14 +522,13 @@ class CBCGemIE(InfoExtractor):
_TESTS = [{
# This is a normal, public, TV show video
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
'info_dict': {
'id': 'schitts-creek/s06e01',
'ext': 'mp4',
'title': 'Smoke Signals',
'description': 'md5:929868d20021c924020641769eb3e7f1',
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
'duration': 1314,
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
'duration': 1324,
'categories': ['comedy'],
'series': 'Schitt\'s Creek',
'season': 'Season 6',
@@ -539,19 +536,21 @@ class CBCGemIE(InfoExtractor):
'episode': 'Smoke Signals',
'episode_number': 1,
'episode_id': 'schitts-creek/s06e01',
'upload_date': '20210618',
'timestamp': 1623988800,
'release_date': '20200107',
'release_timestamp': 1578427200,
},
'params': {'format': 'bv'},
'skip': 'Geo-restricted to Canada',
}, {
# This video requires an account in the browser, but works fine in yt-dlp
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
'md5': '297a9600f554f2258aed01514226a697',
'info_dict': {
'id': 'schitts-creek/s01e01',
'ext': 'mp4',
'title': 'The Cup Runneth Over',
'description': 'md5:9bca14ea49ab808097530eb05a29e797',
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg',
'series': 'Schitt\'s Creek',
'season_number': 1,
'season': 'Season 1',
@@ -560,9 +559,12 @@ class CBCGemIE(InfoExtractor):
'episode_id': 'schitts-creek/s01e01',
'duration': 1309,
'categories': ['comedy'],
'upload_date': '20210617',
'timestamp': 1623902400,
'release_date': '20151124',
'release_timestamp': 1448323200,
},
'params': {'format': 'bv'},
'skip': 'Geo-restricted to Canada',
}, {
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
'only_matching': True,
@@ -631,38 +633,6 @@ class CBCGemIE(InfoExtractor):
return
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
def _find_secret_formats(self, formats, video_id):
""" Find a valid video url and convert it to the secret variant """
base_format = next((f for f in formats if f.get('vcodec') != 'none'), None)
if not base_format:
return
base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])
url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
if not isinstance(secret_xml, xml.etree.ElementTree.Element):
return
for child in secret_xml:
if child.attrib.get('Type') != 'video':
continue
for video_quality in child:
bitrate = int_or_none(video_quality.attrib.get('Bitrate'))
if not bitrate or 'Index' not in video_quality.attrib:
continue
height = int_or_none(video_quality.attrib.get('MaxHeight'))
yield {
**base_format,
'format_id': join_nonempty('sec', height),
# Note: \g<1> is necessary instead of \1 since bitrate is a number
'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
'width': int_or_none(video_quality.attrib.get('MaxWidth')),
'tbr': bitrate / 1000.0,
'height': height,
}
def _real_extract(self, url):
video_id = self._match_id(url)
video_info = self._download_json(
@@ -676,7 +646,6 @@ class CBCGemIE(InfoExtractor):
else:
headers = {}
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
m3u8_url = m3u8_info.get('url')
if m3u8_info.get('errorCode') == 1:
self.raise_geo_restricted(countries=['CA'])
@@ -685,9 +654,9 @@ class CBCGemIE(InfoExtractor):
elif m3u8_info.get('errorCode') != 0:
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
formats = self._extract_m3u8_formats(
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
self._remove_duplicate_formats(formats)
formats.extend(self._find_secret_formats(formats, video_id))
for fmt in formats:
if fmt.get('vcodec') == 'none':
@@ -703,20 +672,21 @@ class CBCGemIE(InfoExtractor):
return {
'id': video_id,
'title': video_info['title'],
'description': video_info.get('description'),
'thumbnail': video_info.get('image'),
'series': video_info.get('series'),
'season_number': video_info.get('season'),
'season': f'Season {video_info.get("season")}',
'episode_number': video_info.get('episode'),
'episode': video_info.get('title'),
'episode_id': video_id,
'duration': video_info.get('duration'),
'categories': [video_info.get('category')],
'formats': formats,
'release_timestamp': video_info.get('airDate'),
'timestamp': video_info.get('availableDate'),
**traverse_obj(video_info, {
'title': ('title', {str}),
'episode': ('title', {str}),
'description': ('description', {str}),
'thumbnail': ('image', {url_or_none}),
'series': ('series', {str}),
'season_number': ('season', {int_or_none}),
'episode_number': ('episode', {int_or_none}),
'duration': ('duration', {int_or_none}),
'categories': ('category', {str}, all),
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
}),
}

View File

@@ -12,53 +12,86 @@ from ..utils import (
class CCMAIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
'md5': '7296ca43977c8ea4469e719c609b0871',
'info_dict': {
'id': '5630208',
'ext': 'mp4',
'title': 'L\'espot de La Marató de TV3',
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
'timestamp': 1478608140,
'upload_date': '20161108',
'age_limit': 0,
'alt_title': 'EsportMarató2016WEB_PerPublicar',
'duration': 79,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
'categories': ['Divulgació'],
},
}, {
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
'md5': 'fa3e38f269329a278271276330261425',
'info_dict': {
'id': '943685',
'ext': 'mp3',
'title': 'El Consell de Savis analitza el derbi',
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
'upload_date': '20170512',
'timestamp': 1494622500,
'upload_date': '20161217',
'timestamp': 1482011700,
'vcodec': 'none',
'categories': ['Esports'],
'series': 'Tot gira',
'duration': 821,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
},
}, {
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
'md5': '27493513d08a3e5605814aee9bb778d2',
'info_dict': {
'id': '6031387',
'ext': 'mp4',
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
'timestamp': 1582577700,
'timestamp': 1582577919,
'upload_date': '20200224',
'subtitles': 'mincount:4',
'age_limit': 16,
'subtitles': 'mincount:1',
'age_limit': 13,
'series': 'Crims',
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
'duration': 3203,
'categories': ['Divulgació'],
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
'episode_number': 5,
'episode': 'Episode 5',
},
}, {
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
'info_dict': {
'id': '5759227',
'ext': 'mp4',
'title': 'Una mosca volava per la llum',
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
'description': 'md5:9ab64276944b0825336f4147f13f7854',
'series': 'Mic',
'upload_date': '20180411',
'timestamp': 1523440105,
'duration': 160,
'age_limit': 0,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
'categories': ['Música'],
},
}]
def _real_extract(self, url):
media_type, media_id = self._match_valid_url(url).groups()
media_type, media_id = self._match_valid_url(url).group('type', 'id')
media = self._download_json(
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
'media': media_type,
'idint': media_id,
'format': 'dm',

View File

@@ -12,6 +12,7 @@ from .common import InfoExtractor
from ..compat import compat_ord
from ..utils import (
ExtractorError,
OnDemandPagedList,
float_or_none,
int_or_none,
merge_dicts,
@@ -351,3 +352,50 @@ class CDAIE(InfoExtractor):
extract_format(webpage, resolution)
return merge_dicts(info_dict, info)
class CDAFolderIE(InfoExtractor):
_MAX_PAGE_SIZE = 36
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
_TESTS = [
{
'url': 'https://www.cda.pl/domino264/folder/31188385',
'info_dict': {
'id': '31188385',
'title': 'SERIA DRUGA',
},
'playlist_mincount': 13,
},
{
'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
'info_dict': {
'id': '2664592',
'title': 'VideoDowcipy - wszystkie odcinki',
},
'playlist_mincount': 71,
},
{
'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
'info_dict': {
'id': '19129979',
'title': 'TESTY KOSMETYKÓW',
},
'playlist_mincount': 139,
}]
def _real_extract(self, url):
folder_id, channel = self._match_valid_url(url).group('id', 'channel')
webpage = self._download_webpage(url, folder_id)
def extract_page_entries(page):
webpage = self._download_webpage(
f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page + 1}', folder_id,
f'Downloading page {page + 1}', expected_status=404)
items = re.findall(r'<a[^>]+href="/video/([0-9a-z]+)"', webpage)
for video_id in items:
yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id)
return self.playlist_result(
OnDemandPagedList(extract_page_entries, self._MAX_PAGE_SIZE),
folder_id, self._og_search_title(webpage))

View File

@@ -146,19 +146,33 @@ class CHZZKVideoIE(InfoExtractor):
video_meta = self._download_json(
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
note='Downloading video info', errnote='Unable to download video info')['content']
formats, subtitles = self._extract_mpd_formats_and_subtitles(
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
query={
'key': video_meta['inKey'],
'env': 'real',
'lc': 'en_US',
'cpl': 'en_US',
}, note='Downloading video playback', errnote='Unable to download video playback')
live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
video_status = video_meta.get('vodStatus')
if video_status == 'UPLOAD':
playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
elif video_status == 'ABR_HLS':
formats, subtitles = self._extract_mpd_formats_and_subtitles(
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
video_id, query={
'key': video_meta['inKey'],
'env': 'real',
'lc': 'en_US',
'cpl': 'en_US',
})
else:
self.raise_no_formats(
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
formats, subtitles = [], {}
live_status = 'post_live' if live_status == 'was_live' else None
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
'live_status': live_status,
**traverse_obj(video_meta, {
'title': ('videoTitle', {str}),
'thumbnail': ('thumbnailImageUrl', {url_or_none}),

View File

@@ -1,146 +1,226 @@
import functools
import json
import re
from .common import InfoExtractor
from .turner import TurnerBaseIE
from ..utils import merge_dicts, try_call, url_basename
from ..utils import (
clean_html,
extract_attributes,
int_or_none,
merge_dicts,
parse_duration,
parse_iso8601,
parse_resolution,
try_call,
update_url,
url_or_none,
)
from ..utils.traversal import find_elements, traverse_obj
class CNNIE(TurnerBaseIE):
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
class CNNIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
'md5': '3e6121ea48df7e2259fe73a0628605c4',
'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
'info_dict': {
'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
'ext': 'mp4',
'title': 'Nadal wins 8th French Open title',
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
'duration': 135,
'upload_date': '20130609',
'upload_date': '20240531',
'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
'duration': 373.0,
'timestamp': 1717148586,
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
'modified_date': '20240531',
'modified_timestamp': 1717150140,
},
'expected_warnings': ['Failed to download m3u8 information'],
}, {
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
'info_dict': {
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
'ext': 'mp4',
'title': "Student's epic speech stuns new freshmen",
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."',
'upload_date': '20130821',
'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
'title': 'Heres how some inmates in closely divided state are now able to vote from jail',
'timestamp': 1718158269,
'upload_date': '20240612',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
'duration': 202.0,
'modified_date': '20240612',
'modified_timestamp': 1718158509,
},
'expected_warnings': ['Failed to download m3u8 information'],
}, {
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
'info_dict': {
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
'ext': 'mp4',
'title': 'Nashville Ep. 1: Hand crafted skateboards',
'description': 'md5:e7223a503315c9f150acac52e76de086',
'upload_date': '20141222',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
'description': 'md5:19f78338ccec533db0fa8a4511012dae',
'title': 'Video shows King Charles\' portrait being vandalized by activists',
'timestamp': 1718113852,
'upload_date': '20240611',
'duration': 51.0,
'modified_timestamp': 1718116193,
'modified_date': '20240611',
},
'expected_warnings': ['Failed to download m3u8 information'],
}, {
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
'info_dict': {
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
'ext': 'mp4',
'title': '5 stunning stats about Netflix',
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
'upload_date': '20160819',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
'duration': 158.0,
'title': 'Robin Meade signs off after HLN\'s last broadcast',
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
'upload_date': '20221205',
'timestamp': 1670284296,
'modified_timestamp': 1670332404,
'modified_date': '20221206',
},
'params': {
# m3u8 download
'skip_download': True,
'params': {'format': 'direct'},
}, {
'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
'info_dict': {
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
'ext': 'mp4',
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
'timestamp': 1729501452,
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
'description': 'md5:256ee7137d161f776cda429654135e52',
'upload_date': '20241021',
'duration': 31.0,
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
'modified_date': '20241021',
'modified_timestamp': 1729501530,
},
}, {
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
'only_matching': True,
}, {
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
'only_matching': True,
}, {
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
'only_matching': True,
'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
'info_dict': {
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
},
'playlist_count': 2,
'playlist': [{
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
'info_dict': {
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
'ext': 'mp4',
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
'duration': 173.0,
'timestamp': 1729122182,
'upload_date': '20241016',
'modified_timestamp': 1729194706,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}, {
'md5': '11604ab4af83b650826753f1ccb8ecff',
'info_dict': {
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
'ext': 'mp4',
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
'duration': 145.0,
'timestamp': 1729137765,
'upload_date': '20241017',
'modified_timestamp': 1729138184,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}],
}]
_CONFIG = {
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
'edition': {
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
},
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
'money': {
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
'media_src': 'http://ht3.cdn.turner.com/money/big',
},
}
def _extract_timestamp(self, video_data):
# TODO: fix timestamp extraction
return None
def _real_extract(self, url):
sub_domain, path, page_title = self._match_valid_url(url).groups()
if sub_domain not in ('money', 'edition'):
sub_domain = 'edition'
config = self._CONFIG[sub_domain]
return self._extract_cvp_info(
config['data_src'] % path, page_title, {
'default': {
'media_src': config['media_src'],
},
'f4m': {
'host': 'cnn-vh.akamaihd.net',
},
display_id = self._match_valid_url(url).group('display_id')
webpage = self._download_webpage(url, display_id)
app_id = traverse_obj(
self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
('TOP_AUTH_SERVICE_APP_ID', {str}))
entries = []
for player_data in traverse_obj(webpage, (
{find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
media_id = player_data['data-media-id']
parent_uri = player_data.get('data-video-resource-parent-uri')
formats, subtitles = [], {}
video_data = {}
if parent_uri:
video_data = self._download_json(
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
query={
'id': media_id,
'stellarUri': parent_uri,
})
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
resolution, bitrate = None, None
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
resolution, bitrate = mobj.group('res', 'tbr')
formats.append({
'url': direct_url,
'format_id': 'direct',
'quality': 1,
'tbr': int_or_none(bitrate),
**parse_resolution(resolution),
})
for sub_data in traverse_obj(video_data, (
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
'url': sub_data['url'],
'name': sub_data.get('label'),
})
if app_id:
media_data = self._download_json(
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
query={'appId': app_id})
m3u8_url = traverse_obj(media_data, (
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
if m3u8_url:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
entries.append({
**traverse_obj(player_data, {
'title': ('data-headline', {clean_html}),
'description': ('data-description', {clean_html}),
'duration': ('data-duration', {parse_duration}),
'timestamp': ('data-publish-date', {parse_iso8601}),
'thumbnail': (
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
{functools.partial(update_url, query='c=original')}),
'display_id': 'data-video-slug',
}),
**traverse_obj(video_data, {
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
'description': ('description', {clean_html}),
'title': ('headline', {str}),
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
'duration': ('trt', {int_or_none}),
}),
'id': media_id,
'formats': formats,
'subtitles': subtitles,
})
if len(entries) == 1:
return {
**entries[0],
'display_id': display_id,
}
class CNNBlogsIE(InfoExtractor):
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
_TEST = {
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
'info_dict': {
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
'ext': 'mp4',
'title': 'Criminalizing journalism?',
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
'upload_date': '20140209',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
return self.url_result(cnn_url, CNNIE.ie_key())
class CNNArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
_TEST = {
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
'info_dict': {
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
'ext': 'mp4',
'title': 'Obama: Cyberattack not an act of war',
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
'upload_date': '20141221',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
return self.playlist_result(entries, display_id)
class CNNIndonesiaIE(InfoExtractor):

View File

@@ -35,6 +35,7 @@ from ..networking import HEADRequest, Request
from ..networking.exceptions import (
HTTPError,
IncompleteRead,
TransportError,
network_exceptions,
)
from ..networking.impersonate import ImpersonateTarget
@@ -46,6 +47,7 @@ from ..utils import (
FormatSorter,
GeoRestrictedError,
GeoUtils,
ISO639Utils,
LenientJSONDecoder,
Popen,
RegexNotFoundError,
@@ -332,7 +334,7 @@ class InfoExtractor:
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
repost_count: Number of reposts of the video
average_rating: Average rating give by users, the scale used depends on the webpage
average_rating: Average rating given by users, the scale used depends on the webpage
comment_count: Number of comments on the video
comments: A list of comments, each with one or more of the following
properties (all but one of text or html optional):
@@ -519,7 +521,7 @@ class InfoExtractor:
or _extract_from_webpage as necessary. While these are normally classmethods,
_extract_from_webpage is allowed to be an instance method.
_extract_from_webpage may raise self.StopExtraction() to stop further
_extract_from_webpage may raise self.StopExtraction to stop further
processing of the webpage and obtain exclusive rights to it. This is useful
when the extractor cannot reliably be matched using just the URL,
e.g. invidious/peertube instances
@@ -572,13 +574,13 @@ class InfoExtractor:
def _login_hint(self, method=NO_DEFAULT, netrc=None):
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
cookies_hint = 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'
return {
None: '',
'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
'any': f'Use --cookies, --cookies-from-browser, {password_hint}. {cookies_hint}',
'password': f'Use {password_hint}',
'cookies': (
'Use --cookies-from-browser or --cookies for the authentication. '
'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
'cookies': f'Use --cookies-from-browser or --cookies for the authentication. {cookies_hint}',
'session_cookies': f'Use --cookies for the authentication (--cookies-from-browser might not work). {cookies_hint}',
}[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
def __init__(self, downloader=None):
@@ -965,6 +967,9 @@ class InfoExtractor:
return False
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
encoding=encoding, data=data)
if content is False:
assert not fatal
return False
return (content, urlh)
@staticmethod
@@ -1039,7 +1044,15 @@ class InfoExtractor:
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
prefix=None, encoding=None, data=None):
webpage_bytes = urlh.read()
try:
webpage_bytes = urlh.read()
except TransportError as err:
errmsg = f'{video_id}: Error reading response: {err.msg}'
if fatal:
raise ExtractorError(errmsg, cause=err)
self.report_warning(errmsg)
return False
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
if self.get_param('dump_intermediate_pages', False):
@@ -1396,6 +1409,13 @@ class InfoExtractor:
return None, None
self.write_debug(f'Using netrc for {netrc_machine} authentication')
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
if sys.version_info < (3, 11):
return tuple(x if x != '""' else '' for x in info[::2])
return info[0], info[2]
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
@@ -1698,7 +1718,7 @@ class InfoExtractor:
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
if rating is not None:
info['average_rating'] = rating
if is_type(e, 'TVEpisode', 'Episode'):
if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
episode_name = unescapeHTML(e.get('name'))
info.update({
'episode': episode_name,
@@ -2065,7 +2085,7 @@ class InfoExtractor:
has_drm = HlsFD._has_drm(m3u8_doc)
def format_url(url):
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
if self.get_param('hls_split_discontinuity', False):
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
@@ -2800,11 +2820,11 @@ class InfoExtractor:
base_url_e = element.find(_add_ns('BaseURL'))
if try_call(lambda: base_url_e.text) is not None:
base_url = base_url_e.text + base_url
if re.match(r'^https?://', base_url):
if re.match(r'https?://', base_url):
break
if mpd_base_url and base_url.startswith('/'):
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
elif mpd_base_url and not re.match(r'^https?://', base_url):
elif mpd_base_url and not re.match(r'https?://', base_url):
if not mpd_base_url.endswith('/'):
mpd_base_url += '/'
base_url = mpd_base_url + base_url
@@ -2894,7 +2914,7 @@ class InfoExtractor:
}
def location_key(location):
return 'url' if re.match(r'^https?://', location) else 'path'
return 'url' if re.match(r'https?://', location) else 'path'
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
@@ -3059,7 +3079,11 @@ class InfoExtractor:
url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name')
stream_language = stream.get('Language', 'und')
# IsmFD expects ISO 639 Set 2 language codes (3-character length)
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
stream_language = stream.get('Language') or 'und'
if len(stream_language) != 3:
stream_language = ISO639Utils.short2long(stream_language) or 'und'
for track in stream.findall('QualityLevel'):
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
@@ -3489,7 +3513,7 @@ class InfoExtractor:
continue
urls.add(source_url)
source_type = source.get('type') or ''
ext = mimetype2ext(source_type) or determine_ext(source_url)
ext = determine_ext(source_url, default_ext=mimetype2ext(source_type))
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native',

View File

@@ -6,12 +6,37 @@ from ..utils import (
parse_iso8601,
smuggle_url,
str_or_none,
update_url_query,
)
class CWTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
_TESTS = [{
'url': 'https://www.cwtv.com/shows/all-american-homecoming/ready-or-not/?play=d848488f-f62a-40fd-af1f-6440b1821aab',
'info_dict': {
'id': 'd848488f-f62a-40fd-af1f-6440b1821aab',
'ext': 'mp4',
'title': 'Ready Or Not',
'description': 'Simone is concerned about changes taking place at Bringston; JR makes a decision about his future.',
'thumbnail': r're:^https?://.*\.jpe?g$',
'duration': 2547,
'timestamp': 1720519200,
'uploader': 'CWTV',
'chapters': 'count:6',
'series': 'All American: Homecoming',
'season_number': 3,
'episode_number': 1,
'age_limit': 0,
'upload_date': '20240709',
'season': 'Season 3',
'episode': 'Episode 1',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
'info_dict': {
'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
@@ -69,13 +94,14 @@ class CWTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
video_id)
f'https://images.cwtv.com/feed/mobileapp/video-meta/apiversion_12/guid_{video_id}', video_id)
if data.get('result') != 'ok':
raise ExtractorError(data['msg'], expected=True)
video_data = data['video']
title = video_data['title']
mpx_url = video_data.get('mpx_url') or f'http://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}?formats=M3U'
mpx_url = update_url_query(
video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}',
{'formats': 'M3U+none'})
season = str_or_none(video_data.get('season'))
episode = str_or_none(video_data.get('episode'))

View File

@@ -10,11 +10,14 @@ from ..utils import (
OnDemandPagedList,
age_restricted,
clean_html,
extract_attributes,
int_or_none,
traverse_obj,
try_get,
unescapeHTML,
unsmuggle_url,
update_url,
url_or_none,
urlencode_postdata,
)
@@ -98,12 +101,20 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor):
_VALID_URL = r'''(?ix)
https?://
(?:
dai\.ly/|
(?:
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
(?:www\.)?lequipe\.fr/video
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
(?:www\.)?lequipe\.fr
)/
(?:
swf/(?!video)|
(?:(?:crawler|embed|swf)/)?video/|
player(?:/[\da-z]+)?\.html\?(?:video|(?P<is_playlist>playlist))=
)
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
'''
)
(?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
'''
IE_NAME = 'dailymotion'
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
_TESTS = [{
@@ -123,7 +134,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'view_count': int,
'like_count': int,
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080',
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080',
},
}, {
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
@@ -142,7 +153,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'view_count': int,
'like_count': int,
'tags': ['en_quete_d_esprit'],
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080',
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080',
},
}, {
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
@@ -217,6 +228,66 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}, {
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
'only_matching': True,
}, { # playlist-only
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
'only_matching': True,
}, {
'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi',
'only_matching': True,
}, {
'url': 'https://www.dailymotion.com/crawler/video/x8u4owg',
'only_matching': True,
}, {
'url': 'https://www.dailymotion.com/embed/video/x8u4owg',
'only_matching': True,
}, {
'url': 'https://dai.ly/x94cnnk',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
'info_dict': {
'id': 'x93blhi',
'ext': 'mp4',
'title': 'OnAir - 01/08/24',
'description': '',
'duration': 217,
'timestamp': 1722505658,
'upload_date': '20240801',
'uploader': 'Financialounge',
'uploader_id': 'x2vtgmm',
'age_limit': 0,
'tags': [],
'view_count': int,
'like_count': int,
},
}, {
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
'info_dict': {
'id': 'x7wdsj',
},
'playlist_mincount': 50,
}, {
# https://www.dailymotion.com/crawler/video/x8u4owg
'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php',
'info_dict': {
'id': 'x8u4owg',
'ext': 'mp4',
'like_count': int,
'uploader': 'Le Parisien',
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
'upload_date': '20240309',
'view_count': int,
'timestamp': 1709997866,
'age_limit': 0,
'uploader_id': 'x32f7b',
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
'duration': 428.0,
'description': 'À bord du « véloto », lalternative à la voiture pour la campagne',
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
},
}]
_GEO_BYPASS = False
_COMMON_MEDIA_FIELDS = '''description
@@ -232,16 +303,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
for mobj in re.finditer(
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
for mobj in re.finditer(
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
attrs = extract_attributes(mobj.group(0))
player_url = url_or_none(attrs.get('src'))
if not player_url:
continue
player_url = player_url.replace('.js', '.html')
if player_url.startswith('//'):
player_url = f'https:{player_url}'
if video_id := attrs.get('data-video'):
query_string = f'video={video_id}'
elif playlist_id := attrs.get('data-playlist'):
query_string = f'playlist={playlist_id}'
else:
continue
yield update_url(player_url, query=query_string)
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url)
video_id, playlist_id = self._match_valid_url(url).groups()
video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
if playlist_id:
if self._yes_playlist(playlist_id, video_id):
return self.url_result(
'http://www.dailymotion.com/playlist/' + playlist_id,
'DailymotionPlaylist', playlist_id)
if is_playlist: # We matched the playlist query param as video_id
playlist_id = video_id
video_id = None
if self._yes_playlist(playlist_id, video_id):
return self.url_result(
f'http://www.dailymotion.com/playlist/{playlist_id}',
'DailymotionPlaylist', playlist_id)
password = self.get_param('videopassword')
media = self._call_api(
@@ -282,6 +372,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
title = metadata['title']
is_live = media.get('isOnAir')
formats = []
subtitles = {}
for quality, media_list in metadata['qualities'].items():
for m in media_list:
media_url = m.get('url')
@@ -289,8 +381,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
continue
if media_type == 'application/x-mpegURL':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
fmt, subs = self._extract_m3u8_formats_and_subtitles(
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
formats.extend(fmt)
self._merge_subtitles(subs, target=subtitles)
else:
f = {
'url': media_url,
@@ -310,20 +404,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
if not f.get('fps') and f['format_id'].endswith('@60'):
f['fps'] = 60
subtitles = {}
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
for subtitle_lang, subtitle in subtitles_data.items():
subtitles[subtitle_lang] = [{
'url': subtitle_url,
} for subtitle_url in subtitle.get('urls', [])]
thumbnails = []
for height, poster_url in metadata.get('posters', {}).items():
thumbnails.append({
'height': int_or_none(height),
'id': height,
'url': poster_url,
})
thumbnails = traverse_obj(metadata, (
('posters', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]), {
'height': (0, {int_or_none}),
'id': (0, {str}),
'url': 1,
}))
owner = metadata.get('owner') or {}
stats = media.get('stats') or {}
@@ -447,7 +539,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
class DailymotionUserIE(DailymotionPlaylistBaseIE):
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search|crawler)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {

View File

@@ -319,32 +319,6 @@ class DPlayIE(DPlayBaseIE):
url, display_id, host, 'dplay' + country, country, domain)
class HGTVDeIE(DPlayBaseIE):
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
'info_dict': {
'id': '151205',
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
'ext': 'mp4',
'title': 'Wer braucht schon eine Toilette',
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
'duration': 1177.024,
'timestamp': 1595705400,
'upload_date': '20200725',
'creator': 'HGTV',
'series': 'Tiny House - klein, aber oho',
'season_number': 3,
'episode_number': 3,
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
return self._get_disco_api_info(
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
class DiscoveryPlusBaseIE(DPlayBaseIE):
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
@@ -373,6 +347,45 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
class HGTVDeIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://de.hgtv.com/sendungen/mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
'info_dict': {
'id': '7332936',
'ext': 'mp4',
'display_id': 'mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
'title': 'Vom Landleben ins Loft',
'description': 'md5:e5f72c02c853970796dd3818f2e25745',
'episode': 'Episode 7',
'episode_number': 7,
'season': 'Season 7',
'season_number': 7,
'series': 'Mein Kleinstadt-Traumhaus',
'duration': 2645.0,
'timestamp': 1725998100,
'upload_date': '20240910',
'creators': ['HGTV'],
'tags': [],
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/08/09/82a386b9-c688-32c7-b9ff-0b13865f0bae.jpeg',
},
}]
_PRODUCT = 'hgtv'
_DISCO_API_PARAMS = {
'disco_host': 'eu1-prod.disco-api.com',
'realm': 'hgtv',
'country': 'de',
}
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
headers.update({
'x-disco-params': f'realm={realm}',
'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
'Authorization': self._get_auth(disco_base, display_id, realm),
})
class GoDiscoveryIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{

View File

@@ -6,8 +6,10 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
update_url,
update_url_query,
url_basename,
urlencode_postdata,
)
@@ -36,43 +38,58 @@ class DropboxIE(InfoExtractor):
},
]
def _yield_decoded_parts(self, webpage):
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
yield base64.b64decode(encoded).decode('utf-8', 'ignore')
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
fn = urllib.parse.unquote(url_basename(url))
title = os.path.splitext(fn)[0]
password = self.get_param('videopassword')
if (self._og_search_title(webpage) == 'Dropbox - Password Required'
or 'Enter the password for this link' in webpage):
for part in self._yield_decoded_parts(webpage):
if '/sm/password' in part:
webpage = self._download_webpage(
update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
break
if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required'
or 'Enter the password for this link' in webpage):
if password:
content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
response = self._download_json(
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(),
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
data=urlencode_postdata({
'is_xhr': 'true',
't': self._get_cookies('https://www.dropbox.com')['t'].value,
'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
'password': password,
'url': url,
}))
if response.get('status') != 'authed':
raise ExtractorError('Authentication failed!', expected=True)
webpage = self._download_webpage(url, video_id)
elif self._get_cookies('https://dropbox.com').get('sm_auth'):
webpage = self._download_webpage(url, video_id)
else:
raise ExtractorError('Invalid password', expected=True)
elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
webpage = self._download_webpage(url, video_id)
formats, subtitles, has_anonymous_download = [], {}, False
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
formats, subtitles = [], {}
has_anonymous_download = False
thumbnail = None
for part in self._yield_decoded_parts(webpage):
if not has_anonymous_download:
has_anonymous_download = self._search_regex(
r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
transcode_url = self._search_regex(
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
if not transcode_url:
continue
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
thumbnail = self._search_regex(
r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
break
# downloads enabled we can get the original file
@@ -89,4 +106,5 @@ class DropboxIE(InfoExtractor):
'title': title,
'formats': formats,
'subtitles': subtitles,
'thumbnail': thumbnail,
}

View File

@@ -139,12 +139,11 @@ class DRTVIE(InfoExtractor):
return
token_response = self._download_json(
'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
'https://isl.dr-massive.com/api/authorization/anonymous-sso', None,
note='Downloading anonymous token', headers={
'content-type': 'application/json',
}, query={
'device': 'web_browser',
'ff': 'idp,ldp,rpt',
'device': 'phone_android',
'lang': 'da',
'supportFallbackToken': 'true',
}, data=json.dumps({

View File

@@ -17,6 +17,7 @@ from ..utils import (
url_or_none,
variadic,
)
from ..utils.traversal import traverse_obj
class ERTFlixBaseIE(InfoExtractor):
@@ -74,29 +75,28 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
def _extract_formats_and_subs(self, video_id):
media_info = self._call_api(video_id, codename=video_id)
formats, subs = [], {}
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
for media in try_get(media_file, lambda x: x['Formats'], list) or []:
fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
if not fmt_url:
continue
ext = determine_ext(fmt_url)
if ext == 'm3u8':
formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
elif ext == 'mpd':
formats_, subs_ = self._extract_mpd_formats_and_subtitles(
fmt_url, video_id, mpd_id='dash', fatal=False)
else:
formats.append({
'url': fmt_url,
'format_id': str_or_none(media.get('Id')),
})
continue
formats.extend(formats_)
self._merge_subtitles(subs_, target=subs)
formats, subtitles = [], {}
for media in traverse_obj(media_info, (
'MediaFiles', lambda _, v: v['RoleCodename'] == 'main',
'Formats', lambda _, v: url_or_none(v['Url']))):
fmt_url = media['Url']
ext = determine_ext(fmt_url)
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
elif ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(
fmt_url, video_id, mpd_id='dash', fatal=False)
else:
formats.append({
'url': fmt_url,
'format_id': str_or_none(media.get('Id')),
})
continue
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return formats, subs
return formats, subtitles
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -294,37 +294,37 @@ class ESPNCricInfoIE(InfoExtractor):
class WatchESPNIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
_TESTS = [{
'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
'url': 'https://www.espn.com/watch/player/_/id/11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
'info_dict': {
'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
'id': '11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
'ext': 'mp4',
'title': 'Huddersfield vs. Burnley',
'duration': 7500,
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
'title': 'Abilene Chrstn vs. Texas Tech',
'duration': 14166,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/11ce417a-6ac9-42b6-8a15-46aeb9ad5710/16x9.jpg?timestamp=202407252343&showBadge=true&cb=12&package=ESPN_PLUS',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
'url': 'https://www.espn.com/watch/player/_/id/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
'info_dict': {
'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
'id': '90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
'ext': 'mp4',
'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
'duration': 8335,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
'title': 'UC Davis vs. California',
'duration': 9547,
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
'url': 'https://www.espn.com/watch/player/_/id/c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
'info_dict': {
'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
'id': 'c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
'ext': 'mp4',
'title': 'The Wheel - Episode 10',
'duration': 3352,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
'title': 'The College Football Show',
'duration': 3639,
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/c4313bbe-95b5-4bb8-b251-ac143ea0fc54/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
},
'params': {
'skip_download': True,
@@ -353,6 +353,13 @@ class WatchESPNIE(AdobePassIE):
if not cookie:
self.raise_login_required(method='cookies')
jwt = self._search_regex(r'=([^|]+)\|', cookie.value, 'cookie jwt')
id_token = self._download_json(
'https://registerdisney.go.com/jgc/v6/client/ESPN-ONESITE.WEB-PROD/guest/refresh-auth',
None, 'Refreshing token', headers={'Content-Type': 'application/json'}, data=json.dumps({
'refreshToken': json.loads(base64.urlsafe_b64decode(f'{jwt}==='))['refresh_token'],
}).encode())['data']['token']['id_token']
assertion = self._call_bamgrid_api(
'devices', video_id,
headers={'Content-Type': 'application/json; charset=UTF-8'},
@@ -371,7 +378,7 @@ class WatchESPNIE(AdobePassIE):
})['access_token']
assertion = self._call_bamgrid_api(
'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
'accounts/grant', video_id, payload={'id_token': id_token},
headers={
'Authorization': token,
'Content-Type': 'application/json; charset=UTF-8',

View File

@@ -3,7 +3,12 @@ from ..utils import traverse_obj
class EurosportIE(InfoExtractor):
_VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
_VALID_URL = r'''(?x)
https?://(?:
(?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
eurosport\.tvn24\.pl
)/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
'''
_TESTS = [{
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
'info_dict': {
@@ -70,6 +75,42 @@ class EurosportIE(InfoExtractor):
'duration': 105.0,
'upload_date': '20230518',
},
}, {
'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
'only_matching': True,
}, {
'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
'only_matching': True,
}]
_TOKEN = None
@@ -77,6 +118,7 @@ class EurosportIE(InfoExtractor):
# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
# but this method require to get sha256 hash
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
_GEO_BYPASS = False
def _real_initialize(self):
if EurosportIE._TOKEN is None:
@@ -98,13 +140,13 @@ class EurosportIE(InfoExtractor):
for stream_type in json_data['attributes']['streaming']:
if stream_type == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
elif stream_type == 'dash':
fmts, subs = self._extract_mpd_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
elif stream_type == 'mss':
fmts, subs = self._extract_ism_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)

View File

@@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
'timestamp': 1692346159,
'thumbnail': r're:^https?://.*',
'uploader_id': '100063551323670',
'duration': 3132.184,
'duration': 3133.583,
'view_count': int,
'concurrent_view_count': 0,
},
@@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
'upload_date': '20140506',
'timestamp': 1399398998,
'thumbnail': r're:^https?://.*',
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
'duration': 131.03,
'concurrent_view_count': int,
'view_count': int,
},
}, {
'note': 'Video with DASH manifest',
@@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
# have 1080P, but only up to 720p in swf params
# data.video.story.attachments[].media
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': 'ca63897a90c9452efee5f8c40d080e25',
'md5': '1659aa21fb3dd1585874f668e81a72c8',
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
@@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
'view_count': int,
'uploader_id': '100059479812265',
'concurrent_view_count': int,
'duration': 44.478,
'duration': 44.181,
},
}, {
# FIXME: unable to extract uploader, no formats found
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
@@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
'timestamp': 1511548260,
'upload_date': '20171124',
'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
'thumbnail': r're:^https?://.*',
'duration': 148.435,
'duration': 148.224,
},
}, {
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
@@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
'thumbnail': r're:^https?://.*',
'uploader': 'Lela Evans',
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
'upload_date': '20231228',
'timestamp': 1703804085,
'duration': 394.347,
@@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20180523',
'uploader': 'ESL One Dota 2',
'uploader_id': '100066514874195',
'duration': 4524.212,
'duration': 4524.001,
'view_count': int,
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
@@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
'title': 'Josef',
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
'timestamp': 1549275572,
'duration': 3.413,
'duration': 3.283,
'uploader': 'Josef Novak',
'description': '',
'upload_date': '20190204',
@@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
'playlist_count': 1,
'skip': 'Requires logging in',
}, {
# FIXME: Cannot parse data error
# data.event.cover_media_renderer.cover_video
'url': 'https://m.facebook.com/events/1509582499515440',
'info_dict': {
@@ -498,7 +501,8 @@ class FacebookIE(InfoExtractor):
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict}))
or get_first(post, ('event', 'event_creator', {dict})) or {})
or get_first(post, ('event', 'event_creator', {dict}))
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
uploader = uploader_data.get('name') or (
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
or self._search_regex(
@@ -524,6 +528,11 @@ class FacebookIE(InfoExtractor):
webpage, 'view count', default=None)),
'concurrent_view_count': get_first(post, (
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
'like_count': ('likers', 'count', {int}),
'comment_count': ('total_comment_count', {int}),
'repost_count': ('share_count_reduced', {parse_count}),
}), get_all=False),
}
info_json_ld = self._search_json_ld(webpage, video_id, default={})
@@ -555,11 +564,12 @@ class FacebookIE(InfoExtractor):
js_data, lambda x: x['jsmods']['instances'], list) or [])
def extract_dash_manifest(video, formats):
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
dash_manifest = traverse_obj(
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str)
if dash_manifest:
formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
mpd_url=video.get('dash_manifest_url')))
mpd_url=url_or_none(video.get('dash_manifest_url'))))
def process_formats(info):
# Downloads with browser's User-Agent are rate limited. Working around
@@ -609,12 +619,13 @@ class FacebookIE(InfoExtractor):
video = video['creation_story']
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
video.update(reel_info)
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
formats = []
q = qualities(['sd', 'hd'])
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
('browser_native_sd_url', 'sd')):
playable_url = video.get(key)
playable_url = fmt_data.get(key)
if not playable_url:
continue
if determine_ext(playable_url) == 'mpd':
@@ -626,7 +637,7 @@ class FacebookIE(InfoExtractor):
'quality': q(format_id) - 3,
'url': playable_url,
})
extract_dash_manifest(video, formats)
extract_dash_manifest(fmt_data, formats)
if not formats:
# Do not append false positive entry w/o any formats
return
@@ -932,18 +943,21 @@ class FacebookReelIE(InfoExtractor):
_TESTS = [{
'url': 'https://www.facebook.com/reel/1195289147628387',
'md5': 'f13dd37f2633595982db5ed8765474d3',
'md5': 'a53256d10fc2105441fe0c4212ed8cea',
'info_dict': {
'id': '1195289147628387',
'ext': 'mp4',
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
'description': 'md5:22f03309b216ac84720183961441d8db',
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
'uploader': 'Beast Camp Training',
'uploader_id': '100040874179269',
'duration': 9.579,
'timestamp': 1637502609,
'upload_date': '20211121',
'thumbnail': r're:^https?://.*',
'like_count': int,
'comment_count': int,
'repost_count': int,
},
}]
@@ -963,6 +977,7 @@ class FacebookAdsIE(InfoExtractor):
'id': '899206155126718',
'ext': 'mp4',
'title': 'video by Kandao',
'description': 'md5:0822724069e3aca97cbed5dabbab282e',
'uploader': 'Kandao',
'uploader_id': '774114102743284',
'uploader_url': r're:^https?://.*',
@@ -971,6 +986,22 @@ class FacebookAdsIE(InfoExtractor):
'upload_date': '20231214',
'like_count': int,
},
}, {
# key 'watermarked_video_sd_url' missing
'url': 'https://www.facebook.com/ads/library/?id=501152689226254',
'info_dict': {
'id': '501152689226254',
'ext': 'mp4',
'title': 'video by mat.nawrocki',
'description': 'md5:02a446ace7ff8c3c37a2892922492490',
'uploader': 'mat.nawrocki',
'uploader_id': '148586968341456',
'uploader_url': r're:^https?://.*',
'timestamp': 1723452305,
'thumbnail': r're:^https?://.*',
'upload_date': '20240812',
'like_count': int,
},
}, {
'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
'info_dict': {
@@ -1017,34 +1048,42 @@ class FacebookAdsIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
post_data = [self._parse_json(j, video_id, fatal=False)
for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
data = traverse_obj(post_data, (
..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
post_data = traverse_obj(
re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads}))
data = get_first(post_data, (
'require', ..., ..., ..., '__bbox', 'require', ..., ..., ...,
'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict}))
if not data:
raise ExtractorError('Unable to extract ad data')
title = data.get('title')
if not title or title == '{{product.name}}':
title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)
markup_id = traverse_obj(data, ('body', '__m', {str}))
markup = traverse_obj(post_data, (
..., 'require', ..., ..., ..., '__bbox', 'markup', lambda _, v: v[0].startswith(markup_id),
..., '__html', {clean_html}, {lambda x: not x.startswith('{{product.') and x}, any))
info_dict = traverse_obj(data, {
'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
info_dict = merge_dicts({
'title': title,
'description': markup or None,
}, traverse_obj(data, {
'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}),
'uploader': ('page_name', {str}),
'uploader_id': ('page_id', {str_or_none}),
'uploader_url': ('page_profile_uri', {url_or_none}),
'timestamp': ('creation_time', {int_or_none}),
'like_count': ('page_like_count', {int_or_none}),
})
}))
entries = []
for idx, entry in enumerate(traverse_obj(
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v[f]) for f in self._FORMATS_MAP))), 1,
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1,
):
entries.append({
'id': f'{video_id}_{idx}',
'title': entry.get('title') or title,
'description': entry.get('link_description') or info_dict.get('description'),
'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'),
'thumbnail': url_or_none(entry.get('video_preview_image_url')),
'formats': self._extract_formats(entry),
})

View File

@@ -14,7 +14,7 @@ from ..utils import (
class FC2IE(InfoExtractor):
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
IE_NAME = 'fc2'
_NETRC_MACHINE = 'fc2'
_TESTS = [{

View File

@@ -3,7 +3,7 @@ from .nexx import NexxIE
class FunkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
_VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
'md5': '8610449476156f338761a75391b0017d',
@@ -27,6 +27,9 @@ class FunkIE(InfoExtractor):
}, {
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
'only_matching': True,
}, {
'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -8,6 +8,9 @@ from .common import InfoExtractor
from .commonprotocols import RtmpIE
from .youtube import YoutubeIE
from ..compat import compat_etree_fromstring
from ..cookies import LenientSimpleCookie
from ..networking.exceptions import HTTPError
from ..networking.impersonate import ImpersonateTarget
from ..utils import (
KNOWN_EXTENSIONS,
MEDIA_EXTENSIONS,
@@ -2340,7 +2343,7 @@ class GenericIE(InfoExtractor):
default_search = 'fixup_error'
if default_search in ('auto', 'auto_warning', 'fixup_error'):
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
if re.match(r'[^\s/]+\.[^\s/]+/', url):
self.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
elif default_search != 'fixup_error':
@@ -2373,6 +2376,11 @@ class GenericIE(InfoExtractor):
else:
video_id = self._generic_id(url)
# Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
impersonate = self._configuration_arg('impersonate', ['false'])
if 'false' in impersonate:
impersonate = None
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
# making it impossible to download only chunk of the file (yet we need only 512kB to
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
@@ -2381,10 +2389,29 @@ class GenericIE(InfoExtractor):
# to accept raw bytes and being able to download only a chunk.
# It may probably better to solve this by checking Content-Type for application/octet-stream
# after a HEAD request, but not sure if we can rely on this.
full_response = self._request_webpage(url, video_id, headers=filter_dict({
'Accept-Encoding': 'identity',
'Referer': smuggled_data.get('referer'),
}))
try:
full_response = self._request_webpage(url, video_id, headers=filter_dict({
'Accept-Encoding': 'identity',
'Referer': smuggled_data.get('referer'),
}), impersonate=impersonate)
except ExtractorError as e:
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
and e.cause.response.get_header('cf-mitigated') == 'challenge'
and e.cause.response.extensions.get('impersonate') is None):
raise
cf_cookie_domain = traverse_obj(
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
('__cf_bm', 'domain'))
if cf_cookie_domain:
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
'how to install the required impersonation dependency, and ')
raise ExtractorError(
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
new_url = full_response.url
if new_url != extract_basic_auth(url)[0]:
self.report_following_redirect(new_url)
@@ -2400,7 +2427,7 @@ class GenericIE(InfoExtractor):
# Check for direct link to a video
content_type = full_response.headers.get('Content-Type', '').lower()
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
headers = filter_dict({'Referer': smuggled_data.get('referer')})

View File

@@ -0,0 +1,91 @@
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import (
parse_qs,
traverse_obj,
url_or_none,
)
class GermanupaIE(InfoExtractor):
IE_DESC = 'germanupa.de'
_VALID_URL = r'https?://germanupa\.de/mediathek/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://germanupa.de/mediathek/4-figma-beratung-deine-sprechstunde-fuer-figma-fragen',
'info_dict': {
'id': '909179246',
'title': 'Tutorial: #4 Figma Beratung - Deine Sprechstunde für Figma-Fragen',
'ext': 'mp4',
'uploader': 'German UPA',
'uploader_id': 'germanupa',
'thumbnail': 'https://i.vimeocdn.com/video/1792564420-7415283ccef8bf8702dab8c6b7515555ceeb7a1c11371ffcc133b8e887dbf70e-d_1280',
'uploader_url': 'https://vimeo.com/germanupa',
'duration': 3987,
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
'params': {'skip_download': 'm3u8'},
}, {
'note': 'audio, uses GenericIE',
'url': 'https://germanupa.de/mediathek/live-vom-ux-festival-neuigkeiten-von-figma-jobmarkt-agenturszene-interview-zu-sustainable',
'info_dict': {
'id': '1867346676',
'title': 'Live vom UX Festival: Neuigkeiten von Figma, Jobmarkt, Agenturszene & Interview zu Sustainable UX',
'ext': 'opus',
'timestamp': 1720545088,
'upload_date': '20240709',
'duration': 3910.557,
'like_count': int,
'description': 'md5:db2aed5ff131e177a7b33901e9a8db05',
'uploader': 'German UPA',
'repost_count': int,
'genres': ['Science'],
'license': 'all-rights-reserved',
'uploader_url': 'https://soundcloud.com/user-80097677',
'uploader_id': '471579486',
'view_count': int,
'comment_count': int,
'thumbnail': 'https://i1.sndcdn.com/artworks-oCti2e9GhaZFWBqY-48ybGw-original.jpg',
},
}, {
'note': 'Nur für Mitglieder/Just for members',
'url': 'https://germanupa.de/mediathek/ux-festival-2024-usability-tests-und-ai',
'info_dict': {
'id': '986994430',
'title': 'UX Festival 2024 "Usability Tests und AI" von Lennart Weber',
'ext': 'mp4',
'release_date': '20240719',
'uploader_url': 'https://vimeo.com/germanupa',
'timestamp': 1721373980,
'license': 'by-sa',
'like_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/1904187064-2a672630c30f9ad787bd390bff3f51d7506a3e8416763ba6dbf465732b165c5c-d_1280',
'duration': 2146,
'release_timestamp': 1721373980,
'uploader': 'German UPA',
'uploader_id': 'germanupa',
'upload_date': '20240719',
'comment_count': int,
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
'skip': 'login required',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
param_url = traverse_obj(
self._search_regex(
r'<iframe[^>]+data-src\s*?=\s*?([\'"])(?P<url>https://germanupa\.de/media/oembed\?url=(?:(?!\1).)+)\1',
webpage, 'embedded video', default=None, group='url'),
({parse_qs}, 'url', 0, {url_or_none}))
if not param_url:
if self._search_regex(
r'<div[^>]+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1',
webpage, 'login wrapper', default=None):
self.raise_login_required('This video is only available for members')
return self.url_result(url, 'Generic') # Fall back to generic to extract audio
real_url = param_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/')
return self.url_result(VimeoIE._smuggle_referrer(real_url, url), VimeoIE, video_id)

View File

@@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
_VALID_URL = [
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
]
_TESTS = [{
'url': 'http://academymel.online/3video_1',

View File

@@ -7,7 +7,7 @@ from ..utils import (
class GolemIE(InfoExtractor):
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
_VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
_TEST = {
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',

View File

@@ -13,7 +13,7 @@ from ..utils import (
class HRFernsehenIE(InfoExtractor):
IE_NAME = 'hrfernsehen'
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
_VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
_TESTS = [{
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
'md5': '5c4e0ba94677c516a2f65a84110fc536',

View File

@@ -8,15 +8,19 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
str_or_none,
try_get,
unescapeHTML,
unified_strdate,
update_url_query,
url_or_none,
)
from ..utils.traversal import traverse_obj
class HuyaLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
IE_NAME = 'huya:live'
IE_DESC = 'huya.com'
TESTS = [{
@@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor):
'info_dict': {
'id': '572329',
'title': str,
'ext': 'flv',
'description': str,
'is_live': True,
'view_count': int,
@@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor):
fm = base64.b64decode(params['fm']).decode().split('_', 1)[0]
ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
return fm, ss
class HuyaVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P<id>\d+)\.html'
IE_NAME = 'huya:video'
IE_DESC = '虎牙视频'
_TESTS = [{
'url': 'https://www.huya.com/video/play/1002412640.html',
'info_dict': {
'id': '1002412640',
'ext': 'mp4',
'title': '8月3日',
'thumbnail': r're:https?://.*\.jpg',
'duration': 14,
'uploader': '虎牙-ATS欧卡车队青木',
'uploader_id': '1564376151',
'upload_date': '20240803',
'view_count': int,
'comment_count': int,
'like_count': int,
},
},
{
'url': 'https://www.huya.com/video/play/556054543.html',
'info_dict': {
'id': '556054543',
'ext': 'mp4',
'title': '我不挑事 也不怕事',
'thumbnail': r're:https?://.*\.jpg',
'duration': 1864,
'uploader': '卡尔',
'uploader_id': '367138632',
'upload_date': '20210811',
'view_count': int,
'comment_count': int,
'like_count': int,
},
}]
def _real_extract(self, url: str):
video_id = self._match_id(url)
video_data = self._download_json(
'https://liveapi.huya.com/moment/getMomentContent', video_id,
query={'videoId': video_id})['data']['moment']['videoInfo']
formats = []
for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))):
formats.append({
'url': definition['url'],
**traverse_obj(definition, {
'format_id': ('defName', {str}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
'filesize': ('size', {int_or_none}),
}),
})
return {
'id': video_id,
'formats': formats,
**traverse_obj(video_data, {
'title': ('videoTitle', {str}),
'thumbnail': ('videoCover', {url_or_none}),
'duration': ('videoDuration', {parse_duration}),
'uploader': ('nickName', {str}),
'uploader_id': ('uid', {str_or_none}),
'upload_date': ('videoUploadTime', {unified_strdate}),
'view_count': ('videoPlayNum', {int_or_none}),
'comment_count': ('videoCommentNum', {int_or_none}),
'like_count': ('favorCount', {int_or_none}),
}),
}

View File

@@ -37,7 +37,7 @@ class ImgurBaseIE(InfoExtractor):
class ImgurIE(ImgurBaseIE):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://imgur.com/A61SaA1',
@@ -54,6 +54,22 @@ class ImgurIE(ImgurBaseIE):
'like_count': int,
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
},
}, {
# Test with URL slug
'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1',
'info_dict': {
'id': 'A61SaA1',
'ext': 'mp4',
'title': 'MRW gifv is up and running without any bugs',
'timestamp': 1416446068,
'upload_date': '20141120',
'dislike_count': int,
'comment_count': int,
'release_timestamp': 1416446068,
'release_date': '20141120',
'like_count': int,
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
},
}, {
'url': 'https://i.imgur.com/A61SaA1.gifv',
'only_matching': True,
@@ -92,6 +108,7 @@ class ImgurIE(ImgurBaseIE):
'comment_count': int,
'release_timestamp': 1710491255,
'release_date': '20240315',
'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg',
},
}]
@@ -208,7 +225,10 @@ class ImgurIE(ImgurBaseIE):
}), get_all=False),
'id': video_id,
'formats': formats,
'thumbnail': url_or_none(search('thumbnailUrl')),
'thumbnails': [{
'url': thumbnail_url,
'http_headers': {'Accept': '*/*'},
}] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None,
'http_headers': {'Accept': '*/*'},
}
@@ -252,17 +272,9 @@ class ImgurGalleryBaseIE(ImgurBaseIE):
class ImgurGalleryIE(ImgurGalleryBaseIE):
IE_NAME = 'imgur:gallery'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'http://imgur.com/gallery/Q95ko',
'info_dict': {
'id': 'Q95ko',
'title': 'Adding faces make every GIF better',
},
'playlist_count': 25,
'skip': 'Zoinks! You\'ve taken a wrong turn.',
}, {
# TODO: static images - replace with animated/video gallery
'url': 'http://imgur.com/topic/Aww/ll5Vk',
'only_matching': True,
@@ -280,7 +292,27 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
'release_timestamp': 1358554297,
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
'release_date': '20130119',
'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand',
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
'comment_count': int,
'dislike_count': int,
'like_count': int,
},
}, {
# Test with slug
'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx',
'add_ies': ['Imgur'],
'info_dict': {
'id': 'YcAQlkx',
'ext': 'mp4',
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
'timestamp': 1358554297,
'upload_date': '20130119',
'uploader_id': '1648642',
'uploader': 'wittyusernamehere',
'release_timestamp': 1358554297,
'release_date': '20130119',
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
'comment_count': int,
'dislike_count': int,
'like_count': int,
@@ -317,6 +349,13 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
'title': 'Penguins !',
},
'playlist_count': 3,
}, {
'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ',
'info_dict': {
'id': '6lAn9VQ',
'title': 'Penguins !',
},
'playlist_count': 3,
}, {
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
'add_ies': ['Imgur'],
@@ -357,7 +396,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
class ImgurAlbumIE(ImgurGalleryBaseIE):
IE_NAME = 'imgur:album'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
_GALLERY = False
_TESTS = [{
# TODO: only static images - replace with animated/video gallery
@@ -372,6 +411,14 @@ class ImgurAlbumIE(ImgurGalleryBaseIE):
'title': 'enen-no-shouboutai',
},
'playlist_count': 2,
}, {
# Test with URL slug
'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX',
'info_dict': {
'id': 'iX265HX',
'title': 'enen-no-shouboutai',
},
'playlist_count': 2,
}, {
'url': 'https://imgur.com/a/8pih2Ed',
'info_dict': {

View File

@@ -48,7 +48,6 @@ class InstagramBaseIE(InfoExtractor):
'X-IG-WWW-Claim': '0',
'Origin': 'https://www.instagram.com',
'Accept': '*/*',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
}
def _perform_login(self, username, password):
@@ -435,10 +434,10 @@ class InstagramIE(InstagramBaseIE):
'X-Requested-With': 'XMLHttpRequest',
'Referer': url,
}, query={
'query_hash': '9f8827793ef34641b2fb195d4d41151c',
'doc_id': '8845758582119845',
'variables': json.dumps(variables, separators=(',', ':')),
})
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
if not general_info:
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)

View File

@@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor):
'id': 'p51388',
'ext': 'mp4',
'title': 'Partička (92)',
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
'upload_date': '20201103',
'timestamp': 1604437480,
'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
'episode': 'Partička (92)',
'season': 'Partička',
'series': 'Prima Partička',
'episode_number': 92,
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
},
'params': {
'skip_download': True, # m3u8 download
},
}, {
'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
'info_dict': {
'id': 'p1412199',
'ext': 'mp4',
'episode_number': 3,
'episode': 'Tenerife: V říši ohně',
'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
'duration': 3111.0,
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
'title': 'Tenerife: V říši ohně',
'timestamp': 1711825800,
'upload_date': '20240330',
},
'params': {
'skip_download': True, # m3u8 download
@@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor):
video_id = self._search_regex((
r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
r'let\s+videos\s*=\s*([\'"])(?P<id>p\d+)\1',
), webpage, 'real id', group='id', default=None)
if not video_id:
@@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor):
final_result = self._search_json_ld(webpage, video_id, default={})
final_result.update({
'id': video_id,
'title': title,
'title': final_result.get('title') or title,
'thumbnail': self._html_search_meta(
['thumbnail', 'og:image', 'twitter:image'],
webpage, 'thumbnail', default=None),

View File

@@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
class SangiinInstructionIE(InfoExtractor):
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
_VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
IE_DESC = False # this shouldn't be listed as a supported site
def _real_extract(self, url):
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
raise ExtractorError(
'Copy the link from the button below the video description/player '
'and use that link to download. If there is no button in the frame, '
'get the URL of the frame showing the video.', expected=True)
class SangiinIE(InfoExtractor):

View File

@@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
(?:
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
https?://
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
(?:
(?:
# flash player

View File

@@ -15,7 +15,7 @@ from ..utils import (
class KhanAcademyBaseIE(InfoExtractor):
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70'
_PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4'
def _parse_video(self, video):
return {
@@ -39,7 +39,7 @@ class KhanAcademyBaseIE(InfoExtractor):
query={
'fastly_cacheable': 'persist_until_publish',
'pcv': self._PUBLISHED_CONTENT_VERSION,
'hash': '1242644265',
'hash': '3712657851',
'variables': json.dumps({
'path': display_id,
'countryCode': 'US',

View File

@@ -67,7 +67,7 @@ class KickIE(KickBaseIE):
@classmethod
def suitable(cls, url):
return False if KickClipIE.suitable(url) else super().suitable(url)
return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url)
def _real_extract(self, url):
channel = self._match_id(url)
@@ -98,25 +98,25 @@ class KickIE(KickBaseIE):
class KickVODIE(KickBaseIE):
IE_NAME = 'kick:vod'
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
'url': 'https://kick.com/video/e74614f4-5270-4319-90ad-32179f19a45c',
'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
'md5': '3870f94153e40e7121a6e46c068b70cb',
'info_dict': {
'id': 'e74614f4-5270-4319-90ad-32179f19a45c',
'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
'ext': 'mp4',
'title': r're:❎ MEGA DRAMA ❎ LIVECLICK ❎ ULTIMATE SKILLS .+',
'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
'channel': 'xqc',
'channel_id': '668',
'uploader': 'xQc',
'uploader_id': '676',
'upload_date': '20240724',
'timestamp': 1721796562,
'duration': 18566.0,
'upload_date': '20240909',
'timestamp': 1725919141,
'duration': 10155.0,
'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
'categories': ['VALORANT'],
'categories': ['Just Chatting'],
'age_limit': 0,
},
'params': {'skip_download': 'm3u8'},
@@ -148,7 +148,7 @@ class KickVODIE(KickBaseIE):
class KickClipIE(KickBaseIE):
IE_NAME = 'kick:clips'
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/?\?(?:[^#]+&)?clip=(?P<id>clip_[\w-]+)'
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?P<id>clip_[\w-]+)'
_TESTS = [{
'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
'info_dict': {
@@ -189,6 +189,26 @@ class KickClipIE(KickBaseIE):
'age_limit': 0,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5',
'info_dict': {
'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5',
'ext': 'mp4',
'title': 'KLJASLDJKLJKASDLJKDAS',
'channel': 'spreen',
'channel_id': '5312671',
'uploader': 'AnormalBarraBaja',
'uploader_id': '26518262',
'duration': 43.0,
'upload_date': '20240927',
'timestamp': 1727399987,
'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp',
'view_count': int,
'like_count': int,
'categories': ['Minecraft'],
'age_limit': 0,
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):

126
yt_dlp/extractor/kika.py Normal file
View File

@@ -0,0 +1,126 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
parse_duration,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class KikaIE(InfoExtractor):
IE_DESC = 'KiKA.de'
_VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P<id>[a-z-]+\d+)'
_GEO_COUNTRIES = ['DE']
_TESTS = [{
'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
'md5': 'fbfc8da483719ef06f396e5e5b938c69',
'info_dict': {
'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
'ext': 'mp4',
'upload_date': '20240831',
'timestamp': 1725126600,
'season_number': 2024,
'modified_date': '20240831',
'episode': 'Episode 476',
'episode_number': 476,
'season': 'Season 2024',
'duration': 634,
'title': 'logo! vom Samstag, 31. August 2024',
'modified_timestamp': 1725129983,
},
}, {
'url': 'https://www.kika.de/kaltstart/videos/video92498',
'md5': '710ece827e5055094afeb474beacb7aa',
'info_dict': {
'id': 'video92498',
'ext': 'mp4',
'title': '7. Wo ist Leo?',
'description': 'md5:fb48396a5b75068bcac1df74f1524920',
'duration': 436,
'timestamp': 1702926876,
'upload_date': '20231218',
'episode_number': 7,
'modified_date': '20240319',
'modified_timestamp': 1710880610,
'episode': 'Episode 7',
'season_number': 1,
'season': 'Season 1',
},
}, {
'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
'md5': 'ffd1b700d7de0a6616a1d08544c77294',
'info_dict': {
'id': 'video90088',
'ext': 'mp4',
'upload_date': '20221102',
'timestamp': 1667390580,
'duration': 197,
'modified_timestamp': 1711093771,
'episode_number': 8,
'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
'modified_date': '20240322',
'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
'season_number': 1,
'episode': 'Episode 8',
'season': 'Season 1',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
doc = self._download_json(f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id)
video_assets = self._download_json(doc['assets']['url'], video_id)
subtitles = {}
if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):
subtitles['de'] = [{
'url': ttml_resource,
'ext': 'ttml',
}]
if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):
subtitles.setdefault('de', []).append({
'url': webvtt_resource,
'ext': 'vtt',
})
return {
'id': video_id,
'formats': list(self._extract_formats(video_assets, video_id)),
'subtitles': subtitles,
**traverse_obj(doc, {
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('date', {parse_iso8601}),
'modified_timestamp': ('modificationDate', {parse_iso8601}),
'duration': ((
('durationInSeconds', {int_or_none}),
('duration', {parse_duration})), any),
'episode_number': ('episodeNumber', {int_or_none}),
'season_number': ('season', {int_or_none}),
}),
}
def _extract_formats(self, media_info, video_id):
for media in traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url']))):
stream_url = media['url']
ext = determine_ext(stream_url)
if ext == 'm3u8':
yield from self._extract_m3u8_formats(
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
else:
yield {
'url': stream_url,
'format_id': ext,
**traverse_obj(media, {
'width': ('frameWidth', {int_or_none}),
'height': ('frameHeight', {int_or_none}),
# NB: filesize is 0 if unknown, bitrate is -1 if unknown
'filesize': ('fileSize', {int_or_none}, {lambda x: x or None}),
'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
}),
}

View File

@@ -136,6 +136,7 @@ class LBRYBaseIE(InfoExtractor):
class LBRYIE(LBRYBaseIE):
IE_NAME = 'lbry'
IE_DESC = 'odysee.com'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
(?:\$/(?:download|embed)/)?
(?P<id>
@@ -364,6 +365,7 @@ class LBRYIE(LBRYBaseIE):
class LBRYChannelIE(LBRYBaseIE):
IE_NAME = 'lbry:channel'
IE_DESC = 'odysee.com channels'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
_TESTS = [{
'url': 'https://lbry.tv/@LBRYFoundation:0',
@@ -391,6 +393,7 @@ class LBRYChannelIE(LBRYBaseIE):
class LBRYPlaylistIE(LBRYBaseIE):
IE_NAME = 'lbry:playlist'
IE_DESC = 'odysee.com playlists'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
_TESTS = [{
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',

View File

@@ -1,86 +1,11 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
format_field,
int_or_none,
parse_iso8601,
unified_strdate,
)
class LnkGoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
_TESTS = [{
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
'info_dict': {
'id': '10809',
'ext': 'mp4',
'title': "Put'ka: Trys Klausimai",
'upload_date': '20161216',
'description': 'Seniai matytas Putka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
'age_limit': 18,
'duration': 117,
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1481904000,
},
'params': {
'skip_download': True, # HLS download
},
}, {
'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
'info_dict': {
'id': '10467',
'ext': 'mp4',
'title': 'Nėrdas: Kompiuterio Valymas',
'upload_date': '20150113',
'description': 'md5:7352d113a242a808676ff17e69db6a69',
'age_limit': 18,
'duration': 346,
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421164800,
},
'params': {
'skip_download': True, # HLS download
},
}, {
'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
'only_matching': True,
}]
_AGE_LIMITS = {
'N-7': 7,
'N-14': 14,
'S': 18,
}
_M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups()
video_info = self._download_json(
'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, video_id or '0'),
display_id)['videoConfig']['videoInfo']
video_id = str(video_info['id'])
title = video_info['title']
prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
formats = self._extract_m3u8_formats(
self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
video_id, 'mp4', 'm3u8_native')
return {
'id': video_id,
'display_id': display_id,
'title': title,
'formats': formats,
'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
'duration': int_or_none(video_info.get('duration')),
'description': clean_html(video_info.get('htmlDescription')),
'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
'timestamp': parse_iso8601(video_info.get('airDate')),
'view_count': int_or_none(video_info.get('viewsCount')),
}
class LnkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'

View File

@@ -92,9 +92,9 @@ class LoomIE(InfoExtractor):
},
'params': {'videopassword': 'seniorinfants2'},
}, {
# embed, transcoded-url endpoint sends empty JSON response
# embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
'md5': '8488817242a0db1cb2ad0ea522553cf6',
'md5': 'b321d261656848c184a94e3b93eae28d',
'info_dict': {
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
'ext': 'mp4',
@@ -104,6 +104,7 @@ class LoomIE(InfoExtractor):
'timestamp': 1657216459,
'duration': 181,
},
'params': {'format': 'bestvideo'}, # Test video-only fixup
'expected_warnings': ['Failed to parse JSON'],
}]
_WEBPAGE_TESTS = [{
@@ -293,7 +294,11 @@ class LoomIE(InfoExtractor):
format_url = format_url.replace('-split.m3u8', '.m3u8')
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
# Sometimes only split video/audio formats are available, need to fixup video-only formats
is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec'))
for fmt in m3u8_formats:
if is_not_premerged and fmt.get('vcodec') != 'none':
fmt['acodec'] = 'none'
yield {
**fmt,
'url': update_url(fmt['url'], query=query),

View File

@@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
video_data = None
# fix meta_url if missing the host address
if re.match(r'^\/\+\/', meta_url):
if re.match(r'\/\+\/', meta_url):
meta_url = urljoin('https://my.mail.ru', meta_url)
if meta_url:

View File

@@ -13,8 +13,8 @@ from ..utils import (
class MDRIE(InfoExtractor):
IE_DESC = 'MDR.DE and KiKA'
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
IE_DESC = 'MDR.DE'
_VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
_GEO_COUNTRIES = ['DE']
@@ -34,30 +34,6 @@ class MDRIE(InfoExtractor):
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
},
'skip': '404 not found',
}, {
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
'md5': '4930515e36b06c111213e80d1e4aad0e',
'info_dict': {
'id': '19636',
'ext': 'mp4',
'title': 'Baumhaus vom 30. Oktober 2015',
'duration': 134,
'uploader': 'KIKA',
},
'skip': '404 not found',
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
'info_dict': {
'id': '8182',
'ext': 'mp4',
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
'timestamp': 1482541200,
'upload_date': '20161224',
'duration': 4628,
'uploader': 'KIKA',
},
}, {
# audio with alternative playerURL pattern
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
@@ -68,28 +44,7 @@ class MDRIE(InfoExtractor):
'duration': 3239,
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
},
}, {
# empty bitrateVideo and bitrateAudio
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
'info_dict': {
'id': '128372',
'ext': 'mp4',
'title': 'Der kleine Wichtel kehrt zurück',
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
'duration': 4876,
'timestamp': 1607823300,
'upload_date': '20201213',
'uploader': 'ZDF',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
'only_matching': True,
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
'only_matching': True,
'skip': '404 not found',
}, {
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
'only_matching': True,

View File

@@ -16,6 +16,15 @@ class MediaKlikkIE(InfoExtractor):
(?P<id>[^/#?_]+)'''
_TESTS = [{
'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/',
'info_dict': {
'id': '668177',
'title': 'Az ajtó',
'display_id': 'az-ajto',
'ext': 'mp4',
'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg',
},
}, {
# (old) mediaklikk. date in html.
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
'info_dict': {
@@ -37,6 +46,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230903',
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
},
'skip': 'Webpage redirects to 404 page',
}, {
# (old) m4sport
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
@@ -59,6 +69,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230908',
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
},
'skip': 'Webpage redirects to 404 page',
}, {
# m4sport with *video/ url and no date
'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
@@ -69,6 +80,7 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4',
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
},
'skip': 'Webpage redirects to 404 page',
}, {
# (old) hirado
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
@@ -90,6 +102,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230911',
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
},
'skip': 'Webpage redirects to video list page',
}, {
# (old) petofilive
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
@@ -112,6 +125,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230909',
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
},
'skip': 'Webpage redirects to video list page',
}]
def _real_extract(self, url):
@@ -143,14 +157,14 @@ class MediaKlikkIE(InfoExtractor):
if not playlist_url:
raise ExtractorError('Unable to extract playlist url')
formats = self._extract_wowza_formats(
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id)
return {
'id': video_id,
'title': title,
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
'upload_date': upload_date,
'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
}

View File

@@ -16,7 +16,7 @@ from ..utils import (
class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV'
IE_NAME = 'MangoTV'

View File

@@ -65,7 +65,7 @@ class TechTVMITIE(InfoExtractor):
class OCWMITIE(InfoExtractor):
IE_NAME = 'ocw.mit.edu'
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
_VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
_BASE_URL = 'http://ocw.mit.edu/'
_TESTS = [

View File

@@ -1,14 +1,13 @@
from .telecinco import TelecincoIE
from .telecinco import TelecincoBaseIE
from ..utils import (
int_or_none,
parse_iso8601,
)
class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
class MiTeleIE(TelecincoBaseIE):
IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
_TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
'info_dict': {
@@ -27,6 +26,7 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
'timestamp': 1471209401,
'upload_date': '20160814',
},
'skip': 'HTTP Error 404 Not Found',
}, {
# no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
@@ -49,6 +49,26 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
'params': {
'skip_download': True,
},
'skip': 'HTTP Error 404 Not Found',
}, {
'url': 'https://www.mitele.es/programas-tv/horizonte/temporada-5/programa-171-40_013480051/player/',
'info_dict': {
'id': '7adbe22e-cd41-4787-afa4-36f3da7c2c6f',
'ext': 'mp4',
'title': 'Horizonte Temporada 5 Programa 171',
'description': 'md5:97f1fb712c5ac27e5693a8b3c5c0c6e3',
'episode': 'Las Zonas de Bajas Emisiones, a debate',
'episode_number': 171,
'season': 'Season 5',
'season_number': 5,
'series': 'Horizonte',
'duration': 7012,
'upload_date': '20240927',
'timestamp': 1727416450,
'thumbnail': 'https://album.mediaset.es/eimg/2024/09/27/horizonte-171_9f02.jpg',
'age_limit': 12,
},
'params': {'geo_bypass_country': 'ES'},
}, {
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
'only_matching': True,

View File

@@ -0,0 +1,121 @@
from .common import InfoExtractor
from ..utils import js_to_json, remove_end, update_url_query
class MojevideoIE(InfoExtractor):
IE_DESC = 'mojevideo.sk'
_VALID_URL = r'https?://(?:www\.)?mojevideo\.sk/video/(?P<id>\w+)/(?P<display_id>[\w()]+?)\.html'
_TESTS = [{
'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html',
'md5': '384a4628bd2bbd261c5206cf77c38c17',
'info_dict': {
'id': '3d17c',
'ext': 'mp4',
'title': 'Chlapci dobetónovali sme, máme hotovo!',
'display_id': 'chlapci_dobetonovali_sme_mame_hotovo',
'description': 'md5:a0822126044050d304a9ef58c92ddb34',
'thumbnail': 'https://fs5.mojevideo.sk/imgfb/250236.jpg',
'duration': 21.0,
'upload_date': '20230919',
'timestamp': 1695129706,
'like_count': int,
'dislike_count': int,
'view_count': int,
'comment_count': int,
},
}, {
# 720p
'url': 'https://www.mojevideo.sk/video/14677/den_blbec.html',
'md5': '517c3e111c53a67d10b429c1f344ba2f',
'info_dict': {
'id': '14677',
'ext': 'mp4',
'title': 'Deň blbec?',
'display_id': 'den_blbec',
'description': 'I maličkosť vám môže zmeniť celý deň. Nikdy nezahadzujte žuvačky na zem!',
'thumbnail': 'https://fs5.mojevideo.sk/imgfb/83575.jpg',
'duration': 100.0,
'upload_date': '20120515',
'timestamp': 1337076481,
'like_count': int,
'dislike_count': int,
'view_count': int,
'comment_count': int,
},
}, {
# 1080p
'url': 'https://www.mojevideo.sk/video/2feb2/band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd).html',
'md5': '64599a23d3ac31cf2fe069e4353d8162',
'info_dict': {
'id': '2feb2',
'ext': 'mp4',
'title': 'BAND-MAID - onset (Instrumental) Live - Zepp Tokyo (Full HD)',
'display_id': 'band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd)',
'description': 'Výborná inštrumentálna skladba od skupiny BAND-MAID.',
'thumbnail': 'https://fs5.mojevideo.sk/imgfb/196274.jpg',
'duration': 240.0,
'upload_date': '20190708',
'timestamp': 1562576592,
'like_count': int,
'dislike_count': int,
'view_count': int,
'comment_count': int,
},
}, {
# 720p
'url': 'https://www.mojevideo.sk/video/358c8/dva_nissany_skyline_strielaju_v_londyne.html',
'only_matching': True,
}, {
# 720p
'url': 'https://www.mojevideo.sk/video/2455d/gopro_hero4_session_nova_sportova_vodotesna_kamera.html',
'only_matching': True,
}, {
# 1080p
'url': 'https://www.mojevideo.sk/video/352ee/amd_rx_6800_xt_vs_nvidia_rtx_3080_(test_v_9_hrach).html',
'only_matching': True,
}, {
# 1080p
'url': 'https://www.mojevideo.sk/video/2cbeb/trailer_z_avengers_infinity_war.html',
'only_matching': True,
}]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id)
video_id_dec = self._search_regex(
r'\bvId\s*=\s*(\d+)', webpage, 'video id', fatal=False) or str(int(video_id, 16))
video_exp = self._search_regex(r'\bvEx\s*=\s*["\'](\d+)', webpage, 'video expiry')
video_hashes = self._search_json(
r'\bvHash\s*=', webpage, 'video hashes', video_id,
contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)
formats = []
for video_hash, (suffix, quality, format_note) in zip(video_hashes, [
('', 1, 'normálna kvalita'),
('_lq', 0, 'nízka kvalita'),
('_hd', 2, 'HD-720p'),
('_fhd', 3, 'FULL HD-1080p'),
('_2k', 4, '2K-1440p'),
]):
formats.append({
'format_id': f'mp4-{quality}',
'quality': quality,
'format_note': format_note,
'url': update_url_query(
f'https://cache01.mojevideo.sk/securevideos69/{video_id_dec}{suffix}.mp4', {
'md5': video_hash,
'expires': video_exp,
}),
})
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
'title': (self._og_search_title(webpage, default=None)
or remove_end(self._html_extract_title(webpage, 'title'), ' - Mojevideo')),
'description': self._og_search_description(webpage),
**self._search_json_ld(webpage, video_id, default={}),
}

View File

@@ -371,7 +371,7 @@ class NexxIE(InfoExtractor):
# not all videos work via arc, e.g. nexx:741:1269984
if not video:
# Reverse engineered from JS code (see getDeviceID function)
device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(1e4, 99999)}{random.randint(1, 9)}'
device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(10000, 99999)}{random.randint(1, 9)}'
result = self._call_api(domain_id, 'session/init', video_id, data={
'nxp_devh': device_id,

View File

@@ -11,9 +11,12 @@ from ..utils import (
clean_html,
determine_ext,
get_element_by_class,
traverse_obj,
int_or_none,
make_archive_id,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class NFLBaseIE(InfoExtractor):
@@ -75,22 +78,15 @@ class NFLBaseIE(InfoExtractor):
'osVersion': '10.0',
}, separators=(',', ':')).encode()).decode(),
'networkType': 'other',
'nflClaimGroupsToAdd': [],
'nflClaimGroupsToRemove': [],
'peacockUUID': 'undefined',
}
_ACCOUNT_INFO = {}
_API_KEY = None
_API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
_TOKEN = None
_TOKEN_EXPIRY = 0
def _get_account_info(self, url, slug):
if not self._API_KEY:
webpage = self._download_webpage(url, slug, fatal=False) or ''
self._API_KEY = self._search_regex(
r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
def _get_account_info(self):
cookies = self._get_cookies('https://auth-id.nfl.com/')
login_token = traverse_obj(cookies, (
(f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
@@ -103,7 +99,7 @@ class NFLBaseIE(InfoExtractor):
'or else try using --cookies-from-browser instead', expected=True)
account = self._download_json(
'https://auth-id.nfl.com/accounts.getAccountInfo', slug,
'https://auth-id.nfl.com/accounts.getAccountInfo', None,
note='Downloading account info', data=urlencode_postdata({
'include': 'profile,data',
'lang': 'en',
@@ -111,7 +107,7 @@ class NFLBaseIE(InfoExtractor):
'sdk': 'js_latest',
'login_token': login_token,
'authMode': 'cookie',
'pageURL': url,
'pageURL': 'https://www.nfl.com/',
'sdkBuild': traverse_obj(cookies, (
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
'format': 'json',
@@ -126,55 +122,78 @@ class NFLBaseIE(InfoExtractor):
if len(self._ACCOUNT_INFO) != 3:
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
def _get_auth_token(self, url, slug):
def _get_auth_token(self):
if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
return
if not self._ACCOUNT_INFO:
self._get_account_info(url, slug)
token = self._download_json(
'https://api.nfl.com/identity/v3/token%s' % (
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
slug, headers={'Content-Type': 'application/json'}, note='Downloading access token',
None, headers={'Content-Type': 'application/json'}, note='Downloading access token',
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
self._TOKEN = token['accessToken']
self._TOKEN_EXPIRY = token['expiresIn']
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
def _extract_video(self, mcp_id, is_live=False):
self._get_auth_token()
data = self._download_json(
f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={
'Authorization': f'Bearer {self._TOKEN}',
'Accept': 'application/json',
'Content-Type': 'application/json',
}, data=json.dumps({'init': True, 'live': is_live}, separators=(',', ':')).encode())
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls')
return {
'id': mcp_id,
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
'_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)],
**traverse_obj(data, ('metadata', {
'title': ('event', ('def_title', 'friendlyName'), {str}, any),
'description': ('event', 'def_description', {str}),
'duration': ('event', 'duration', {int_or_none}),
'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}),
})),
}
def _parse_video_config(self, video_config, display_id):
video_config = self._parse_json(video_config, display_id)
is_live = traverse_obj(video_config, ('live', {bool})) or False
item = video_config['playlist'][0]
mcp_id = item.get('mcpID')
if mcp_id:
info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id)
if mcp_id := item.get('mcpID'):
return self._extract_video(mcp_id, is_live=is_live)
info = {'id': item.get('id') or item['entityId']}
item_url = item['url']
ext = determine_ext(item_url)
if ext == 'm3u8':
info['formats'] = self._extract_m3u8_formats(item_url, info['id'], 'mp4')
else:
media_id = item.get('id') or item['entityId']
title = item.get('title')
item_url = item['url']
info = {'id': media_id}
ext = determine_ext(item_url)
if ext == 'm3u8':
info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
else:
info['url'] = item_url
if item.get('audio') is True:
info['vcodec'] = 'none'
is_live = video_config.get('live') is True
thumbnails = None
image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage'))
if image_url:
thumbnails = [{
'url': image_url,
'ext': determine_ext(image_url, 'jpg'),
}]
info.update({
'title': title,
'is_live': is_live,
'description': clean_html(item.get('description')),
'thumbnails': thumbnails,
})
info['url'] = item_url
if item.get('audio') is True:
info['vcodec'] = 'none'
thumbnails = None
if image_url := traverse_obj(item, 'imageSrc', 'posterImage', expected_type=url_or_none):
thumbnails = [{
'url': image_url,
'ext': determine_ext(image_url, 'jpg'),
}]
info.update({
**traverse_obj(item, {
'title': ('title', {str}),
'description': ('description', {clean_html}),
}),
'is_live': is_live,
'thumbnails': thumbnails,
})
return info
@@ -188,24 +207,20 @@ class NFLIE(NFLBaseIE):
'ext': 'mp4',
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
'description': 'md5:85e05a3cc163f8c344340f220521136d',
'upload_date': '20201215',
'timestamp': 1608009755,
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'NFL',
'tags': 'count:6',
'thumbnail': r're:https?://.+\.jpg',
'duration': 157,
'categories': 'count:3',
'_old_archive_ids': ['anvato 899441'],
},
}, {
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
'md5': '6886b32c24b463038c760ceb55a34566',
'md5': '92a517f05bd3eb50fe50244bc621aec8',
'info_dict': {
'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0',
'ext': 'mp3',
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
'description': 'md5:12ada8ee70e6762658c30e223e095075',
'thumbnail': 'https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/v1571153441/chiefs/rfljejccnyhhkpkfq855',
},
'skip': 'HTTP Error 404: Not Found',
}, {
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
'only_matching': True,
@@ -236,13 +251,16 @@ class NFLArticleIE(NFLBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
entries = []
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
entries.append(self._parse_video_config(video_config, display_id))
def entries():
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
yield self._parse_video_config(video_config, display_id)
title = clean_html(get_element_by_class(
'nfl-c-article__title', webpage)) or self._html_search_meta(
['og:title', 'twitter:title'], webpage)
return self.playlist_result(entries, display_id, title)
return self.playlist_result(entries(), display_id, title)
class NFLPlusReplayIE(NFLBaseIE):
@@ -307,6 +325,9 @@ class NFLPlusReplayIE(NFLBaseIE):
'all_22': 'All-22',
}
def _real_initialize(self):
self._get_account_info()
def _real_extract(self, url):
slug, video_id = self._match_valid_url(url).group('slug', 'id')
requested_types = self._configuration_arg('type', ['all'])
@@ -315,7 +336,7 @@ class NFLPlusReplayIE(NFLBaseIE):
requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
if not video_id:
self._get_auth_token(url, slug)
self._get_auth_token()
headers = {'Authorization': f'Bearer {self._TOKEN}'}
game_id = self._download_json(
f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
@@ -328,14 +349,13 @@ class NFLPlusReplayIE(NFLBaseIE):
'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
if video_id:
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
return self._extract_video(video_id)
def entries():
for replay in traverse_obj(
replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
):
video_id = replay['mcpPlaybackId']
yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
yield self._extract_video(replay['mcpPlaybackId'])
return self.playlist_result(entries(), slug)
@@ -362,12 +382,15 @@ class NFLPlusEpisodeIE(NFLBaseIE):
'params': {'skip_download': 'm3u8'},
}]
def _real_initialize(self):
self._get_account_info()
def _real_extract(self, url):
slug = self._match_id(url)
self._get_auth_token(url, slug)
self._get_auth_token()
video_id = self._download_json(
f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
'Authorization': f'Bearer {self._TOKEN}',
})['mcpPlaybackId']
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
return self._extract_video(video_id)

View File

@@ -420,7 +420,7 @@ class NiconicoIE(InfoExtractor):
'x-request-with': 'https://www.nicovideo.jp',
})['data']['contentUrl']
# Getting all audio formats results in duplicate video formats which we filter out later
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id)
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
@@ -432,7 +432,6 @@ class NiconicoIE(InfoExtractor):
'asr': ('samplingRate', {int_or_none}),
}), get_all=False),
'acodec': 'aac',
'ext': 'm4a',
}
# Sort before removing dupes to keep the format dicts with the lowest tbr
@@ -870,7 +869,7 @@ class NicovideoTagURLIE(NicovideoSearchBaseIE):
class NiconicoUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)(?:/video)?/?(?:$|[#?])'
_TEST = {
'url': 'https://www.nicovideo.jp/user/419948',
'info_dict': {
@@ -878,7 +877,7 @@ class NiconicoUserIE(InfoExtractor):
},
'playlist_mincount': 101,
}
_API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
_API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
_PAGE_SIZE = 100
_API_HEADERS = {
@@ -898,12 +897,13 @@ class NiconicoUserIE(InfoExtractor):
total_count = int_or_none(json_parsed['data'].get('totalCount'))
for entry in json_parsed['data']['items']:
count += 1
yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id']))
yield self.url_result(
f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE)
page_num += 1
def _real_extract(self, url):
list_id = self._match_id(url)
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
return self.playlist_result(self._entries(list_id), list_id)
class NiconicoLiveIE(InfoExtractor):

View File

@@ -43,14 +43,8 @@ class NoodleMagazineIE(InfoExtractor):
def build_url(url_or_path):
return urljoin('https://adult.noodlemagazine.com', url_or_path)
headers = {'Referer': url}
player_path = self._html_search_regex(
r'<iframe[^>]+\bid="iplayer"[^>]+\bsrc="([^"]+)"', webpage, 'player path')
player_iframe = self._download_webpage(
build_url(player_path), video_id, 'Downloading iframe page', headers=headers)
playlist_url = self._search_regex(
r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url')
playlist_info = self._download_json(build_url(playlist_url), video_id, headers=headers)
playlist_info = self._search_json(
r'window\.playlist\s*=', webpage, video_id, 'playlist info')
formats = []
for source in traverse_obj(playlist_info, ('sources', lambda _, v: v['file'])):

View File

@@ -10,7 +10,7 @@ from ..utils import (
class NZOnScreenIE(InfoExtractor):
_VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
'info_dict': {

View File

@@ -1,9 +1,6 @@
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
)
class NZZIE(InfoExtractor):
@@ -22,19 +19,14 @@ class NZZIE(InfoExtractor):
'playlist_count': 1,
}]
def _entries(self, webpage, page_id):
for script in re.findall(r'(?s)<script[^>]* data-hid="jw-video-jw[^>]+>(.+?)</script>', webpage):
settings = self._search_json(r'var\s+settings\s*=[^{]*', script, 'settings', page_id, fatal=False)
if entry := self._parse_jwplayer_data(settings, page_id):
yield entry
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
entries = []
for player_element in re.findall(
r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
player_params = extract_attributes(player_element)
if player_params.get('data-type') not in ('kaltura_singleArticle',):
self.report_warning('Unsupported player type')
continue
entry_id = player_params['data-id']
entries.append(self.url_result(
'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
return self.playlist_result(entries, page_id)
return self.playlist_result(self._entries(webpage, page_id), page_id)

View File

@@ -1,3 +1,4 @@
import functools
import itertools
import urllib.parse
@@ -22,13 +23,19 @@ from ..utils import (
class PatreonBaseIE(InfoExtractor):
USER_AGENT = 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
@functools.cached_property
def patreon_user_agent(self):
# Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection.
# Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in
if self._get_cookies('https://www.patreon.com/').get('session_id'):
return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)'
return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
if headers is None:
headers = {}
if 'User-Agent' not in headers:
headers['User-Agent'] = self.USER_AGENT
headers['User-Agent'] = self.patreon_user_agent
if query:
query.update({'json-api-version': 1.0})
@@ -48,6 +55,7 @@ class PatreonBaseIE(InfoExtractor):
class PatreonIE(PatreonBaseIE):
IE_NAME = 'patreon'
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.patreon.com/creation?hid=743933',
@@ -111,6 +119,7 @@ class PatreonIE(PatreonBaseIE):
'comment_count': int,
'channel_is_verified': True,
'chapters': 'count:4',
'timestamp': 1423689666,
},
'params': {
'noplaylist': True,
@@ -221,6 +230,7 @@ class PatreonIE(PatreonBaseIE):
'thumbnail': r're:^https?://.+',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, {
# multiple attachments/embeds
'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
@@ -326,8 +336,13 @@ class PatreonIE(PatreonBaseIE):
if embed_url and (urlh := self._request_webpage(
embed_url, video_id, 'Checking embed URL', headers=headers,
fatal=False, errnote=False, expected_status=403)):
# Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need
# to check for "Sorry, we couldn&amp;rsquo;t find that page" in the meta description tag
meta_description = clean_html(self._html_search_meta(
'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None))
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
if urlh.status != 403 or VidsIoIE.suitable(embed_url):
if ((urlh.status != 403 and meta_description != 'Sorry, we couldnt find that page')
or VidsIoIE.suitable(embed_url)):
entries.append(self.url_result(smuggle_url(embed_url, headers)))
post_file = traverse_obj(attributes, ('post_file', {dict}))
@@ -419,15 +434,19 @@ class PatreonIE(PatreonBaseIE):
class PatreonCampaignIE(PatreonBaseIE):
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m/(?P<campaign_id>\d+))|(?P<vanity>[-\w]+))'
IE_NAME = 'patreon:campaign'
_VALID_URL = r'''(?x)
https?://(?:www\.)?patreon\.com/(?:
(?:m|api/campaigns)/(?P<campaign_id>\d+)|
(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
)(?:/posts)?/?(?:$|[?#])'''
_TESTS = [{
'url': 'https://www.patreon.com/dissonancepod/',
'info_dict': {
'title': 'Cognitive Dissonance Podcast',
'channel_url': 'https://www.patreon.com/dissonancepod',
'id': '80642',
'description': 'md5:eb2fa8b83da7ab887adeac34da6b7af7',
'description': r're:(?s).*We produce a weekly news podcast focusing on stories that deal with skepticism and religion.*',
'channel_id': '80642',
'channel': 'Cognitive Dissonance Podcast',
'age_limit': 0,
@@ -442,31 +461,46 @@ class PatreonCampaignIE(PatreonBaseIE):
'url': 'https://www.patreon.com/m/4767637/posts',
'info_dict': {
'title': 'Not Just Bikes',
'channel_follower_count': int,
'id': '4767637',
'channel_id': '4767637',
'channel_url': 'https://www.patreon.com/notjustbikes',
'description': 'md5:595c6e7dca76ae615b1d38c298a287a1',
'description': r're:(?s).*Not Just Bikes started as a way to explain why we chose to live in the Netherlands.*',
'age_limit': 0,
'channel': 'Not Just Bikes',
'uploader_url': 'https://www.patreon.com/notjustbikes',
'uploader': 'Not Just Bikes',
'uploader': 'Jason',
'uploader_id': '37306634',
'thumbnail': r're:^https?://.*$',
},
'playlist_mincount': 71,
}, {
'url': 'https://www.patreon.com/api/campaigns/4243769/posts',
'info_dict': {
'title': 'Second Thought',
'channel_follower_count': int,
'id': '4243769',
'channel_id': '4243769',
'channel_url': 'https://www.patreon.com/secondthought',
'description': r're:(?s).*Second Thought is an educational YouTube channel.*',
'age_limit': 0,
'channel': 'Second Thought',
'uploader_url': 'https://www.patreon.com/secondthought',
'uploader': 'JT Chapman',
'uploader_id': '32718287',
'thumbnail': r're:^https?://.*$',
},
'playlist_mincount': 201,
}, {
'url': 'https://www.patreon.com/dissonancepod/posts',
'only_matching': True,
}, {
'url': 'https://www.patreon.com/m/5932659',
'only_matching': True,
}, {
'url': 'https://www.patreon.com/api/campaigns/4243769',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if PatreonIE.suitable(url) else super().suitable(url)
def _entries(self, campaign_id):
cursor = None
params = {
@@ -493,7 +527,7 @@ class PatreonCampaignIE(PatreonBaseIE):
campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
if campaign_id is None:
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT})
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
campaign_id = self._search_nextjs_data(
webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']

View File

@@ -109,7 +109,7 @@ class PinterestBaseIE(InfoExtractor):
class PinterestIE(PinterestBaseIE):
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?P<id>\d+)'
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?:[\w-]+--)?(?P<id>\d+)'
_TESTS = [{
# formats found in data['videos']
'url': 'https://www.pinterest.com/pin/664281013778109217/',
@@ -174,6 +174,25 @@ class PinterestIE(PinterestBaseIE):
}, {
'url': 'https://co.pinterest.com/pin/824721750502199491/',
'only_matching': True,
},
{
'url': 'https://pinterest.com/pin/dive-into-serenity-blue-lagoon-pedi-nails-for-a-tranquil-and-refreshing-spa-experience-video-in-2024--2885187256207927',
'info_dict': {
'id': '2885187256207927',
'ext': 'mp4',
'title': 'Dive into Serenity: Blue Lagoon Pedi Nails for a Tranquil and Refreshing Spa Experience! 💙💅',
'description': 'md5:5da41c767d2317e42e49b663b0b2150f',
'uploader': 'Glamour Artistry |Everyday Outfits, Luxury Fashion & Nail Designs',
'uploader_id': '1142999717836434688',
'upload_date': '20240702',
'timestamp': 1719939156,
'duration': 7.967,
'comment_count': int,
'repost_count': int,
'categories': 'count:9',
'tags': ['#BlueLagoonPediNails', '#SpaExperience'],
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
},
}]
def _real_extract(self, url):

View File

@@ -1,5 +1,6 @@
import functools
from .common import InfoExtractor
from ..compat import functools
from ..utils import (
int_or_none,
parse_duration,

View File

@@ -628,8 +628,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
page_entries = self._extract_entries(webpage, host)
if not page_entries:
break
for e in page_entries:
yield e
yield from page_entries
if not self._has_more(webpage):
break

View File

@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
join_nonempty,
time_seconds,
try_call,
unified_timestamp,
@@ -167,7 +168,7 @@ class RadikoBaseIE(InfoExtractor):
class RadikoIE(RadikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<timestring>\d+)'
_TESTS = [{
# QRR (文化放送) station provides <desc>
@@ -183,8 +184,9 @@ class RadikoIE(RadikoBaseIE):
}]
def _real_extract(self, url):
station, video_id = self._match_valid_url(url).groups()
vid_int = unified_timestamp(video_id, False)
station, timestring = self._match_valid_url(url).group('station', 'timestring')
video_id = join_nonempty(station, timestring)
vid_int = unified_timestamp(timestring, False)
prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
auth_token, area_id = self._auth_client()
@@ -207,7 +209,7 @@ class RadikoIE(RadikoBaseIE):
'ft': radio_begin,
'end_at': radio_end,
'to': radio_end,
'seek': video_id,
'seek': timestring,
},
),
}

View File

@@ -16,7 +16,7 @@ from ..utils import (
class RadioFranceIE(InfoExtractor):
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
_VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
IE_NAME = 'radiofrance'
_TEST = {

View File

@@ -1,3 +1,4 @@
import json
import urllib.parse
from .common import InfoExtractor
@@ -17,7 +18,7 @@ from ..utils import (
class RedditIE(InfoExtractor):
_NETRC_MACHINE = 'reddit'
_VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
_VALID_URL = r'https?://(?:\w+\.)?reddit(?:media)?\.com/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
'info_dict': {
@@ -251,15 +252,15 @@ class RedditIE(InfoExtractor):
return {'en': [{'url': caption_url}]}
def _real_extract(self, url):
host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
slug, video_id = self._match_valid_url(url).group('slug', 'id')
data = self._download_json(
f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403)
if not data:
fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com'
self.to_screen(f'{host} request failed, retrying with {fallback_host}')
try:
data = self._download_json(
f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403)
f'https://www.reddit.com/{slug}/.json', video_id, expected_status=403)
except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError):
self.raise_login_required('Account authentication is required')
raise
if traverse_obj(data, 'error') == 403:
reason = data.get('reason')

View File

@@ -6,7 +6,7 @@ from ..utils import (
class ReverbNationIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_TESTS = [{
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',

View File

@@ -8,7 +8,7 @@ from ..utils import js_to_json
class RTPIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:(?:estudoemcasa|palco|zigzag)/)?p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
'md5': 'e736ce0c665e459ddb818546220b4ef8',
@@ -19,9 +19,25 @@ class RTPIE(InfoExtractor):
'description': 'As paixões musicais de António Cartaxo e António Macedo',
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'https://www.rtp.pt/play/zigzag/p13166/e757904/25-curiosidades-25-de-abril',
'md5': '9a81ed53f2b2197cfa7ed455b12f8ade',
'info_dict': {
'id': 'e757904',
'ext': 'mp4',
'title': '25 Curiosidades, 25 de Abril',
'description': 'Estudar ou não estudar - Em cada um dos episódios descobrimos uma curiosidade acerca de como era viver em Portugal antes da revolução do 25 de abr',
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True,
}, {
'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/portugues-1-ano',
'only_matching': True,
}, {
'url': 'https://www.rtp.pt/play/palco/p13785/l7nnon',
'only_matching': True,
}]
_RX_OBFUSCATION = re.compile(r'''(?xs)
@@ -49,17 +65,17 @@ class RTPIE(InfoExtractor):
f, config = self._search_regex(
r'''(?sx)
var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
(?:var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*)?
var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
''', webpage,
'player config', group=('f', 'config'))
f = self._parse_json(
f, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
config = self._parse_json(
config, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
f = config['file'] if not f else self._parse_json(
f, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
formats = []
if isinstance(f, dict):

View File

@@ -8,14 +8,17 @@ from ..utils import (
UnsupportedError,
clean_html,
determine_ext,
extract_attributes,
format_field,
get_element_by_class,
get_elements_html_by_class,
int_or_none,
join_nonempty,
parse_count,
parse_iso8601,
traverse_obj,
unescapeHTML,
urljoin,
)
@@ -382,8 +385,10 @@ class RumbleChannelIE(InfoExtractor):
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
break
raise
for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage):
yield self.url_result('https://rumble.com' + video_url)
for video_url in traverse_obj(
get_elements_html_by_class('videostream__link', webpage), (..., {extract_attributes}, 'href'),
):
yield self.url_result(urljoin('https://rumble.com', video_url))
def _real_extract(self, url):
url, playlist_id = self._match_valid_url(url).groups()

View File

@@ -6,6 +6,7 @@ from ..utils import (
determine_ext,
int_or_none,
parse_qs,
traverse_obj,
try_get,
unified_timestamp,
url_or_none,
@@ -80,6 +81,8 @@ class RutubeBaseIE(InfoExtractor):
'url': format_url,
'format_id': format_id,
})
for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False))
return formats
def _download_and_extract_formats(self, video_id, query=None):
@@ -90,7 +93,7 @@ class RutubeBaseIE(InfoExtractor):
class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_VALID_URL = r'https?://rutube\.ru/(?:(?:live/)?video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
_TESTS = [{
@@ -164,6 +167,29 @@ class RutubeIE(RutubeBaseIE):
'uploader': 'Стас Быков',
},
'expected_warnings': ['Unable to download f4m'],
}, {
'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
'info_dict': {
'id': 'c58f502c7bb34a8fcdd976b221fca292',
'ext': 'mp4',
'categories': ['Телепередачи'],
'description': '',
'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
'live_status': 'is_live',
'age_limit': 0,
'uploader_id': '23460655',
'timestamp': 1652972968,
'view_count': int,
'upload_date': '20220519',
'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'uploader': 'Первый канал',
},
}, {
'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
'only_matching': True,
}, {
'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/',
'only_matching': True,
}]
@classmethod

View File

@@ -36,7 +36,7 @@ class SampleFocusIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, display_id, impersonate=True)
sample_id = self._search_regex(
r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
@@ -82,7 +82,15 @@ class SampleFocusIE(InfoExtractor):
return {
'id': sample_id,
'title': title,
'url': mp3_url,
'formats': [{
'url': mp3_url,
'ext': 'mp3',
'vcodec': 'none',
'acodec': 'mp3',
'http_headers': {
'Referer': url,
},
}],
'display_id': display_id,
'thumbnail': thumbnail,
'uploader': uploader,

View File

@@ -0,0 +1,33 @@
from .common import InfoExtractor
class ScreenRecIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?screenrec\.com/share/(?P<id>\w{10})'
_TESTS = [{
'url': 'https://screenrec.com/share/DasLtbknYo',
'info_dict': {
'id': 'DasLtbknYo',
'ext': 'mp4',
'title': '02.05.2024_03.01.25_REC',
'description': 'Recorded with ScreenRec',
'thumbnail': r're:^https?://.*\.gif$',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m3u8_url = self._search_regex(
r'customUrl\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 URL', group='url')
return {
'id': video_id,
'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'),
}

36
yt_dlp/extractor/sen.py Normal file
View File

@@ -0,0 +1,36 @@
from .common import InfoExtractor
from ..utils import url_or_none
from ..utils.traversal import traverse_obj
class SenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?sen\.com/video/(?P<id>[0-9a-f-]+)'
_TEST = {
'url': 'https://www.sen.com/video/eef46eb1-4d79-4e28-be9d-bd937767f8c4',
'md5': 'ff615aca9691053c94f8f10d96cd7884',
'info_dict': {
'id': 'eef46eb1-4d79-4e28-be9d-bd937767f8c4',
'ext': 'mp4',
'description': 'Florida, 28 Sep 2022',
'title': 'Hurricane Ian',
'tags': ['North America', 'Storm', 'Weather'],
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
api_data = self._download_json(f'https://api.sen.com/content/public/video/{video_id}', video_id)
m3u8_url = (traverse_obj(api_data, (
'data', 'nodes', lambda _, v: v['id'] == 'player', 'video', 'url', {url_or_none}, any))
or f'https://vod.sen.com/videos/{video_id}/manifest.m3u8')
return {
'id': video_id,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'),
**traverse_obj(api_data, ('data', 'nodes', lambda _, v: v['id'] == 'details', any, 'content', {
'title': ('title', 'text', {str}),
'description': ('descriptions', 0, 'text', {str}),
'tags': ('badges', ..., 'text', {str}),
})),
}

View File

@@ -27,7 +27,7 @@ class ServusIE(InfoExtractor):
'info_dict': {
'id': 'AA-28BYCQNH92111',
'ext': 'mp4',
'title': 'Klettersteige in den Alpen',
'title': 'Vie Ferrate - Klettersteige in den Alpen',
'description': 'md5:25e47ddd83a009a0f9789ba18f2850ce',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2823,
@@ -38,6 +38,7 @@ class ServusIE(InfoExtractor):
'season_number': 11,
'episode': 'Episode 8 - Vie Ferrate Klettersteige in den Alpen',
'episode_number': 8,
'categories': ['Bergwelten'],
},
'params': {'skip_download': 'm3u8'},
}, {
@@ -71,8 +72,11 @@ class ServusIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url).upper()
webpage = self._download_webpage(url, video_id)
next_data = self._search_nextjs_data(webpage, video_id, fatal=False)
video = self._download_json(
'https://api-player.redbull.com/stv/servus-tv?timeZone=Europe/Berlin',
'https://api-player.redbull.com/stv/servus-tv-playnet',
video_id, 'Downloading video JSON', query={'videoId': video_id})
if not video.get('videoUrl'):
self._report_errors(video)
@@ -89,7 +93,7 @@ class ServusIE(InfoExtractor):
return {
'id': video_id,
'title': video.get('title'),
'description': self._get_description(video_id) or video.get('description'),
'description': self._get_description(next_data) or video.get('description'),
'thumbnail': video.get('poster'),
'duration': float_or_none(video.get('duration')),
'timestamp': unified_timestamp(video.get('currentSunrise')),
@@ -100,16 +104,19 @@ class ServusIE(InfoExtractor):
'episode_number': episode_number,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(next_data, ('props', 'pageProps', 'data', {
'title': ('title', 'rendered', {str}),
'timestamp': ('stv_date', 'raw', {int}),
'duration': ('stv_duration', {float_or_none}),
'categories': ('category_names', ..., {str}),
})),
}
def _get_description(self, video_id):
info = self._download_json(
f'https://backend.servustv.com/wp-json/rbmh/v2/media_asset/aa_id/{video_id}?fieldset=page',
video_id, fatal=False)
return join_nonempty(*traverse_obj(info, (
('stv_short_description', 'stv_long_description'),
{lambda x: unescapeHTML(x.replace('\n\n', '\n'))})), delim='\n\n')
def _get_description(self, next_data):
return join_nonempty(*traverse_obj(next_data, (
'props', 'pageProps', 'data',
('stv_short_description', 'stv_long_description'), {str},
{lambda x: x.replace('\n\n', '\n')}, {unescapeHTML})), delim='\n\n')
def _report_errors(self, video):
playability_errors = traverse_obj(video, ('playabilityErrors', ...))

View File

@@ -0,0 +1,76 @@
from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, url_or_none
from ..utils.traversal import traverse_obj
class SnapchatSpotlightIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?snapchat\.com/spotlight/(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.snapchat.com/spotlight/W7_EDlXWTBiXAEEniNoMPwAAYYWtidGhudGZpAX1TKn0JAX1TKnXJAAAAAA',
'md5': '46c580f63592d0cbb76e974d2f9f0fcc',
'info_dict': {
'id': 'W7_EDlXWTBiXAEEniNoMPwAAYYWtidGhudGZpAX1TKn0JAX1TKnXJAAAAAA',
'ext': 'mp4',
'title': 'Views 💕',
'description': '',
'thumbnail': r're:https://cf-st\.sc-cdn\.net/d/kKJHIR1QAznRKK9jgYYDq\.256\.IRZXSOY',
'duration': 4.665,
'timestamp': 1637777831.369,
'upload_date': '20211124',
'repost_count': int,
'uploader': 'shreypatel57',
'uploader_url': 'https://www.snapchat.com/add/shreypatel57',
},
}, {
'url': 'https://www.snapchat.com/spotlight/W7_EDlXWTBiXAEEniNoMPwAAYcnVjYWdwcGV1AZEaIYn5AZEaIYnrAAAAAQ',
'md5': '4cd9626458c1a0e3e6dbe72c544a9ec2',
'info_dict': {
'id': 'W7_EDlXWTBiXAEEniNoMPwAAYcnVjYWdwcGV1AZEaIYn5AZEaIYnrAAAAAQ',
'ext': 'mp4',
'title': 'Spotlight Snap',
'description': 'How he flirt her teacher🤭🤭🤩😍 #kdrama#cdrama #dramaclips #dramaspotlight',
'thumbnail': r're:https://cf-st\.sc-cdn\.net/i/ztfr6xFs0FOcFhwVczWfj\.256\.IRZXSOY',
'duration': 10.91,
'timestamp': 1722720291.307,
'upload_date': '20240803',
'view_count': int,
'repost_count': int,
'uploader': 'ganda0535',
'uploader_url': 'https://www.snapchat.com/add/ganda0535',
'tags': ['#dramaspotlight', '#dramaclips', '#cdrama', '#kdrama'],
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
page_props = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
video_data = traverse_obj(page_props, (
'spotlightFeed', 'spotlightStories',
lambda _, v: v['story']['storyId']['value'] == video_id, 'metadata', any), None)
return {
'id': video_id,
'ext': 'mp4',
**traverse_obj(video_data, ('videoMetadata', {
'title': ('name', {str}),
'description': ('description', {str}),
'timestamp': ('uploadDateMs', {lambda x: float_or_none(x, 1000)}),
'view_count': ('viewCount', {int_or_none}, {lambda x: None if x == -1 else x}),
'repost_count': ('shareCount', {int_or_none}),
'url': ('contentUrl', {url_or_none}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
'duration': ('durationMs', {lambda x: float_or_none(x, 1000)}),
'thumbnail': ('thumbnailUrl', {url_or_none}),
'uploader': ('creator', 'personCreator', 'username', {str}),
'uploader_url': ('creator', 'personCreator', 'url', {url_or_none}),
})),
**traverse_obj(video_data, {
'description': ('description', {str}),
'tags': ('hashtags', ..., {str}),
'view_count': ('engagementStats', 'viewCount', {int_or_none}, {lambda x: None if x == -1 else x}),
'repost_count': ('engagementStats', 'shareCount', {int_or_none}),
}),
}

View File

@@ -208,7 +208,6 @@ class SoundcloudBaseIE(InfoExtractor):
def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
track_id = str(info['id'])
title = info['title']
format_urls = set()
formats = []
@@ -367,7 +366,7 @@ class SoundcloudBaseIE(InfoExtractor):
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
'uploader_url': user.get('permalink_url'),
'timestamp': unified_timestamp(info.get('created_at')),
'title': title,
'title': info.get('title'),
'description': info.get('description'),
'thumbnails': thumbnails,
'duration': float_or_none(info.get('duration'), 1000),
@@ -377,7 +376,8 @@ class SoundcloudBaseIE(InfoExtractor):
'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
'formats': formats if not extract_flat else None,
}
@@ -429,7 +429,6 @@ class SoundcloudIE(SoundcloudBaseIE):
'repost_count': int,
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
'uploader_url': 'https://soundcloud.com/ethmusic',
'genres': [],
},
},
# geo-restricted
@@ -453,6 +452,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'uploader_url': 'https://soundcloud.com/the-concept-band',
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
'genres': ['Alternative'],
'artists': ['The Royal Concept'],
},
},
# private link
@@ -525,6 +525,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'repost_count': int,
'view_count': int,
'genres': ['Dance & EDM'],
'artists': ['80M'],
},
},
# private link, downloadable format
@@ -549,6 +550,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
'uploader_url': 'https://soundcloud.com/oriuplift',
'genres': ['Trance'],
'artists': ['Ori Uplift'],
},
},
# no album art, use avatar pic for thumbnail
@@ -572,7 +574,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'comment_count': int,
'repost_count': int,
'uploader_url': 'https://soundcloud.com/garyvee',
'genres': [],
'artists': ['MadReal'],
},
'params': {
'skip_download': True,

View File

@@ -2,7 +2,13 @@ import re
import urllib.parse
from .common import InfoExtractor
from ..utils import js_to_json, str_or_none, traverse_obj
from ..networking import HEADRequest
from ..utils import (
determine_ext,
js_to_json,
str_or_none,
)
from ..utils.traversal import traverse_obj
class SubstackIE(InfoExtractor):
@@ -43,6 +49,19 @@ class SubstackIE(InfoExtractor):
'uploader': "Andrew Zimmern's Spilled Milk ",
'uploader_id': '577659',
},
}, {
# Podcast that needs its file extension resolved to mp3
'url': 'https://persuasion1.substack.com/p/summers',
'md5': '1456a755d46084744facdfac9edf900f',
'info_dict': {
'id': '141970405',
'ext': 'mp3',
'title': 'Larry Summers on What Went Wrong on Campus',
'description': 'Yascha Mounk and Larry Summers also discuss the promise and perils of artificial intelligence.',
'thumbnail': r're:https://substackcdn\.com/image/.+\.jpeg',
'uploader': 'Persuasion',
'uploader_id': '61579',
},
}]
@classmethod
@@ -89,7 +108,15 @@ class SubstackIE(InfoExtractor):
post_type = webpage_info['post']['type']
formats, subtitles = [], {}
if post_type == 'podcast':
formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
fmt = {'url': webpage_info['post']['podcast_url']}
if not determine_ext(fmt['url'], default_ext=None):
# The redirected format URL expires but the original URL doesn't,
# so we only want to extract the extension from this request
fmt['ext'] = determine_ext(self._request_webpage(
HEADRequest(fmt['url']), display_id,
'Resolving podcast file extension',
'Podcast URL is invalid').url)
formats.append(fmt)
elif post_type == 'video':
formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url)
else:

View File

@@ -472,7 +472,7 @@ class SVTPageIE(SVTBaseIE):
title = self._og_search_title(webpage)
urql_state = self._search_json(
r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)
r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}

View File

@@ -8,7 +8,7 @@ from ..utils import (
class Tele13IE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
_VALID_URL = r'https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
_TESTS = [
{
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',

View File

@@ -2,15 +2,69 @@ import json
import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
join_nonempty,
str_or_none,
try_get,
traverse_obj,
update_url,
url_or_none,
)
class TelecincoIE(InfoExtractor):
class TelecincoBaseIE(InfoExtractor):
def _parse_content(self, content, url):
video_id = content['dataMediaId']
config = self._download_json(
content['dataConfig'], video_id, 'Downloading config JSON')
services = config['services']
caronte = self._download_json(services['caronte'], video_id)
if traverse_obj(caronte, ('dls', 0, 'drm', {bool})):
self.report_drm(video_id)
stream = caronte['dls'][0]['stream']
headers = {
'Referer': url,
'Origin': re.match(r'https?://[^/]+', url).group(0),
}
geo_headers = {**headers, **self.geo_verification_headers()}
try:
cdn = self._download_json(
caronte['cerbero'], video_id, data=json.dumps({
'bbx': caronte['bbx'],
'gbx': self._download_json(services['gbx'], video_id)['gbx'],
}).encode(), headers={
'Content-Type': 'application/json',
**geo_headers,
})['tokens']['1']['cdn']
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
error_code = traverse_obj(
self._webpage_read_content(error.cause.response, caronte['cerbero'], video_id, fatal=False),
({json.loads}, 'code', {int}))
if error_code == 4038:
self.raise_geo_restricted(countries=['ES'])
raise
formats = self._extract_m3u8_formats(
update_url(stream, query=cdn), video_id, 'mp4', m3u8_id='hls', headers=geo_headers)
return {
'id': video_id,
'title': traverse_obj(config, ('info', 'title', {str})),
'formats': formats,
'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none}))
or traverse_obj(config, 'poster', 'imageUrl', expected_type=url_or_none)),
'duration': traverse_obj(content, ('dataDuration', {int_or_none})),
'http_headers': headers,
}
class TelecincoIE(TelecincoBaseIE):
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
@@ -30,6 +84,7 @@ class TelecincoIE(InfoExtractor):
'duration': 662,
},
}],
'skip': 'HTTP Error 410 Gone',
}, {
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
@@ -40,23 +95,24 @@ class TelecincoIE(InfoExtractor):
'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
'duration': 79,
},
'skip': 'Redirects to main page',
}, {
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
'md5': 'eddb50291df704ce23c74821b995bcac',
'md5': '5ce057f43f30b634fbaf0f18c71a140a',
'info_dict': {
'id': 'aywerkD2Sv1vGNqq9b85Q2',
'ext': 'mp4',
'title': '#DOYLACARA. Con la trata no hay trato',
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
'duration': 50,
'thumbnail': 'https://album.mediaset.es/eimg/2017/11/02/1tlQLO5Q3mtKT24f3EaC24.jpg',
},
}, {
# video in opening's content
'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
'info_dict': {
'id': '2907195140',
'id': '1691427',
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
'description': 'md5:73f340a7320143d37ab895375b2bf13a',
'description': r're:Fiorella, la sobrina de Edmundo Arrocet, concedió .{727}',
},
'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7',
@@ -65,6 +121,7 @@ class TelecincoIE(InfoExtractor):
'ext': 'mp4',
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
'duration': 1015,
'thumbnail': 'https://album.mediaset.es/eimg/2020/02/29/5opaC37lUhKlZ7FoDhiVC.jpg',
},
}],
'params': {
@@ -81,66 +138,29 @@ class TelecincoIE(InfoExtractor):
'only_matching': True,
}]
def _parse_content(self, content, url):
video_id = content['dataMediaId']
config = self._download_json(
content['dataConfig'], video_id, 'Downloading config JSON')
title = config['info']['title']
services = config['services']
caronte = self._download_json(services['caronte'], video_id)
stream = caronte['dls'][0]['stream']
headers = self.geo_verification_headers()
headers.update({
'Content-Type': 'application/json;charset=UTF-8',
'Origin': re.match(r'https?://[^/]+', url).group(0),
})
cdn = self._download_json(
caronte['cerbero'], video_id, data=json.dumps({
'bbx': caronte['bbx'],
'gbx': self._download_json(services['gbx'], video_id)['gbx'],
}).encode(), headers=headers)['tokens']['1']['cdn']
formats = self._extract_m3u8_formats(
stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
'duration': int_or_none(content.get('dataDuration')),
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
article = self._parse_json(self._search_regex(
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
webpage, 'article'), display_id)['article']
title = article.get('title')
description = clean_html(article.get('leadParagraph')) or ''
article = self._search_json(
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
webpage, 'article', display_id)['article']
description = traverse_obj(article, ('leadParagraph', {clean_html}, filter))
if article.get('editorialType') != 'VID':
entries = []
body = [article.get('opening')]
body.extend(try_get(article, lambda x: x['body'], list) or [])
for p in body:
if not isinstance(p, dict):
continue
content = p.get('content')
if not content:
continue
for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])):
content = p['content']
type_ = p.get('type')
if type_ == 'paragraph':
content_str = str_or_none(content)
if content_str:
description += content_str
continue
if type_ == 'video' and isinstance(content, dict):
if type_ == 'paragraph' and isinstance(content, str):
description = join_nonempty(description, content, delim='')
elif type_ == 'video' and isinstance(content, dict):
entries.append(self._parse_content(content, url))
return self.playlist_result(
entries, str_or_none(article.get('id')), title, description)
content = article['opening']['content']
info = self._parse_content(content, url)
info.update({
'description': description,
})
entries, str_or_none(article.get('id')),
traverse_obj(article, ('title', {str})), clean_html(description))
info = self._parse_content(article['opening']['content'], url)
info['description'] = description
return info

View File

@@ -1,33 +1,31 @@
import base64
import datetime as dt
import functools
import itertools
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import int_or_none, traverse_obj, urlencode_postdata, urljoin
from ..utils import int_or_none, traverse_obj, url_or_none, urljoin
class TenPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
_NETRC_MACHINE = '10play'
_TESTS = [{
'url': 'https://10play.com.au/neighbours/web-extras/season-39/nathan-borg-is-the-first-aussie-actor-with-a-cochlear-implant-to-join-neighbours/tpv210128qupwd',
'url': 'https://10play.com.au/neighbours/web-extras/season-41/heres-a-first-look-at-mischa-bartons-neighbours-debut/tpv230911hyxnz',
'info_dict': {
'id': '6226844312001',
'id': '6336940246112',
'ext': 'mp4',
'title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43',
'duration': 186,
'season': 'Season 39',
'season_number': 39,
'title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
'alt_title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
'description': 'Neighbours Premieres Monday, September 18 At 4:30pm On 10 And 10 Play And 6:30pm On 10 Peach',
'duration': 74,
'season': 'Season 41',
'season_number': 41,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'uploader': 'Channel 10',
'age_limit': 15,
'timestamp': 1611810000,
'upload_date': '20210128',
'timestamp': 1694386800,
'upload_date': '20230910',
'uploader_id': '2199827728001',
},
'params': {
@@ -35,21 +33,30 @@ class TenPlayIE(InfoExtractor):
},
'skip': 'Only available in Australia',
}, {
'url': 'https://10play.com.au/todd-sampsons-body-hack/episodes/season-4/episode-7/tpv200921kvngh',
'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp',
'info_dict': {
'id': '6192880312001',
'id': '9000000000091177',
'ext': 'mp4',
'title': "Todd Sampson's Body Hack - S4 Ep. 2",
'description': 'md5:fa278820ad90f08ea187f9458316ac74',
'title': 'Neighbours - S42 Ep. 9107',
'alt_title': 'Thu 05 Sep',
'description': 'md5:37a1f4271be34b9ee2b533426a5fbaef',
'duration': 1388,
'episode': 'Episode 9107',
'episode_number': 9107,
'season': 'Season 42',
'season_number': 42,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'age_limit': 15,
'timestamp': 1600770600,
'upload_date': '20200922',
'timestamp': 1725517860,
'upload_date': '20240905',
'uploader': 'Channel 10',
'uploader_id': '2199827728001',
},
'params': {
'skip_download': True,
},
'skip': 'Only available in Australia',
}, {
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
'only_matching': True,
@@ -66,55 +73,42 @@ class TenPlayIE(InfoExtractor):
'X': 18,
}
def _get_bearer_token(self, video_id):
username, password = self._get_login_info()
if username is None or password is None:
self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
_timestamp = dt.datetime.now().strftime('%Y%m%d000000')
_auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
'X-Network-Ten-Auth': _auth_header,
}, data=urlencode_postdata({
'email': username,
'password': password,
}))
return 'Bearer ' + data['jwt']['accessToken']
def _real_extract(self, url):
content_id = self._match_id(url)
data = self._download_json(
'https://10play.com.au/api/v1/videos/' + content_id, content_id)
headers = {}
if data.get('memberGated') is True:
_token = self._get_bearer_token(content_id)
headers = {'Authorization': _token}
_video_url = self._download_json(
data.get('playbackApiEndpoint'), content_id, 'Downloading video JSON',
headers=headers).get('source')
m3u8_url = self._request_webpage(HEADRequest(
_video_url), content_id).url
video_data = self._download_json(
f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}',
content_id, 'Downloading video JSON')
m3u8_url = self._request_webpage(
HEADRequest(video_data['items'][0]['HLSURL']),
content_id, 'Checking stream URL').url
if '10play-not-in-oz' in m3u8_url:
self.raise_geo_restricted(countries=['AU'])
# Attempt to get a higher quality stream
m3u8_url = m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000')
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
return {
'id': content_id,
'formats': formats,
'subtitles': {'en': [{'url': data.get('captionUrl')}]} if data.get('captionUrl') else None,
'id': data.get('altId') or content_id,
'duration': data.get('duration'),
'title': data.get('subtitle'),
'alt_title': data.get('title'),
'description': data.get('description'),
'age_limit': self._AUS_AGES.get(data.get('classification')),
'series': data.get('tvShow'),
'season_number': int_or_none(data.get('season')),
'episode_number': int_or_none(data.get('episode')),
'timestamp': data.get('published'),
'thumbnail': data.get('imageUrl'),
'subtitles': {'en': [{'url': data['captionUrl']}]} if url_or_none(data.get('captionUrl')) else None,
'uploader': 'Channel 10',
'uploader_id': '2199827728001',
**traverse_obj(data, {
'id': ('altId', {str}),
'duration': ('duration', {int_or_none}),
'title': ('subtitle', {str}),
'alt_title': ('title', {str}),
'description': ('description', {str}),
'age_limit': ('classification', {self._AUS_AGES.get}),
'series': ('tvShow', {str}),
'season_number': ('season', {int_or_none}),
'episode_number': ('episode', {int_or_none}),
'timestamp': ('published', {int_or_none}),
'thumbnail': ('imageUrl', {url_or_none}),
}),
}

View File

@@ -542,16 +542,12 @@ class TikTokBaseIE(InfoExtractor):
**COMMON_FORMAT_INFO,
'format_id': 'download',
'url': self._proto_relative_url(download_url),
'format_note': 'watermarked',
'preference': -2,
})
self._remove_duplicate_formats(formats)
for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']):
f.update({
'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '),
'preference': f.get('preference') or -2,
})
# Is it a slideshow with only audio for download?
if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})):
audio_url = aweme_detail['music']['playUrl']
@@ -565,7 +561,8 @@ class TikTokBaseIE(InfoExtractor):
'vcodec': 'none',
})
return formats
# Filter out broken formats, see https://github.com/yt-dlp/yt-dlp/issues/11034
return [f for f in formats if urllib.parse.urlparse(f['url']).hostname != 'www.tiktok.com']
def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False):
author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), {

View File

@@ -236,7 +236,7 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
},
},
],
'min_playlist_count': 4,
'playlist_mincount': 4,
}]
def _real_extract(self, url):

View File

@@ -6,6 +6,7 @@ from ..utils import (
ExtractorError,
int_or_none,
js_to_json,
strip_or_none,
traverse_obj,
url_or_none,
urlencode_postdata,
@@ -132,12 +133,12 @@ class TubiTvIE(InfoExtractor):
return {
'id': video_id,
'title': title,
'title': strip_or_none(title),
'formats': formats,
'subtitles': subtitles,
'season_number': int_or_none(season_number),
'episode_number': int_or_none(episode_number),
'episode': episode_title,
'episode': strip_or_none(episode_title),
**traverse_obj(video_data, {
'description': ('description', {str}),
'duration': ('duration', {int_or_none}),

View File

@@ -6,11 +6,12 @@ from ..utils import (
str_or_none,
strip_or_none,
traverse_obj,
update_url_query,
)
class TVerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)'
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'skip': 'videos are only available for 7 days',
'url': 'https://tver.jp/episodes/ep83nf3w4p',
@@ -21,80 +22,115 @@ class TVerIE(InfoExtractor):
'episode': '売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
'alt_title': '売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着',
'channel': 'テレビ朝日',
'id': 'ep83nf3w4p',
'ext': 'mp4',
'onair_label': '5月3日(火)放送分',
'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP財前直見×森泉親子の脱東京暮らし密着 テレビ朝日 5月3日(火)放送分',
},
'add_ie': ['BrightcoveNew'],
}, {
'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/',
'info_dict': {
'id': '6359578055112',
'ext': 'mp4',
'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」',
'timestamp': 1722279928,
'upload_date': '20240729',
'tags': ['20240729', 'japanese', 'japanmedal', 'paris'],
'uploader_id': '4774017240001',
'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg',
'duration': 670.571,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://tver.jp/corner/f0103888',
'only_matching': True,
}, {
'url': 'https://tver.jp/lp/f0033031',
'only_matching': True,
}, {
'url': 'https://tver.jp/series/srtxft431v',
'info_dict': {
'id': 'srtxft431v',
'title': '名探偵コナン',
},
'playlist': [
{
'md5': '779ffd97493ed59b0a6277ea726b389e',
'info_dict': {
'id': 'ref:conan-1137-241005',
'ext': 'mp4',
'title': '名探偵コナン #1137「行列店、味変の秘密」',
'uploader_id': '5330942432001',
'tags': [],
'channel': '読売テレビ',
'series': '名探偵コナン',
'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
'episode': '#1137「行列店、味変の秘密」',
'duration': 1469.077,
'timestamp': 1728030405,
'upload_date': '20241004',
'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
'thumbnail': r're:https://.+\.jpg',
},
}],
}, {
'url': 'https://tver.jp/series/sru35hwdd2',
'info_dict': {
'id': 'sru35hwdd2',
'title': '神回だけ見せます!',
},
'playlist_count': 11,
}, {
'url': 'https://tver.jp/series/srkq2shp9d',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
_PLATFORM_UID = None
_PLATFORM_TOKEN = None
_HEADERS = {'x-tver-platform-type': 'web'}
_PLATFORM_QUERY = {}
def _real_initialize(self):
create_response = self._download_json(
'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None,
note='Creating session', data=b'device_type=pc', headers={
'Origin': 'https://s.tver.jp',
'Referer': 'https://s.tver.jp/',
'Content-Type': 'application/x-www-form-urlencoded',
session_info = self._download_json(
'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
None, 'Creating session', data=b'device_type=pc')
self._PLATFORM_QUERY = traverse_obj(session_info, ('result', {
'platform_uid': 'platform_uid',
'platform_token': 'platform_token',
}))
def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
return self._download_json(
f'https://platform-api.tver.jp/service/api/{path}', video_id, note,
fatal=fatal, headers=self._HEADERS, query={
**self._PLATFORM_QUERY,
**(query or {}),
})
self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid'))
self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token'))
def _yield_episode_ids_for_series(self, series_id):
seasons_info = self._download_json(
f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}',
series_id, 'Downloading seasons info', headers=self._HEADERS)
for season_id in traverse_obj(
seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})):
episodes_info = self._call_platform_api(
f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info')
yield from traverse_obj(episodes_info, (
'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
def _real_extract(self, url):
video_id, video_type = self._match_valid_url(url).group('id', 'type')
if video_type == 'olympic/paris2024/video':
# Player ID is taken from .content.brightcove.E200.pro.pc.account_id:
# https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d=
return self.url_result(smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id),
{'geo_countries': ['JP']}), 'BrightcoveNew')
if video_type == 'series':
series_info = self._call_platform_api(
f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
return self.playlist_from_matches(
self._yield_episode_ids_for_series(video_id), video_id,
traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})),
ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}')
elif video_type not in {'series', 'episodes'}:
if video_type != 'episodes':
webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
video_id = self._match_id(self._search_regex(
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
webpage, 'url regex'))
episode_info = self._download_json(
f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
video_id, fatal=False,
query={
'platform_uid': self._PLATFORM_UID,
'platform_token': self._PLATFORM_TOKEN,
}, headers={
'x-tver-platform-type': 'web',
episode_info = self._call_platform_api(
f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={
'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
})
episode_content = traverse_obj(
episode_info, ('result', 'episode', 'content')) or {}
version = traverse_obj(episode_content, ('version', {str_or_none}), default='5')
video_info = self._download_json(
f'https://statics.tver.jp/content/episode/{video_id}.json', video_id,
query={
'v': str_or_none(episode_content.get('version')) or '5',
}, headers={
'Origin': 'https://tver.jp',
'Referer': 'https://tver.jp/',
})
f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info',
query={'v': version}, headers={'Referer': 'https://tver.jp/'})
p_id = video_info['video']['accountID']
r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False)
if not r_id:
@@ -110,6 +146,23 @@ class TVerIE(InfoExtractor):
provider = str_or_none(episode_content.get('productionProviderName'))
onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
thumbnails = [
{
'id': quality,
'url': update_url_query(
f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg',
{'v': version}),
'width': width,
'height': height,
}
for quality, width, height in [
('small', 480, 270),
('medium', 640, 360),
('large', 960, 540),
('xlarge', 1280, 720),
]
]
return {
'_type': 'url_transparent',
'title': title,
@@ -119,6 +172,7 @@ class TVerIE(InfoExtractor):
'alt_title': join_nonempty(title, provider, onair_label, delim=' '),
'channel': provider,
'description': str_or_none(video_info.get('description')),
'thumbnails': thumbnails,
'url': smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
'ie_key': 'BrightcoveNew',

View File

@@ -8,7 +8,7 @@ from ..utils import (
class TVN24IE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:(?!eurosport)[^/]+\.)?tvn24(?:bis)?\.pl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
'md5': 'fbdec753d7bc29d96036808275f2130c',

View File

@@ -270,7 +270,7 @@ class TwitCastingLiveIE(InfoExtractor):
class TwitCastingUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)'
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
'info_dict': {

View File

@@ -150,14 +150,6 @@ class TwitterBaseIE(InfoExtractor):
def is_logged_in(self):
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
# XXX: Temporary workaround until twitter.com => x.com migration is completed
def _real_initialize(self):
if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
return
# User has not yet been migrated to x.com and has passed twitter.com cookies
TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
@functools.cached_property
def _selected_api(self):
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
@@ -934,14 +926,13 @@ class TwitterIE(TwitterBaseIE):
'uploader_id': 'MoniqueCamarra',
'live_status': 'was_live',
'release_timestamp': 1658417414,
'description': 'md5:acce559345fd49f129c20dbcda3f1201',
'description': r're:Twitter Space participated by Sergej Sumlenny.+',
'timestamp': 1658407771,
'release_date': '20220721',
'upload_date': '20220721',
},
'add_ie': ['TwitterSpaces'],
'params': {'skip_download': 'm3u8'},
'skip': 'Requires authentication',
}, {
# URL specifies video number but --yes-playlist
'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
@@ -1764,7 +1755,7 @@ class TwitterSpacesIE(TwitterBaseIE):
'release_timestamp': 1659904215,
'release_date': '20220807',
},
'params': {'skip_download': 'm3u8'},
'skip': 'No longer available',
}, {
# post_live/TimedOut but downloadable
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
@@ -1780,6 +1771,8 @@ class TwitterSpacesIE(TwitterBaseIE):
'upload_date': '20230413',
'release_timestamp': 1681839000,
'release_date': '20230418',
'protocol': 'm3u8', # ffmpeg is forced
'container': 'm4a_dash', # audio-only format fixup is applied
},
'params': {'skip_download': 'm3u8'},
}, {
@@ -1790,11 +1783,31 @@ class TwitterSpacesIE(TwitterBaseIE):
'ext': 'm4a',
'title': '',
'description': 'Twitter Space participated by nobody yet',
'uploader': '息根とめる🔪Twitchで復活',
'uploader': '息根とめる',
'uploader_id': 'tomeru_ikinone',
'live_status': 'was_live',
'timestamp': 1685617198,
'upload_date': '20230601',
'protocol': 'm3u8', # ffmpeg is forced
'container': 'm4a_dash', # audio-only format fixup is applied
},
'params': {'skip_download': 'm3u8'},
}, {
# Video Space
'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
'info_dict': {
'id': '1DXGydznBYWKM',
'ext': 'mp4',
'title': 'America and Israels “special relationship”',
'description': 'Twitter Space participated by nobody yet',
'uploader': 'Candace Owens',
'uploader_id': 'RealCandaceO',
'live_status': 'was_live',
'timestamp': 1723931351,
'upload_date': '20240817',
'release_timestamp': 1723932000,
'release_date': '20240817',
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
},
'params': {'skip_download': 'm3u8'},
}]
@@ -1834,8 +1847,6 @@ class TwitterSpacesIE(TwitterBaseIE):
def _real_extract(self, url):
space_id = self._match_id(url)
if not self.is_logged_in:
self.raise_login_required('Twitter Spaces require authentication')
space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
if not space_data:
raise ExtractorError('Twitter Space not found', expected=True)
@@ -1854,13 +1865,17 @@ class TwitterSpacesIE(TwitterBaseIE):
source = traverse_obj(
self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
formats = self._extract_m3u8_formats( # XXX: Some Spaces need ffmpeg as downloader
source, metadata['media_key'], 'm4a', entry_protocol='m3u8', live=is_live,
headers=headers, fatal=False) if source else []
for fmt in formats:
fmt.update({'vcodec': 'none', 'acodec': 'aac'})
if not is_live:
fmt['container'] = 'm4a_dash'
is_audio_space = source and 'audio-space' in source
formats = self._extract_m3u8_formats(
source, metadata['media_key'], 'm4a' if is_audio_space else 'mp4',
# XXX: Some audio-only Spaces need ffmpeg as downloader
entry_protocol='m3u8' if is_audio_space else 'm3u8_native',
live=is_live, headers=headers, fatal=False) if source else []
if is_audio_space:
for fmt in formats:
fmt.update({'vcodec': 'none', 'acodec': 'aac'})
if not is_live:
fmt['container'] = 'm4a_dash'
participants = ', '.join(traverse_obj(
space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'

View File

@@ -73,7 +73,7 @@ class UstreamIE(InfoExtractor):
def num_to_hex(n):
return hex(n)[2:]
rnd = random.randrange
rnd = lambda x: random.randrange(int(x))
if not extra_note:
extra_note = ''

View File

@@ -8,7 +8,8 @@ from ..utils import (
int_or_none,
parse_duration,
qualities,
try_get,
remove_start,
strip_or_none,
)
@@ -108,7 +109,7 @@ class VeohIE(InfoExtractor):
categories = metadata.get('categoryPath')
if not categories:
category = try_get(video, lambda x: x['category'].strip().removeprefix('category_'))
category = remove_start(strip_or_none(video.get('category')), 'category_')
categories = [category] if category else None
tags = video.get('tags')

148
yt_dlp/extractor/vidflex.py Normal file
View File

@@ -0,0 +1,148 @@
import base64
import json
from .common import InfoExtractor
from ..utils import (
int_or_none,
join_nonempty,
mimetype2ext,
url_or_none,
)
from ..utils.traversal import traverse_obj
class VidflexIE(InfoExtractor):
_DOMAINS_RE = [
r'[^.]+\.vidflex\.tv',
r'(?:www\.)?acactv\.ca',
r'(?:www\.)?albertalacrossetv\.com',
r'(?:www\.)?cjfltv\.com',
r'(?:www\.)?figureitoutbaseball\.com',
r'(?:www\.)?ocaalive\.com',
r'(?:www\.)?pegasussports\.tv',
r'(?:www\.)?praxisseries\.ca',
r'(?:www\.)?silenticetv\.com',
r'(?:www\.)?tuffhedemantv\.com',
r'(?:www\.)?watchfuntv\.com',
r'live\.ofsaa\.on\.ca',
r'tv\.procoro\.ca',
r'tv\.realcastmedia\.net',
r'tv\.fringetheatre\.ca',
r'video\.haisla\.ca',
r'video\.hockeycanada\.ca',
r'video\.huuayaht\.org',
r'video\.turningpointensemble\.ca',
r'videos\.livingworks\.net',
r'videos\.telusworldofscienceedmonton\.ca',
r'watch\.binghamtonbulldogs\.com',
r'watch\.rekindle\.tv',
r'watch\.wpca\.com',
]
_VALID_URL = rf'https?://(?:{"|".join(_DOMAINS_RE)})/[a-z]{{2}}(?:-[a-z]{{2}})?/c/[\w-]+\.(?P<id>\d+)'
_TESTS = [{
'url': 'https://video.hockeycanada.ca/en/c/nwt-micd-up-with-jamie-lee-rattray.107486',
'only_matching': True,
}, {
# m3u8 + https
'url': 'https://video.hockeycanada.ca/en-us/c/nwt-micd-up-with-jamie-lee-rattray.107486',
'info_dict': {
'id': '107486',
'title': 'NWT: Micd up with Jamie Lee Rattray',
'ext': 'mp4',
'duration': 115,
'timestamp': 1634310409,
'upload_date': '20211015',
'tags': ['English', '2021', "National Women's Team"],
'description': 'md5:efb1cf6165b48cc3f5555c4262dd5b23',
'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
},
'params': {'skip_download': True},
}, {
'url': 'https://video.hockeycanada.ca/en/c/mwc-remembering-the-wild-ride-in-riga.112307',
'info_dict': {
'id': '112307',
'title': 'MWC: Remembering the wild ride in Riga',
'ext': 'mp4',
'duration': 322,
'timestamp': 1716235607,
'upload_date': '20240520',
'tags': ['English', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
'description': r're:.+Canadas National Mens Team.+',
'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
},
'params': {'skip_download': True},
}, {
# the same video in French
'url': 'https://video.hockeycanada.ca/fr/c/cmm-retour-sur-un-parcours-endiable-a-riga.112304',
'info_dict': {
'id': '112304',
'title': 'CMM : Retour sur un parcours endiablé à Riga',
'ext': 'mp4',
'duration': 322,
'timestamp': 1716235545,
'upload_date': '20240520',
'tags': ['French', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
'description': 'md5:cf825222882a3dab1cd62cffcf3b4d1f',
'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
},
'params': {'skip_download': True},
}, {
'url': 'https://myfbcgreenville.vidflex.tv/en/c/may-12th-2024.658',
'only_matching': True,
}, {
'url': 'https://www.figureitoutbaseball.com/en/c/fiob-podcast-14-dan-bertolini-ncaa-d1-head-coach-recorded-11-29-2018.1367',
'only_matching': True,
}, {
'url': 'https://videos.telusworldofscienceedmonton.ca/en/c/the-aurora-project-timelapse-4.577',
'only_matching': True,
}, {
'url': 'https://www.tuffhedemantv.com/en/c/2022-tuff-hedeman-tour-hobbs-nm-january-22.227',
'only_matching': True,
}, {
'url': 'https://www.albertalacrossetv.com/en/c/up-floor-ground-balls-one-more.3449',
'only_matching': True,
}, {
'url': 'https://www.silenticetv.com/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
'only_matching': True,
}, {
'url': 'https://jphl.vidflex.tv/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data_url = self._html_search_regex(
r'content_api:\s*(["\'])(?P<url>https?://(?:(?!\1).)+)\1', webpage, 'content api url', group='url')
media_config = traverse_obj(
self._download_json(data_url, video_id),
('config', {base64.b64decode}, {bytes.decode}, {json.loads}, {dict}))
return {
'id': video_id,
'formats': list(self._yield_formats(media_config, video_id)),
**self._search_json_ld(
webpage.replace('/*<![CDATA[*/', '').replace('/*]]>*/', ''), video_id),
}
def _yield_formats(self, media_config, video_id):
for media_source in traverse_obj(media_config, ('media', 'source', lambda _, v: url_or_none(v['src']))):
media_url = media_source['src']
media_type = mimetype2ext(media_source.get('type'))
if media_type == 'm3u8':
yield from self._extract_m3u8_formats(media_url, video_id, fatal=False, m3u8_id='hls')
elif media_type == 'mp4':
bitrate = self._search_regex(r'_(\d+)k\.mp4', media_url, 'bitrate', default=None)
yield {
'format_id': join_nonempty('http', bitrate),
'url': media_url,
'ext': 'mp4',
'tbr': int_or_none(bitrate),
}
else:
yield {
'url': media_url,
'ext': media_type,
}

View File

@@ -21,6 +21,7 @@ from ..utils import (
parse_filesize,
parse_iso8601,
parse_qs,
qualities,
smuggle_url,
str_or_none,
traverse_obj,
@@ -146,6 +147,8 @@ class VimeoBaseInfoExtractor(InfoExtractor):
})
# TODO: fix handling of 308 status code returned for live archive manifest requests
QUALITIES = ('low', 'medium', 'high')
quality = qualities(QUALITIES)
sep_pattern = r'/sep/video/'
for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
@@ -166,6 +169,11 @@ class VimeoBaseInfoExtractor(InfoExtractor):
m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id,
note=f'Downloading {cdn_name} m3u8 information',
fatal=False)
# m3u8 doesn't give audio bitrates; need to prioritize based on GROUP-ID
# See: https://github.com/yt-dlp/yt-dlp/issues/10854
for f in fmts:
if mobj := re.search(rf'audio-({"|".join(QUALITIES)})', f['format_id']):
f['quality'] = quality(mobj.group(1))
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif files_type == 'dash':
@@ -234,13 +242,30 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
}
def _extract_original_format(self, url, video_id, unlisted_hash=None):
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None, **kwargs):
return self._download_json(
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
video_id, 'Downloading API JSON', headers={
'Authorization': f'jwt {jwt_token}',
'Accept': 'application/json',
}, query={
'fields': ','.join((
'config_url', 'created_time', 'description', 'download', 'license',
'metadata.connections.comments.total', 'metadata.connections.likes.total',
'release_time', 'stats.plays')),
}, **kwargs)
def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None, api_data=None):
# Original/source formats are only available when logged in
if not self._get_cookies('https://vimeo.com/').get('vimeo'):
return
query = {'action': 'load_download_config'}
if unlisted_hash:
query['unlisted_hash'] = unlisted_hash
download_data = self._download_json(
url, video_id, fatal=False, query=query,
headers={'X-Requested-With': 'XMLHttpRequest'},
url, video_id, 'Loading download config JSON', fatal=False,
query=query, headers={'X-Requested-With': 'XMLHttpRequest'},
expected_status=(403, 404)) or {}
source_file = download_data.get('source_file')
download_url = try_get(source_file, lambda x: x['download_url'])
@@ -261,15 +286,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'quality': 1,
}
jwt_response = self._download_json(
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
if not jwt_response.get('jwt'):
jwt = jwt or traverse_obj(self._download_json(
'https://vimeo.com/_rv/viewer', video_id, 'Downloading jwt token', fatal=False), ('jwt', {str}))
if not jwt:
return
headers = {'Authorization': 'jwt {}'.format(jwt_response['jwt']), 'Accept': 'application/json'}
original_response = self._download_json(
f'https://api.vimeo.com/videos/{video_id}', video_id,
headers=headers, fatal=False, expected_status=(403, 404)) or {}
for download_data in original_response.get('download') or []:
original_response = api_data or self._call_videos_api(
video_id, jwt, unlisted_hash, fatal=False, expected_status=(403, 404))
for download_data in traverse_obj(original_response, ('download', ..., {dict})):
download_url = download_data.get('link')
if not download_url or download_data.get('quality') != 'source':
continue
@@ -354,7 +377,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'skip': 'No longer available',
},
{
'url': 'http://player.vimeo.com/video/54469442',
'url': 'https://player.vimeo.com/video/54469442',
'md5': '619b811a4417aa4abe78dc653becf511',
'note': 'Videos that embed the url in the player page',
'info_dict': {
@@ -370,6 +393,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': {
'format': 'best[protocol=https]',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'http://vimeo.com/68375962',
@@ -379,22 +403,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
'id': '68375962',
'ext': 'mp4',
'title': 'youtube-dl password protected test video',
'timestamp': 1371200155,
'timestamp': 1371214555,
'upload_date': '20130614',
'release_timestamp': 1371214555,
'release_date': '20130614',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10,
'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
'view_count': int,
'comment_count': int,
'like_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
},
'params': {
'format': 'best[protocol=https]',
'videopassword': 'youtube-dl',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'http://vimeo.com/channels/keypeele/75629013',
@@ -418,29 +443,38 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': int,
},
'params': {'format': 'http-1080p'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'http://vimeo.com/76979871',
'note': 'Video with subtitles',
'info_dict': {
'id': '76979871',
'ext': 'mov',
'ext': 'mp4',
'title': 'The New Vimeo Player (You Know, For Videos)',
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
'timestamp': 1381846109,
'description': str, # FIXME: Dynamic SEO spam description
'timestamp': 1381860509,
'upload_date': '20131015',
'release_timestamp': 1381860509,
'release_date': '20131015',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
'uploader_id': 'staff',
'uploader': 'Vimeo Staff',
'uploader': 'Vimeo',
'duration': 62,
'comment_count': int,
'like_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/452001751-8216e0571c251a09d7a8387550942d89f7f86f6398f8ed886e639b0dd50d3c90-d_1280',
'subtitles': {
'de': [{'ext': 'vtt'}],
'en': [{'ext': 'vtt'}],
'es': [{'ext': 'vtt'}],
'fr': [{'ext': 'vtt'}],
'de': 'count:3',
'en': 'count:3',
'es': 'count:3',
'fr': 'count:3',
},
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'expected_warnings': [
'Ignoring subtitle tracks found in the HLS manifest',
'Failed to parse XML: not well-formed',
],
},
{
# from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
@@ -456,11 +490,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 118,
'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# contains original format
# contains Original format
'url': 'https://vimeo.com/33951933',
'md5': '53c688fa95a55bf4b7293d37a89c5c53',
# 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
'info_dict': {
'id': '33951933',
'ext': 'mp4',
@@ -476,15 +511,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280',
'like_count': int,
'tags': 'count:11',
},
# 'params': {'format': 'Original'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'note': 'Contains original format not accessible in webpage',
'note': 'Contains source format not accessible in webpage',
'url': 'https://vimeo.com/393756517',
'md5': 'c464af248b592190a5ffbb5d33f382b0',
# 'md5': 'c464af248b592190a5ffbb5d33f382b0',
'info_dict': {
'id': '393756517',
'ext': 'mov',
# 'ext': 'mov',
'ext': 'mp4',
'timestamp': 1582642091,
'uploader_id': 'frameworkla',
'title': 'Straight To Hell - Sabrina: Netflix',
@@ -495,6 +534,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280',
'uploader_url': 'https://vimeo.com/frameworkla',
},
# 'params': {'format': 'source'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# only available via https://vimeo.com/channels/tributes/6213729 and
@@ -511,16 +552,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
'channel_id': 'tributes',
'timestamp': 1250886430,
'upload_date': '20090821',
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
'description': str, # FIXME: Dynamic SEO spam description
'duration': 321,
'comment_count': int,
'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280',
'like_count': int,
'tags': ['[the shining', 'vimeohq', 'cv', 'vimeo tribute]'],
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# redirects to ondemand extractor and should be passed through it
@@ -543,28 +586,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
'skip': 'this page is no longer available.',
},
{
'url': 'http://player.vimeo.com/video/68375962',
'url': 'https://player.vimeo.com/video/68375962',
'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
'info_dict': {
'id': '68375962',
'ext': 'mp4',
'title': 'youtube-dl password protected test video',
'timestamp': 1371200155,
'upload_date': '20130614',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10,
'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
'view_count': int,
'comment_count': int,
'like_count': int,
},
'params': {
'format': 'best[protocol=https]',
'videopassword': 'youtube-dl',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
@@ -592,7 +630,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
'uploader': 'Philipp Hagemeister',
'uploader_id': 'user20132939',
'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b',
'description': str, # FIXME: Dynamic SEO spam description
'upload_date': '20150209',
'timestamp': 1423518307,
'thumbnail': 'https://i.vimeocdn.com/video/default_1280',
@@ -606,6 +644,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'format': 'best[protocol=https]',
'videopassword': 'youtube-dl',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# source file returns 403: Forbidden
@@ -633,11 +672,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'release_date': '20160329',
},
'params': {'skip_download': True},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'https://vimeo.com/138909882',
'info_dict': {
'id': '138909882',
# 'ext': 'm4v',
'ext': 'mp4',
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
'description': 'md5:5967e090768a831488f6e74b7821b3c1',
@@ -645,11 +686,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'Firework Champions',
'upload_date': '20150910',
'timestamp': 1441901895,
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d_1280',
'uploader_url': 'https://vimeo.com/fireworkchampions',
'tags': 'count:6',
'duration': 229,
'view_count': int,
'like_count': int,
'comment_count': int,
},
'params': {
'skip_download': True,
'format': 'Original',
# 'format': 'source',
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
'url': 'https://vimeo.com/channels/staffpicks/143603739',
@@ -670,8 +719,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': int,
'uploader_url': 'https://vimeo.com/karimhd',
'channel_url': 'https://vimeo.com/channels/staffpicks',
'tags': 'count:6',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# requires passing unlisted_hash(a52724358e) to load_download_config request
@@ -701,6 +752,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': {
'skip_download': True,
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# chapters must be sorted, see: https://github.com/yt-dlp/yt-dlp/issues/5308
@@ -735,6 +787,48 @@ class VimeoIE(VimeoBaseInfoExtractor):
},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# vimeo.com URL with unlisted hash and Original format
'url': 'https://vimeo.com/144579403/ec02229140',
# 'md5': '6b662c2884e0373183fbde2a0d15cb78',
'info_dict': {
'id': '144579403',
'ext': 'mp4',
'title': 'SALESMANSHIP',
'description': 'md5:4338302f347a1ff8841b4a3aecaa09f0',
'uploader': 'Off the Picture Pictures',
'uploader_id': 'offthepicturepictures',
'uploader_url': 'https://vimeo.com/offthepicturepictures',
'duration': 669,
'upload_date': '20151104',
'timestamp': 1446607180,
'release_date': '20151104',
'release_timestamp': 1446607180,
'like_count': int,
'view_count': int,
'comment_count': int,
'thumbnail': r're:https://i\.vimeocdn\.com/video/1018638656-[\da-f]+-d_1280',
},
# 'params': {'format': 'Original'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# player.vimeo.com URL with source format
'url': 'https://player.vimeo.com/video/859028877',
# 'md5': '19ca3d2463441dee2d2f0671ac2916a2',
'info_dict': {
'id': '859028877',
'ext': 'mp4',
'title': 'Ariana Grande - Honeymoon Avenue (Live from London)',
'uploader': 'Raja Virdi',
'uploader_id': 'rajavirdi',
'uploader_url': 'https://vimeo.com/rajavirdi',
'duration': 309,
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d_1280',
},
# 'params': {'format': 'source'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# user playlist alias -> https://vimeo.com/258705797
'url': 'https://vimeo.com/user26785108/newspiritualguide',
@@ -768,16 +862,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise ExtractorError('Wrong video password', expected=True)
return checked
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None):
return self._download_json(
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
video_id, 'Downloading API JSON', headers={
'Authorization': f'jwt {jwt_token}',
'Accept': 'application/json',
}, query={
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
})
def _extract_from_api(self, video_id, unlisted_hash=None):
viewer = self._download_json(
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
@@ -785,11 +869,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
for retry in (False, True):
try:
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
break
except ExtractorError as e:
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
and 'password' in traverse_obj(
e.cause.response.read(),
({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'),
self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
({json.loads}, 'invalid_parameters', ..., 'field'),
)):
self._verify_video_password(
video_id, self._get_video_password(), viewer['xsrft'])
@@ -798,6 +883,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
info = self._parse_config(self._download_json(
video['config_url'], video_id), video_id)
source_format = self._extract_original_format(
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, jwt=viewer['jwt'], api_data=video)
if source_format:
info['formats'].append(source_format)
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
info.update({
'description': video.get('description'),
@@ -899,7 +989,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
if config.get('view') == 4:
config = self._verify_player_video_password(
redirect_url, video_id, headers)
return self._parse_config(config, video_id)
info = self._parse_config(config, video_id)
source_format = self._extract_original_format(
f'https://vimeo.com/{video_id}', video_id, unlisted_hash)
if source_format:
info['formats'].append(source_format)
return info
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config:
@@ -1269,6 +1364,20 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
IE_DESC = 'Review pages on vimeo'
_VALID_URL = r'https?://vimeo\.com/(?P<user>[^/?#]+)/review/(?P<id>\d+)/(?P<hash>[\da-f]{10})'
_TESTS = [{
'url': 'https://vimeo.com/user170863801/review/996447483/a316d6ed8d',
'info_dict': {
'id': '996447483',
'ext': 'mp4',
'title': 'Rodeo day 1-_2',
'uploader': 'BROADKAST',
'uploader_id': 'user170863801',
'uploader_url': 'https://vimeo.com/user170863801',
'duration': 30,
'thumbnail': 'https://i.vimeocdn.com/video/1912612821-09a43bd2e75c203d503aed89de7534f28fc4474a48f59c51999716931a246af5-d_1280',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML'],
}, {
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253',
'info_dict': {
@@ -1282,6 +1391,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'thumbnail': 'https://i.vimeocdn.com/video/450115033-43303819d9ebe24c2630352e18b7056d25197d09b3ae901abdac4c4f1d68de71-d_1280',
'uploader_url': 'https://vimeo.com/user21297594',
},
'skip': '404 Not Found',
}, {
'note': 'video player needs Referer',
'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
@@ -1316,6 +1426,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash')
data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}'
data = self._download_json(data_url, video_id)
viewer = {}
if data.get('isLocked') is True:
video_password = self._get_video_password()
viewer = self._download_json(
@@ -1327,8 +1438,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id)
source_format = self._extract_original_format(
f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action', video_id,
unlisted_hash=traverse_obj(config_url, ({parse_qs}, 'h', -1)))
f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action',
video_id, unlisted_hash=clip_data.get('unlistedHash'), jwt=viewer.get('jwt'))
if source_format:
info_dict['formats'].append(source_format)
info_dict['description'] = clean_html(clip_data.get('description'))

View File

@@ -90,7 +90,7 @@ class ViuIE(ViuBaseIE):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
for key, value in video_data.items():
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
mobj = re.match(r'subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
if not mobj:
continue
subtitles.setdefault(mobj.group('lang'), []).append({

View File

@@ -27,8 +27,9 @@ from ..utils import (
class WeverseBaseIE(InfoExtractor):
_NETRC_MACHINE = 'weverse'
_ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api/v2'
_ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api'
_API_HEADERS = {
'Accept': 'application/json',
'Referer': 'https://weverse.io/',
'WEV-device-Id': str(uuid.uuid4()),
}
@@ -39,14 +40,14 @@ class WeverseBaseIE(InfoExtractor):
headers = {
'x-acc-app-secret': '5419526f1c624b38b10787e5c10b2a7a',
'x-acc-app-version': '2.2.6',
'x-acc-app-version': '3.3.6',
'x-acc-language': 'en',
'x-acc-service-id': 'weverse',
'x-acc-trace-id': str(uuid.uuid4()),
'x-clog-user-device-id': str(uuid.uuid4()),
}
valid_username = traverse_obj(self._download_json(
f'{self._ACCOUNT_API_BASE}/signup/email/status', None, note='Checking username',
f'{self._ACCOUNT_API_BASE}/v2/signup/email/status', None, note='Checking username',
query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword')
if not valid_username:
raise ExtractorError('Invalid username provided', expected=True)
@@ -54,8 +55,9 @@ class WeverseBaseIE(InfoExtractor):
headers['content-type'] = 'application/json'
try:
auth = self._download_json(
f'{self._ACCOUNT_API_BASE}/auth/token/by-credentials', None, data=json.dumps({
f'{self._ACCOUNT_API_BASE}/v3/auth/token/by-credentials', None, data=json.dumps({
'email': username,
'otpSessionId': 'BY_PASS',
'password': password,
}, separators=(',', ':')).encode(), headers=headers, note='Logging in')
except ExtractorError as e:
@@ -78,8 +80,10 @@ class WeverseBaseIE(InfoExtractor):
# From https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/main.e206f7c1.js:
key = b'1b9cb6378d959b45714bec49971ade22e6e24e42'
api_path = update_url_query(ep, {
# 'gcc': 'US',
'appId': 'be4d79eb8fc7bd008ee82c8ec4ff6fd4',
'language': 'en',
'os': 'WEB',
'platform': 'WEB',
'wpf': 'pc',
})
@@ -152,7 +156,7 @@ class WeverseBaseIE(InfoExtractor):
'description': ((('extension', 'mediaInfo', 'body'), 'body'), {str}),
'uploader': ('author', 'profileName', {str}),
'uploader_id': ('author', 'memberId', {str}),
'creator': ('community', 'communityName', {str}),
'creators': ('community', 'communityName', {str}, all),
'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
'duration': ('extension', 'video', 'playTime', {float_or_none}),
'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),
@@ -196,7 +200,7 @@ class WeverseIE(WeverseBaseIE):
'channel': 'billlie',
'channel_id': '72',
'channel_url': 'https://weverse.io/billlie',
'creator': 'Billlie',
'creators': ['Billlie'],
'timestamp': 1666262062,
'upload_date': '20221020',
'release_timestamp': 1666262058,
@@ -222,7 +226,7 @@ class WeverseIE(WeverseBaseIE):
'channel': 'lesserafim',
'channel_id': '47',
'channel_url': 'https://weverse.io/lesserafim',
'creator': 'LE SSERAFIM',
'creators': ['LE SSERAFIM'],
'timestamp': 1659353400,
'upload_date': '20220801',
'release_timestamp': 1659353400,
@@ -286,7 +290,7 @@ class WeverseIE(WeverseBaseIE):
elif live_status == 'is_live':
video_info = self._call_api(
f'/video/v1.0/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
f'/video/v1.2/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
video_id, note='Downloading live JSON')
playback = self._parse_json(video_info['lipPlayback'], video_id)
m3u8_url = traverse_obj(playback, (
@@ -302,7 +306,7 @@ class WeverseIE(WeverseBaseIE):
else:
infra_video_id = post['extension']['video']['infraVideoId']
in_key = self._call_api(
f'/video/v1.0/vod/{api_video_id}/inKey?preview=false', video_id,
f'/video/v1.1/vod/{api_video_id}/inKey?preview=false', video_id,
data=b'{}', note='Downloading VOD API key')['inKey']
video_info = self._download_json(
@@ -347,7 +351,6 @@ class WeverseMediaIE(WeverseBaseIE):
_VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<artist>[^/?#]+)/media/(?P<id>[\d-]+)'
_TESTS = [{
'url': 'https://weverse.io/billlie/media/4-116372884',
'md5': '8efc9cfd61b2f25209eb1a5326314d28',
'info_dict': {
'id': 'e-C9wLSQs6o',
'ext': 'mp4',
@@ -358,8 +361,9 @@ class WeverseMediaIE(WeverseBaseIE):
'channel_url': 'https://www.youtube.com/channel/UCyc9sUCxELTDK9vELO5Fzeg',
'uploader': 'Billlie',
'uploader_id': '@Billlie',
'uploader_url': 'http://www.youtube.com/@Billlie',
'uploader_url': 'https://www.youtube.com/@Billlie',
'upload_date': '20230403',
'timestamp': 1680533992,
'duration': 211,
'age_limit': 0,
'playable_in_embed': True,
@@ -372,6 +376,8 @@ class WeverseMediaIE(WeverseBaseIE):
'thumbnail': 'https://i.ytimg.com/vi/e-C9wLSQs6o/maxresdefault.jpg',
'categories': ['Entertainment'],
'tags': 'count:7',
'channel_is_verified': True,
'heatmap': 'count:100',
},
}, {
'url': 'https://weverse.io/billlie/media/3-102914520',
@@ -386,7 +392,7 @@ class WeverseMediaIE(WeverseBaseIE):
'channel': 'billlie',
'channel_id': '72',
'channel_url': 'https://weverse.io/billlie',
'creator': 'Billlie',
'creators': ['Billlie'],
'timestamp': 1662174000,
'upload_date': '20220903',
'release_timestamp': 1662174000,
@@ -432,7 +438,7 @@ class WeverseMomentIE(WeverseBaseIE):
'uploader_id': '66a07e164b56a696ee71c99315ffe27b',
'channel': 'secretnumber',
'channel_id': '56',
'creator': 'SECRET NUMBER',
'creators': ['SECRET NUMBER'],
'duration': 10,
'upload_date': '20230405',
'timestamp': 1680653968,
@@ -441,7 +447,6 @@ class WeverseMomentIE(WeverseBaseIE):
'comment_count': int,
'availability': 'needs_auth',
},
'skip': 'Moment has expired',
}]
def _real_extract(self, url):
@@ -571,7 +576,7 @@ class WeverseLiveIE(WeverseBaseIE):
'channel': 'purplekiss',
'channel_id': '35',
'channel_url': 'https://weverse.io/purplekiss',
'creator': 'PURPLE KISS',
'creators': ['PURPLE KISS'],
'timestamp': 1680780892,
'upload_date': '20230406',
'release_timestamp': 1680780883,
@@ -584,6 +589,31 @@ class WeverseLiveIE(WeverseBaseIE):
'live_status': 'is_live',
},
'skip': 'Livestream has ended',
}, {
'url': 'https://weverse.io/lesserafim',
'info_dict': {
'id': '4-181521628',
'ext': 'mp4',
'title': r're:심심해서요',
'description': '',
'uploader': '채채🤎',
'uploader_id': 'd49b8b06f3cc1d92d655b25ab27ac2e7',
'channel': 'lesserafim',
'channel_id': '47',
'creators': ['LE SSERAFIM'],
'channel_url': 'https://weverse.io/lesserafim',
'timestamp': 1728570273,
'upload_date': '20241010',
'release_timestamp': 1728570264,
'release_date': '20241010',
'thumbnail': r're:https://phinf\.wevpstatic\.net/.+\.png',
'view_count': int,
'like_count': int,
'comment_count': int,
'availability': 'needs_auth',
'live_status': 'is_live',
},
'skip': 'Livestream has ended',
}, {
'url': 'https://weverse.io/billlie/',
'only_matching': True,

View File

@@ -8,6 +8,7 @@ from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
filter_dict,
float_or_none,
int_or_none,
parse_qs,
@@ -25,16 +26,25 @@ class WistiaBaseIE(InfoExtractor):
def _download_embed_config(self, config_type, config_id, referer):
base_url = self._EMBED_BASE_URL + f'{config_type}/{config_id}'
video_password = self.get_param('videopassword')
embed_config = self._download_json(
base_url + '.json', config_id, headers={
'Referer': referer if referer.startswith('http') else base_url, # Some videos require this.
})
}, query=filter_dict({'password': video_password}))
error = traverse_obj(embed_config, 'error')
if error:
raise ExtractorError(
f'Error while getting the playlist: {error}', expected=True)
if traverse_obj(embed_config, (
'media', ('embed_options', 'embedOptions'), 'plugin',
'passwordProtectedVideo', 'on', any)) == 'true':
if video_password:
raise ExtractorError('Invalid video password', expected=True)
raise ExtractorError(
'This content is password-protected. Use the --video-password option', expected=True)
return embed_config
def _get_real_ext(self, url):

View File

@@ -1,7 +1,17 @@
import base64
import math
import time
from .common import InfoExtractor
from ..utils import InAdvancePagedList, str_or_none, traverse_obj, try_call
from .videa import VideaIE
from ..utils import (
InAdvancePagedList,
int_or_none,
str_or_none,
traverse_obj,
try_call,
update_url_query,
)
class XimalayaBaseIE(InfoExtractor):
@@ -11,7 +21,7 @@ class XimalayaBaseIE(InfoExtractor):
class XimalayaIE(XimalayaBaseIE):
IE_NAME = 'ximalaya'
IE_DESC = '喜马拉雅FM'
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
_TESTS = [
{
'url': 'http://www.ximalaya.com/sound/47740352/',
@@ -71,23 +81,92 @@ class XimalayaIE(XimalayaBaseIE):
'like_count': int,
},
},
{
# VIP-restricted audio
'url': 'https://www.ximalaya.com/sound/562111701',
'only_matching': True,
},
]
@staticmethod
def _decrypt_filename(file_id, seed):
cgstr = ''
key = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890'
for _ in key:
seed = float(int(211 * seed + 30031) % 65536)
r = int(seed / 65536 * len(key))
cgstr += key[r]
key = key.replace(key[r], '')
parts = file_id.split('*')
filename = ''.join(cgstr[int(part)] for part in parts if part.isdecimal())
if not filename.startswith('/'):
filename = '/' + filename
return filename
@staticmethod
def _decrypt_url_params(encrypted_params):
params = VideaIE.rc4(
base64.b64decode(encrypted_params), 'xkt3a41psizxrh9l').split('-')
# sign, token, timestamp
return params[1], params[2], params[3]
def _real_extract(self, url):
scheme = 'https' if url.startswith('https') else 'http'
audio_id = self._match_id(url)
audio_info_file = f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json'
audio_info = self._download_json(
audio_info_file, audio_id,
f'Downloading info json {audio_info_file}', 'Unable to download info file')
f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json', audio_id,
'Downloading info json', 'Unable to download info file')
formats = [{
formats = []
# NOTE: VIP-restricted audio
if audio_info.get('is_paid'):
ts = int(time.time())
vip_info = self._download_json(
f'{scheme}://mpay.ximalaya.com/mobile/track/pay/{audio_id}/{ts}',
audio_id, 'Downloading VIP info json', 'Unable to download VIP info file',
query={'device': 'pc', 'isBackend': 'true', '_': ts})
filename = self._decrypt_filename(vip_info['fileId'], vip_info['seed'])
sign, token, timestamp = self._decrypt_url_params(vip_info['ep'])
vip_url = update_url_query(
f'{vip_info["domain"]}/download/{vip_info["apiVersion"]}{filename}', {
'sign': sign,
'token': token,
'timestamp': timestamp,
'buy_key': vip_info['buyKey'],
'duration': vip_info['duration'],
})
fmt = {
'format_id': 'vip',
'url': vip_url,
'vcodec': 'none',
}
if '_preview_' in vip_url:
self.report_warning(
f'This tracks requires a VIP account. Using a sample instead. {self._login_hint()}')
fmt.update({
'format_note': 'Sample',
'preference': -10,
**traverse_obj(vip_info, {
'filesize': ('sampleLength', {int_or_none}),
'duration': ('sampleDuration', {int_or_none}),
}),
})
else:
fmt.update(traverse_obj(vip_info, {
'filesize': ('totalLength', {int_or_none}),
'duration': ('duration', {int_or_none}),
}))
fmt['abr'] = try_call(lambda: fmt['filesize'] * 8 / fmt['duration'] / 1024)
formats.append(fmt)
formats.extend([{
'format_id': f'{bps}k',
'url': audio_info[k],
'abr': bps,
'vcodec': 'none',
} for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)]
} for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)])
thumbnails = []
for k in audio_info:

Some files were not shown because too many files have changed in this diff Show More