mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-02-05 21:46:59 +00:00
Merge branch 'yt-dlp:master' into murrtube
This commit is contained in:
@@ -1,16 +1,25 @@
|
||||
from ..compat.compat_utils import passthrough_module
|
||||
from ..globals import extractors as _extractors_context
|
||||
from ..globals import plugin_ies as _plugin_ies_context
|
||||
from ..plugins import PluginSpec, register_plugin_spec
|
||||
|
||||
passthrough_module(__name__, '.extractors')
|
||||
del passthrough_module
|
||||
|
||||
register_plugin_spec(PluginSpec(
|
||||
module_name='extractor',
|
||||
suffix='IE',
|
||||
destination=_extractors_context,
|
||||
plugin_destination=_plugin_ies_context,
|
||||
))
|
||||
|
||||
|
||||
def gen_extractor_classes():
    """ Return a list of supported extractors.
    The order does matter; the first extractor matched is the one handling the URL.

    The list is a fresh copy of the values currently stored in
    `_extractors_context.value`, so callers may mutate it freely.
    """
    # Import the extractors module before reading the registry.
    # NOTE(review): presumably this import is what populates
    # `_extractors_context.value` - confirm against `.extractors`.
    import_extractors()
    return list(_extractors_context.value.values())
|
||||
|
||||
|
||||
def gen_extractors():
|
||||
@@ -37,6 +46,9 @@ def list_extractors(age_limit=None):
|
||||
|
||||
def get_info_extractor(ie_name):
    """Returns the info extractor class with the given ie_name

    `ie_name` is given without the `IE` suffix; the suffix is appended before
    the lookup in `_extractors_context.value`. A missing name surfaces whatever
    that mapping raises on a failed subscript (KeyError for a plain dict).
    """
    # Import the extractors module before reading the registry.
    import_extractors()
    return _extractors_context.value[f'{ie_name}IE']
|
||||
|
||||
def import_extractors():
    """Import the sibling `extractors` module purely for its import side effects."""
    # The imported name is intentionally unused, hence the F401 suppression.
    from . import extractors  # noqa: F401
|
||||
|
||||
@@ -256,6 +256,7 @@ from .bilibili import (
|
||||
BilibiliCheeseIE,
|
||||
BilibiliCheeseSeasonIE,
|
||||
BilibiliCollectionListIE,
|
||||
BiliBiliDynamicIE,
|
||||
BilibiliFavoritesListIE,
|
||||
BiliBiliIE,
|
||||
BiliBiliPlayerIE,
|
||||
@@ -311,6 +312,7 @@ from .brilliantpala import (
|
||||
)
|
||||
from .bundesliga import BundesligaIE
|
||||
from .bundestag import BundestagIE
|
||||
from .bunnycdn import BunnyCdnIE
|
||||
from .businessinsider import BusinessInsiderIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
@@ -334,6 +336,7 @@ from .canal1 import Canal1IE
|
||||
from .canalalpha import CanalAlphaIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalsurmas import CanalsurmasIE
|
||||
from .caracoltv import CaracolTvPlayIE
|
||||
from .cartoonnetwork import CartoonNetworkIE
|
||||
from .cbc import (
|
||||
@@ -440,12 +443,6 @@ from .crowdbunker import (
|
||||
CrowdBunkerIE,
|
||||
)
|
||||
from .crtvg import CrtvgIE
|
||||
from .crunchyroll import (
|
||||
CrunchyrollArtistIE,
|
||||
CrunchyrollBetaIE,
|
||||
CrunchyrollBetaShowIE,
|
||||
CrunchyrollMusicIE,
|
||||
)
|
||||
from .cspan import (
|
||||
CSpanCongressIE,
|
||||
CSpanIE,
|
||||
@@ -459,7 +456,10 @@ from .curiositystream import (
|
||||
CuriosityStreamIE,
|
||||
CuriosityStreamSeriesIE,
|
||||
)
|
||||
from .cwtv import CWTVIE
|
||||
from .cwtv import (
|
||||
CWTVIE,
|
||||
CWTVMovieIE,
|
||||
)
|
||||
from .cybrary import (
|
||||
CybraryCourseIE,
|
||||
CybraryIE,
|
||||
@@ -496,10 +496,6 @@ from .daum import (
|
||||
from .daystar import DaystarClipIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import (
|
||||
DeezerAlbumIE,
|
||||
DeezerPlaylistIE,
|
||||
)
|
||||
from .democracynow import DemocracynowIE
|
||||
from .detik import DetikEmbedIE
|
||||
from .deuxm import (
|
||||
@@ -510,6 +506,7 @@ from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .digitalconcerthall import DigitalConcertHallIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .digiview import DigiviewIE
|
||||
from .discogs import DiscogsReleasePlaylistIE
|
||||
from .disney import DisneyIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
@@ -555,6 +552,7 @@ from .dropout import (
|
||||
DropoutIE,
|
||||
DropoutSeasonIE,
|
||||
)
|
||||
from .drtalks import DrTalksIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import (
|
||||
DRTVIE,
|
||||
@@ -584,6 +582,10 @@ from .egghead import (
|
||||
EggheadCourseIE,
|
||||
EggheadLessonIE,
|
||||
)
|
||||
from .eggs import (
|
||||
EggsArtistIE,
|
||||
EggsIE,
|
||||
)
|
||||
from .eighttracks import EightTracksIE
|
||||
from .eitb import EitbIE
|
||||
from .elementorembed import ElementorEmbedIE
|
||||
@@ -699,11 +701,6 @@ from .frontendmasters import (
|
||||
FrontendMastersLessonIE,
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funimation import (
|
||||
FunimationIE,
|
||||
FunimationPageIE,
|
||||
FunimationShowIE,
|
||||
)
|
||||
from .funk import FunkIE
|
||||
from .funker530 import Funker530IE
|
||||
from .fuyintv import FuyinTVIE
|
||||
@@ -1054,6 +1051,7 @@ from .livestream import (
|
||||
)
|
||||
from .livestreamfails import LivestreamfailsIE
|
||||
from .lnk import LnkIE
|
||||
from .loco import LocoIE
|
||||
from .loom import (
|
||||
LoomFolderIE,
|
||||
LoomIE,
|
||||
@@ -1278,6 +1276,10 @@ from .nebula import (
|
||||
)
|
||||
from .nekohacker import NekoHackerIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .nest import (
|
||||
NestClipIE,
|
||||
NestIE,
|
||||
)
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicAlbumIE,
|
||||
NetEaseMusicDjRadioIE,
|
||||
@@ -1532,6 +1534,10 @@ from .pinterest import (
|
||||
PinterestCollectionIE,
|
||||
PinterestIE,
|
||||
)
|
||||
from .piramidetv import (
|
||||
PiramideTVChannelIE,
|
||||
PiramideTVIE,
|
||||
)
|
||||
from .pixivsketch import (
|
||||
PixivSketchIE,
|
||||
PixivSketchUserIE,
|
||||
@@ -1551,6 +1557,7 @@ from .pluralsight import (
|
||||
PluralsightIE,
|
||||
)
|
||||
from .plutotv import PlutoTVIE
|
||||
from .plvideo import PlVideoIE
|
||||
from .podbayfm import (
|
||||
PodbayFMChannelIE,
|
||||
PodbayFMIE,
|
||||
@@ -1873,6 +1880,8 @@ from .skyit import (
|
||||
SkyItVideoIE,
|
||||
SkyItVideoLiveIE,
|
||||
TV8ItIE,
|
||||
TV8ItLiveIE,
|
||||
TV8ItPlaylistIE,
|
||||
)
|
||||
from .skylinewebcams import SkylineWebcamsIE
|
||||
from .skynewsarabia import (
|
||||
@@ -1886,6 +1895,7 @@ from .slutload import SlutloadIE
|
||||
from .smotrim import SmotrimIE
|
||||
from .snapchat import SnapchatSpotlightIE
|
||||
from .snotr import SnotrIE
|
||||
from .softwhiteunderbelly import SoftWhiteUnderbellyIE
|
||||
from .sohu import (
|
||||
SohuIE,
|
||||
SohuVIE,
|
||||
@@ -1981,6 +1991,10 @@ from .streetvoice import StreetVoiceIE
|
||||
from .stretchinternet import StretchInternetIE
|
||||
from .stripchat import StripchatIE
|
||||
from .stv import STVPlayerIE
|
||||
from .subsplash import (
|
||||
SubsplashIE,
|
||||
SubsplashPlaylistIE,
|
||||
)
|
||||
from .substack import SubstackIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .sverigesradio import (
|
||||
@@ -2210,6 +2224,7 @@ from .tvplay import (
|
||||
TVPlayIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tvw import TvwIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
@@ -2333,10 +2348,6 @@ from .viewlift import (
|
||||
ViewLiftIE,
|
||||
)
|
||||
from .viidea import ViideaIE
|
||||
from .viki import (
|
||||
VikiChannelIE,
|
||||
VikiIE,
|
||||
)
|
||||
from .vimeo import (
|
||||
VHXEmbedIE,
|
||||
VimeoAlbumIE,
|
||||
@@ -2354,10 +2365,6 @@ from .vimm import (
|
||||
VimmIE,
|
||||
VimmRecordingIE,
|
||||
)
|
||||
from .vine import (
|
||||
VineIE,
|
||||
VineUserIE,
|
||||
)
|
||||
from .viously import ViouslyIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
@@ -2388,7 +2395,6 @@ from .voxmedia import (
|
||||
from .vrt import (
|
||||
VRTIE,
|
||||
DagelijkseKostIE,
|
||||
KetnetIE,
|
||||
Radio1BeIE,
|
||||
VrtNUIE,
|
||||
)
|
||||
|
||||
@@ -421,14 +421,15 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
|
||||
|
||||
class AbemaTVTitleIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
|
||||
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/#]+)/?(?:\?(?:[^#]+&)?s=(?P<season>[^&#]+))?'
|
||||
_PAGE_SIZE = 25
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/title/90-1597',
|
||||
'url': 'https://abema.tv/video/title/90-1887',
|
||||
'info_dict': {
|
||||
'id': '90-1597',
|
||||
'id': '90-1887',
|
||||
'title': 'シャッフルアイランド',
|
||||
'description': 'md5:61b2425308f41a5282a926edda66f178',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
@@ -436,41 +437,54 @@ class AbemaTVTitleIE(AbemaTVBaseIE):
|
||||
'info_dict': {
|
||||
'id': '193-132',
|
||||
'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
|
||||
'description': 'md5:9b59493d1f3a792bafbc7319258e7af8',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/title/25-102',
|
||||
'url': 'https://abema.tv/video/title/25-1nzan-whrxe',
|
||||
'info_dict': {
|
||||
'id': '25-102',
|
||||
'title': 'ソードアート・オンライン アリシゼーション',
|
||||
'id': '25-1nzan-whrxe',
|
||||
'title': 'ソードアート・オンライン',
|
||||
'description': 'md5:c094904052322e6978495532bdbf06e6',
|
||||
},
|
||||
'playlist_mincount': 24,
|
||||
'playlist_mincount': 25,
|
||||
}, {
|
||||
'url': 'https://abema.tv/video/title/26-2mzbynr-cph?s=26-2mzbynr-cph_s40',
|
||||
'info_dict': {
|
||||
'title': '〈物語〉シリーズ',
|
||||
'id': '26-2mzbynr-cph',
|
||||
'description': 'md5:e67873de1c88f360af1f0a4b84847a52',
|
||||
},
|
||||
'playlist_count': 59,
|
||||
}]
|
||||
|
||||
def _fetch_page(self, playlist_id, series_version, season_id, page):
    """Yield url results for one page of a series' programs.

    @param playlist_id     series id used in the API path
    @param series_version  value sent as the `seriesVersion` query parameter
    @param season_id       optional season filter; only sent when truthy
    @param page            zero-based page index
    """
    query = {
        'seriesVersion': series_version,
        # The API is offset-based, so convert the page index to an item offset
        'offset': str(page * self._PAGE_SIZE),
        'order': 'seq',
        'limit': str(self._PAGE_SIZE),
    }
    if season_id:
        query['seasonId'] = season_id
    programs = self._call_api(
        f'v1/video/series/{playlist_id}/programs', playlist_id,
        note=f'Downloading page {page + 1}',
        query=query)
    yield from (
        self.url_result(f'https://abema.tv/video/episode/{x}')
        for x in traverse_obj(programs, ('programs', ..., 'id')))
|
||||
|
||||
def _entries(self, playlist_id, series_version, season_id):
    """Return a lazily paged list of the series' episodes.

    The first three `_fetch_page` arguments are bound here, leaving the page
    index as the only argument for `OnDemandPagedList` to supply.
    """
    return OnDemandPagedList(
        functools.partial(self._fetch_page, playlist_id, series_version, season_id),
        self._PAGE_SIZE)
|
||||
|
||||
def _real_extract(self, url):
    """Build the playlist result for a title (series) URL.

    The series id and the optional season id are both taken from the URL
    match; the season id is None when the URL carries no `s=` parameter.
    """
    playlist_id, season_id = self._match_valid_url(url).group('id', 'season')
    series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)

    # 'version' is accessed by subscript, so a response without it fails here;
    # 'title' and 'content' are optional metadata.
    return self.playlist_result(
        self._entries(playlist_id, series_info['version'], season_id), playlist_id=playlist_id,
        playlist_title=series_info.get('title'),
        playlist_description=series_info.get('content'))
|
||||
|
||||
@@ -43,14 +43,14 @@ class ACastIE(ACastBaseIE):
|
||||
_VALID_URL = r'''(?x:
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:embed|www)\.)?acast\.com/|
|
||||
(?:(?:embed|www|shows)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<channel>[^/]+)/(?P<id>[^/#?"]+)
|
||||
(?P<channel>[^/?#]+)/(?:episodes/)?(?P<id>[^/#?"]+)
|
||||
)'''
|
||||
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'url': 'https://shows.acast.com/sparpodcast/episodes/2.raggarmordet-rosterurdetforflutna',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
@@ -59,7 +59,7 @@ class ACastIE(ACastBaseIE):
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'duration': 2766,
|
||||
'creator': 'Third Ear Studio',
|
||||
'creators': ['Third Ear Studio'],
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
|
||||
@@ -74,6 +74,9 @@ class ACastIE(ACastBaseIE):
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'only_matching': True,
|
||||
@@ -110,7 +113,7 @@ class ACastChannelIE(ACastBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?acast\.com/|
|
||||
(?:(?:www|shows)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
@@ -120,12 +123,15 @@ class ACastChannelIE(ACastBaseIE):
|
||||
'info_dict': {
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
|
||||
'description': 'md5:feca253de9947634605080cd9eeea2bf',
|
||||
},
|
||||
'playlist_mincount': 200,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://shows.acast.com/sparpodcast',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -232,7 +232,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
error = self._parse_json(e.cause.response.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
raise ExtractorError(message)
|
||||
else:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import datetime as dt
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,7 +11,7 @@ from ..utils import (
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
unified_timestamp,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
@@ -87,9 +88,9 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'uploader_id': 'rlantnghks',
|
||||
'uploader': '페이즈으',
|
||||
'duration': 10840,
|
||||
'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+',
|
||||
'thumbnail': r're:https?://videoimg\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
|
||||
'upload_date': '20230108',
|
||||
'timestamp': 1673218805,
|
||||
'timestamp': 1673186405,
|
||||
'title': '젠지 페이즈',
|
||||
},
|
||||
'params': {
|
||||
@@ -102,7 +103,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'id': '20170411_BE689A0E_190960999_1_2_h',
|
||||
'ext': 'mp4',
|
||||
'title': '혼자사는여자집',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
|
||||
'uploader': '♥이슬이',
|
||||
'uploader_id': 'dasl8121',
|
||||
'upload_date': '20170411',
|
||||
@@ -119,7 +120,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'id': '20180327_27901457_202289533_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]빨개요♥ (part 1)',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
|
||||
'thumbnail': r're:https?://(?:video|st)img\.(?:sooplive\.co\.kr|afreecatv\.com)/.+',
|
||||
'uploader': '[SA]서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20180327',
|
||||
@@ -187,7 +188,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'formats': formats,
|
||||
**traverse_obj(file_element, {
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('file_start', {unified_timestamp}),
|
||||
'timestamp': ('file_start', {parse_iso8601(delimiter=' ', timezone=dt.timedelta(hours=9))}),
|
||||
}),
|
||||
})
|
||||
|
||||
@@ -370,7 +371,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
'title': channel_info.get('TITLE') or station_info.get('station_title'),
|
||||
'uploader': channel_info.get('BJNICK') or station_info.get('station_name'),
|
||||
'uploader_id': broadcaster_id,
|
||||
'timestamp': unified_timestamp(station_info.get('broad_start')),
|
||||
'timestamp': parse_iso8601(station_info.get('broad_start'), delimiter=' ', timezone=dt.timedelta(hours=9)),
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
'http_headers': {'Referer': url},
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class AZMedienIE(InfoExtractor):
|
||||
@@ -9,15 +8,15 @@ class AZMedienIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.|tv\.)?
|
||||
(?P<host>
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch|
|
||||
tvo-online\.ch
|
||||
)/
|
||||
[^/]+/
|
||||
[^/?#]+/
|
||||
(?P<id>
|
||||
[^/]+-(?P<article_id>\d+)
|
||||
[^/?#]+-\d+
|
||||
)
|
||||
(?:
|
||||
\#video=
|
||||
@@ -47,19 +46,17 @@ class AZMedienIE(InfoExtractor):
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
|
||||
_PARTNER_ID = '1719221'
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, display_id, article_id, entry_id = self._match_valid_url(url).groups()
|
||||
display_id, entry_id = self._match_valid_url(url).groups()
|
||||
|
||||
if not entry_id:
|
||||
entry_id = self._download_json(
|
||||
self._API_TEMPL % (host, host.split('.')[0]), display_id, query={
|
||||
'variables': json.dumps({
|
||||
'contextId': 'NewsArticle:' + article_id,
|
||||
}),
|
||||
})['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data = self._search_json(
|
||||
r'window\.__APOLLO_STATE__\s*=', webpage, 'video data', display_id)
|
||||
entry_id = traverse_obj(data, (
|
||||
lambda _, v: v['__typename'] == 'KalturaData', 'kalturaId', any, {require('kaltura id')}))
|
||||
|
||||
return self.url_result(
|
||||
f'kaltura:{self._PARTNER_ID}:{entry_id}',
|
||||
|
||||
@@ -86,7 +86,7 @@ class BandlabBaseIE(InfoExtractor):
|
||||
'webpage_url': (
|
||||
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
|
||||
'url': ('video', 'url', {url_or_none}),
|
||||
'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
|
||||
'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=72)}),
|
||||
'description': ('caption', {str}),
|
||||
'thumbnail': ('video', 'picture', 'url', {url_or_none}),
|
||||
'view_count': ('video', 'counters', 'plays', {int_or_none}),
|
||||
@@ -120,7 +120,7 @@ class BandlabIE(BandlabBaseIE):
|
||||
'duration': 54.629999999999995,
|
||||
'title': 'sweet black',
|
||||
'upload_date': '20231210',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
|
||||
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
|
||||
'genres': ['Lofi'],
|
||||
'uploader': 'ender milze',
|
||||
'comment_count': int,
|
||||
@@ -142,7 +142,7 @@ class BandlabIE(BandlabBaseIE):
|
||||
'duration': 54.629999999999995,
|
||||
'title': 'sweet black',
|
||||
'upload_date': '20231210',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
|
||||
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
|
||||
'genres': ['Lofi'],
|
||||
'uploader': 'ender milze',
|
||||
'comment_count': int,
|
||||
@@ -158,7 +158,7 @@ class BandlabIE(BandlabBaseIE):
|
||||
'comment_count': int,
|
||||
'genres': ['Other'],
|
||||
'uploader_id': 'user8353034818103753',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
|
||||
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
|
||||
'timestamp': 1709625771,
|
||||
'track': 'PodcastMaerchen4b',
|
||||
'duration': 468.14,
|
||||
@@ -178,7 +178,7 @@ class BandlabIE(BandlabBaseIE):
|
||||
'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
|
||||
'ext': 'm4a',
|
||||
'timestamp': 1588273294,
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
|
||||
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
|
||||
'description': 'Final Revision.',
|
||||
'title': 'Replay ( Instrumental)',
|
||||
'uploader': 'David R Sparks',
|
||||
@@ -200,7 +200,7 @@ class BandlabIE(BandlabBaseIE):
|
||||
'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
|
||||
'ext': 'mp4',
|
||||
'duration': 44.705,
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
|
||||
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
|
||||
'comment_count': int,
|
||||
'title': 'backing vocals',
|
||||
'uploader_id': 'marliashya',
|
||||
@@ -224,7 +224,7 @@ class BandlabIE(BandlabBaseIE):
|
||||
'view_count': int,
|
||||
'track': 'Positronic Meltdown',
|
||||
'duration': 318.55,
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
|
||||
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
|
||||
'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
|
||||
'uploader_id': 'microfreaks',
|
||||
'title': 'Positronic Meltdown',
|
||||
@@ -246,7 +246,7 @@ class BandlabIE(BandlabBaseIE):
|
||||
'comment_count': int,
|
||||
'uploader': 'Sorakime',
|
||||
'uploader_id': 'sorakime',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
|
||||
'thumbnail': 'https://bl-prod-images.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
|
||||
'timestamp': 1691162128,
|
||||
'upload_date': '20230804',
|
||||
'media_type': 'track',
|
||||
|
||||
@@ -4,7 +4,9 @@ import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import math
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
@@ -18,7 +20,6 @@ from ..utils import (
|
||||
InAdvancePagedList,
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
@@ -63,7 +64,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
|
||||
if missing_formats:
|
||||
self.to_screen(
|
||||
f'Format(s) {missing_formats} are missing; you have to login or '
|
||||
f'Format(s) {missing_formats} are missing; you have to '
|
||||
f'become a premium member to download them. {self._login_hint()}')
|
||||
|
||||
def extract_formats(self, play_info):
|
||||
@@ -165,14 +166,18 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
|
||||
return params
|
||||
|
||||
def _download_playinfo(self, bvid, cid, headers=None, query=None):
    """Download the playurl JSON for one cid and return its 'data' member.

    @param bvid     video id; sent as `bvid` and used as the download's video id
    @param cid      sent as the `cid` query parameter
    @param headers  optional HTTP headers forwarded to the request
    @param query    optional extra query parameters; these are unpacked last
                    and therefore override the defaults of the same name
    """
    params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})}
    if self.is_logged_in:
        # 'try_look' is never sent for a logged-in session
        # NOTE(review): presumably it only matters for anonymous requests - confirm
        params.pop('try_look', None)
    if qn := params.get('qn'):
        note = f'Downloading video format {qn} for cid {cid}'
    else:
        note = f'Downloading video formats for cid {cid}'

    # The parameters are signed via _sign_wbi before being sent
    return self._download_json(
        'https://api.bilibili.com/x/player/wbi/playurl', bvid,
        query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
@@ -191,7 +196,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
video_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
|
||||
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||
@@ -207,7 +212,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
def _get_chapters(self, aid, cid):
|
||||
chapters = aid and cid and self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
'https://api.bilibili.com/x/player/wbi/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
note='Extracting chapters', fatal=False, headers=self._HEADERS)
|
||||
return traverse_obj(chapters, ('data', 'view_points', ..., {
|
||||
'title': 'content',
|
||||
@@ -286,7 +291,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||
for cid, edges in cid_edges.items():
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
|
||||
yield {
|
||||
**metainfo,
|
||||
'id': f'{video_id}_{cid}',
|
||||
@@ -639,40 +644,29 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
headers['Referer'] = url
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
|
||||
self.raise_login_required()
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
|
||||
raise ExtractorError(
|
||||
'This video may be deleted or geo-restricted. '
|
||||
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
|
||||
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
else:
|
||||
play_info_obj = self._search_json(
|
||||
r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
|
||||
if not play_info_obj:
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
|
||||
self.raise_login_required()
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
|
||||
raise ExtractorError(
|
||||
'This video may be deleted or geo-restricted. '
|
||||
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
|
||||
play_info = traverse_obj(play_info_obj, ('data', {dict}))
|
||||
if not play_info:
|
||||
if traverse_obj(play_info_obj, 'code') == 87007:
|
||||
toast = get_element_by_class('tips-toast', webpage) or ''
|
||||
msg = clean_html(
|
||||
f'{get_element_by_class("belongs-to", toast) or ""},'
|
||||
+ (get_element_by_class('level', toast) or ''))
|
||||
raise ExtractorError(
|
||||
f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
|
||||
raise ExtractorError('Failed to extract play info')
|
||||
video_data = initial_state['videoData']
|
||||
|
||||
video_id, title = video_data['bvid'], video_data.get('title')
|
||||
|
||||
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
|
||||
page_list_json = not is_festival and traverse_obj(
|
||||
page_list_json = (not is_festival and traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||
note='Extracting videos in anthology', headers=headers),
|
||||
'data', expected_type=list) or []
|
||||
'data', expected_type=list)) or []
|
||||
is_anthology = len(page_list_json) > 1
|
||||
|
||||
part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
|
||||
@@ -691,8 +685,6 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
festival_info = {}
|
||||
if is_festival:
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
|
||||
festival_info = traverse_obj(initial_state, {
|
||||
'uploader': ('videoInfo', 'upName'),
|
||||
'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
|
||||
@@ -727,62 +719,79 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
|
||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
__post_extractor=self.extract_comments(aid))
|
||||
else:
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if not traverse_obj(play_info, ('dash')):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
play_info = None
|
||||
if self.is_logged_in:
|
||||
play_info = traverse_obj(
|
||||
self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None),
|
||||
('data', {dict}))
|
||||
if not play_info:
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
if video_data.get('is_upower_exclusive'):
|
||||
high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {}
|
||||
msg = f'{join_nonempty("title", "sub_title", from_dict=high_level, delim=",")}. {self._login_hint()}'
|
||||
if not formats:
|
||||
raise ExtractorError(f'This is a supporter-only video: {msg}', expected=True)
|
||||
if '试看' in traverse_obj(play_info, ('accept_description', ..., {str})):
|
||||
self.report_warning(
|
||||
f'This is a supporter-only video, only the preview will be extracted: {msg}',
|
||||
video_id=video_id)
|
||||
|
||||
if not traverse_obj(play_info, 'dash'):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, query={'qn': qn})),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
@@ -860,10 +869,16 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
headers['Referer'] = url
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
headers=headers)
|
||||
|
||||
play_info = (
|
||||
self._search_json(
|
||||
r'playurlSSRData\s*=', webpage, 'embedded page info', episode_id,
|
||||
end_pattern='\n', default=None)
|
||||
or self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
|
||||
headers=headers))
|
||||
|
||||
premium_only = play_info.get('code') == -10403
|
||||
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
|
||||
|
||||
@@ -1164,28 +1179,26 @@ class BilibiliSpaceBaseIE(BilibiliBaseIE):
|
||||
|
||||
|
||||
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>(?:/upload)?/video)?/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/3985676/video',
|
||||
'info_dict': {
|
||||
'id': '3985676',
|
||||
},
|
||||
'playlist_mincount': 178,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/313580179/video',
|
||||
'info_dict': {
|
||||
'id': '313580179',
|
||||
},
|
||||
'playlist_mincount': 92,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
||||
if not is_video_url:
|
||||
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
||||
'To download audios, add a "/audio" to the URL')
|
||||
'To download audios, add a "/upload/audio" to the URL')
|
||||
|
||||
def fetch_page(page_idx):
|
||||
query = {
|
||||
@@ -1198,6 +1211,12 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
'ps': 30,
|
||||
'tid': 0,
|
||||
'web_location': 1550101,
|
||||
'dm_img_list': '[]',
|
||||
'dm_img_str': base64.b64encode(
|
||||
''.join(random.choices(string.printable, k=random.randint(16, 64))).encode())[:-2].decode(),
|
||||
'dm_cover_img_str': base64.b64encode(
|
||||
''.join(random.choices(string.printable, k=random.randint(32, 128))).encode())[:-2].decode(),
|
||||
'dm_img_inter': '{"ds":[],"wh":[6093,6631,31],"of":[430,760,380]}',
|
||||
}
|
||||
|
||||
try:
|
||||
@@ -1208,14 +1227,14 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
'Request is blocked by server (412), please wait and try later.', expected=True)
|
||||
raise
|
||||
status_code = response['code']
|
||||
if status_code == -401:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||
elif status_code == -352 and not self.is_logged_in:
|
||||
self.raise_login_required('Request is rejected, you need to login to access playlist')
|
||||
'Request is blocked by server (401), please wait and try later.', expected=True)
|
||||
elif status_code == -352:
|
||||
raise ExtractorError('Request is rejected by server (352)', expected=True)
|
||||
elif status_code != 0:
|
||||
raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
|
||||
return response['data']
|
||||
@@ -1237,9 +1256,9 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
|
||||
|
||||
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/(?:upload/)?audio'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/313580179/audio',
|
||||
'url': 'https://space.bilibili.com/313580179/upload/audio',
|
||||
'info_dict': {
|
||||
'id': '313580179',
|
||||
},
|
||||
@@ -1262,7 +1281,8 @@ class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
for entry in page_data.get('data', []):
|
||||
# data is None when the playlist is empty
|
||||
for entry in page_data.get('data') or []:
|
||||
yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
@@ -1286,30 +1306,43 @@ class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
|
||||
|
||||
|
||||
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
|
||||
_VALID_URL = [
|
||||
r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)',
|
||||
r'https?://space\.bilibili\.com/(?P<mid>\d+)/lists/(?P<sid>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
||||
'url': 'https://space.bilibili.com/2142762/lists/3662502?type=season',
|
||||
'info_dict': {
|
||||
'id': '2142762_57445',
|
||||
'title': '【完结】《底特律 变人》全结局流程解说',
|
||||
'description': '',
|
||||
'id': '2142762_3662502',
|
||||
'title': '合集·《黑神话悟空》流程解说',
|
||||
'description': '黑神话悟空 相关节目',
|
||||
'uploader': '老戴在此',
|
||||
'uploader_id': '2142762',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
|
||||
'thumbnail': 'https://archive.biliimg.com/bfs/archive/22302e17dc849dd4533606d71bc89df162c3a9bf.jpg',
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
'playlist_mincount': 62,
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/2142762/lists/3662502',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BilibiliSeriesListIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid, sid = self._match_valid_url(url).group('mid', 'sid')
|
||||
playlist_id = f'{mid}_{sid}'
|
||||
|
||||
def fetch_page(page_idx):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
|
||||
playlist_id, note=f'Downloading page {page_idx}',
|
||||
'https://api.bilibili.com/x/polymer/web-space/seasons_archives_list',
|
||||
playlist_id, note=f'Downloading page {page_idx}', headers={'Referer': url},
|
||||
query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
@@ -1336,9 +1369,12 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
|
||||
|
||||
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
|
||||
_VALID_URL = [
|
||||
r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)',
|
||||
r'https?://space\.bilibili\.com/(?P<mid>\d+)/lists/(?P<sid>\d+)/?\?(?:[^#]+&)?type=series(?:[&#]|$)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
|
||||
'url': 'https://space.bilibili.com/1958703906/lists/547718?type=series',
|
||||
'info_dict': {
|
||||
'id': '1958703906_547718',
|
||||
'title': '直播回放',
|
||||
@@ -1351,6 +1387,9 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
'modified_date': str,
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -1369,7 +1408,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
def fetch_page(page_idx):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/series/archives',
|
||||
playlist_id, note=f'Downloading page {page_idx}',
|
||||
playlist_id, note=f'Downloading page {page_idx}', headers={'Referer': url},
|
||||
query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
@@ -1557,16 +1596,16 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
|
||||
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
|
||||
error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
|
||||
error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
|
||||
error = traverse_obj(initial_state, (('error', 'listError'), all, lambda _, v: v['code'], any))
|
||||
if error and error['code'] != 200:
|
||||
error_code = error.get('trueCode')
|
||||
if error_code == -400 and list_id == 'watchlater':
|
||||
self.raise_login_required('You need to login to access your watchlater playlist')
|
||||
elif error_code == -403:
|
||||
self.raise_login_required('This is a private playlist. You need to login as its owner')
|
||||
elif error_code == 11010:
|
||||
raise ExtractorError('Playlist is no longer available', expected=True)
|
||||
raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
|
||||
raise ExtractorError(f'Could not access playlist: {error_code} {error.get("message")}')
|
||||
|
||||
query = {
|
||||
'ps': 20,
|
||||
@@ -1848,6 +1887,47 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
ie=BiliBiliIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class BiliBiliDynamicIE(InfoExtractor):
    # Resolves a Bilibili "dynamic" (t.bilibili.com) or "opus" post to the
    # video URL it links to and hands that URL off to the matching extractor.
    _VALID_URL = r'https?://(?:t\.bilibili\.com|(?:www\.)?bilibili\.com/opus)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://t.bilibili.com/998134289197432852',
        'info_dict': {
            'id': 'BV1TAmBYVEJr',
            'ext': 'mp4',
            'uploader_id': '1192648858',
            'comment_count': int,
            '_old_archive_ids': ['bilibili 113457567568273_part1'],
            'thumbnail': 'http://i2.hdslb.com/bfs/archive/50091efd965d9f13ff6814f7ad374f90ab21e77d.jpg',
            'duration': 929.238,
            'upload_date': '20241110',
            'uploader': '何同学工作室',
            'like_count': int,
            'view_count': int,
            'title': '美国小朋友就玩这个?!何同学工作室11月开箱',
            'description': '本期产品信息:\n机器狗\n气味模拟器\nCloudboom Strike LS\n无弦吉他\n蓝牙磁带音箱\n神奇画板',
            'timestamp': 1731232800,
            'tags': list,
            'chapters': list,
        },
    }]

    def _real_extract(self, url):
        """Look up the post via the dynamic-detail API and delegate to the linked video URL."""
        post_id = self._match_id(url)

        # Without the newer chrome UA, the API will return an error (-352)
        api_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        }
        post_data = self._download_json(
            'https://api.bilibili.com/x/polymer/web-dynamic/v1/detail', post_id,
            query={'id': post_id}, headers=api_headers)

        # The jump URL may sit on the post itself or on the original post it
        # forwards ('orig'), under either the 'major' or 'additional' module.
        jump_url_path = (
            'data', 'item', (None, 'orig'), 'modules', 'module_dynamic',
            (('major', ('archive', 'pgc')), ('additional', ('reserve', 'common'))),
            'jump_url', {url_or_none}, any, {self._proto_relative_url})
        video_url = traverse_obj(post_data, jump_url_path)

        if not video_url:
            raise ExtractorError('No valid video URL found', expected=True)
        # A jump URL pointing back at this very post would make us recurse forever
        if self.suitable(video_url) and post_id == self._match_id(video_url):
            raise ExtractorError('No valid video URL found', expected=True)

        return self.url_result(video_url)
|
||||
|
||||
|
||||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
|
||||
@@ -53,7 +53,7 @@ class BlueskyIE(InfoExtractor):
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'title': 'Bluesky now has video! Update your app to version 1.91 or refresh on ...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
@@ -88,7 +88,7 @@ class BlueskyIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
|
||||
'md5': '1af9c7fda061cf7593bbffca89e43d1c',
|
||||
'md5': 'cc0110ed1f6b0247caac8234cc1e861d',
|
||||
'info_dict': {
|
||||
'id': '3l3w4tnezek2e',
|
||||
'ext': 'mp4',
|
||||
@@ -133,6 +133,8 @@ class BlueskyIE(InfoExtractor):
|
||||
'channel_follower_count': int,
|
||||
'categories': ['Entertainment'],
|
||||
'tags': [],
|
||||
'chapters': list,
|
||||
'heatmap': 'count:100',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
@@ -170,7 +172,7 @@ class BlueskyIE(InfoExtractor):
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'title': 'Bluesky now has video! Update your app to version 1.91 or refresh on ...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
@@ -184,14 +186,14 @@ class BlueskyIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
|
||||
'url': 'https://bsky.app/profile/cinny.bun.how/post/3l7rdfxhyds2f',
|
||||
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
|
||||
'info_dict': {
|
||||
'id': '3l7rdfxhyds2f',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'cinnamon',
|
||||
'uploader_id': 'alt.bun.how',
|
||||
'uploader_url': 'https://bsky.app/profile/alt.bun.how',
|
||||
'uploader': 'cinnamon 🐇 🏳️⚧️',
|
||||
'uploader_id': 'cinny.bun.how',
|
||||
'uploader_url': 'https://bsky.app/profile/cinny.bun.how',
|
||||
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
@@ -253,7 +255,7 @@ class BlueskyIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '3l77u64l7le2e',
|
||||
'ext': 'mp4',
|
||||
'title': 'hearing people on twitter say that bluesky isn\'...',
|
||||
'title': "hearing people on twitter say that bluesky isn't funny yet so post t...",
|
||||
'like_count': int,
|
||||
'uploader_id': 'thafnine.net',
|
||||
'uploader_url': 'https://bsky.app/profile/thafnine.net',
|
||||
@@ -284,17 +286,19 @@ class BlueskyIE(InfoExtractor):
|
||||
services, ('service', lambda _, x: x['type'] == 'AtprotoPersonalDataServer',
|
||||
'serviceEndpoint', {url_or_none}, any)) or 'https://bsky.social'
|
||||
|
||||
def _real_extract(self, url):
|
||||
handle, video_id = self._match_valid_url(url).group('handle', 'id')
|
||||
|
||||
post = self._download_json(
|
||||
def _extract_post(self, handle, post_id):
|
||||
return self._download_json(
|
||||
'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
|
||||
video_id, query={
|
||||
'uri': f'at://{handle}/app.bsky.feed.post/{video_id}',
|
||||
post_id, query={
|
||||
'uri': f'at://{handle}/app.bsky.feed.post/{post_id}',
|
||||
'depth': 0,
|
||||
'parentHeight': 0,
|
||||
})['thread']['post']
|
||||
|
||||
def _real_extract(self, url):
|
||||
handle, video_id = self._match_valid_url(url).group('handle', 'id')
|
||||
post = self._extract_post(handle, video_id)
|
||||
|
||||
entries = []
|
||||
# app.bsky.embed.video.view/app.bsky.embed.external.view
|
||||
entries.extend(self._extract_videos(post, video_id))
|
||||
@@ -341,6 +345,7 @@ class BlueskyIE(InfoExtractor):
|
||||
|
||||
formats.append({
|
||||
'format_id': 'blob',
|
||||
'quality': 1,
|
||||
'url': update_url_query(
|
||||
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': video_cid}),
|
||||
**traverse_obj(root, (*embed_path, 'aspectRatio', {
|
||||
@@ -382,7 +387,7 @@ class BlueskyIE(InfoExtractor):
|
||||
'age_limit': (
|
||||
'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
|
||||
'description': (*record_path, 'text', {str}, filter),
|
||||
'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
|
||||
'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=72)}),
|
||||
}),
|
||||
})
|
||||
return entries
|
||||
|
||||
@@ -31,6 +31,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BrightcoveLegacyIE(InfoExtractor):
|
||||
@@ -935,8 +936,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
|
||||
if content_type == 'playlist':
|
||||
return self.playlist_result(
|
||||
[self._parse_brightcove_metadata(vid, vid.get('id'), headers)
|
||||
for vid in json_data.get('videos', []) if vid.get('id')],
|
||||
(self._parse_brightcove_metadata(vid, vid['id'], headers)
|
||||
for vid in traverse_obj(json_data, ('videos', lambda _, v: v['id']))),
|
||||
json_data.get('id'), json_data.get('name'),
|
||||
json_data.get('description'))
|
||||
|
||||
|
||||
178
yt_dlp/extractor/bunnycdn.py
Normal file
178
yt_dlp/extractor/bunnycdn.py
Normal file
@@ -0,0 +1,178 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class BunnyCdnIE(InfoExtractor):
    # Extractor for Bunny Stream players served from iframe.mediadelivery.net /
    # video.bunnycdn.com, both as direct URLs and as iframes embedded in other pages.
    _VALID_URL = r'https?://(?:iframe\.mediadelivery\.net|video\.bunnycdn\.com)/(?:embed|play)/(?P<library_id>\d+)/(?P<id>[\da-f-]+)'
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL}[^\'"]*)[\'"]']
    _TESTS = [{
        'url': 'https://iframe.mediadelivery.net/embed/113933/e73edec1-e381-4c8b-ae73-717a140e0924',
        'info_dict': {
            'id': 'e73edec1-e381-4c8b-ae73-717a140e0924',
            'ext': 'mp4',
            'title': 'mistress morgana (3).mp4',
            'description': '',
            'timestamp': 1693251673,
            'thumbnail': r're:^https?://.*\.b-cdn\.net/e73edec1-e381-4c8b-ae73-717a140e0924/thumbnail\.jpg',
            'duration': 7.0,
            'upload_date': '20230828',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://iframe.mediadelivery.net/play/136145/32e34c4b-0d72-437c-9abb-05e67657da34',
        'info_dict': {
            'id': '32e34c4b-0d72-437c-9abb-05e67657da34',
            'ext': 'mp4',
            'timestamp': 1691145748,
            'thumbnail': r're:^https?://.*\.b-cdn\.net/32e34c4b-0d72-437c-9abb-05e67657da34/thumbnail_9172dc16\.jpg',
            'duration': 106.0,
            'description': 'md5:981a3e899a5c78352b21ed8b2f1efd81',
            'upload_date': '20230804',
            'title': 'Sanela ist Teil der #arbeitsmarktkraft',
        },
        'params': {'skip_download': True},
    }, {
        # Stream requires activation and pings
        'url': 'https://iframe.mediadelivery.net/embed/200867/2e8545ec-509d-4571-b855-4cf0235ccd75',
        'info_dict': {
            'id': '2e8545ec-509d-4571-b855-4cf0235ccd75',
            'ext': 'mp4',
            'timestamp': 1708497752,
            'title': 'netflix part 1',
            'duration': 3959.0,
            'description': '',
            'upload_date': '20240221',
            'thumbnail': r're:^https?://.*\.b-cdn\.net/2e8545ec-509d-4571-b855-4cf0235ccd75/thumbnail\.jpg',
        },
        'params': {'skip_download': True},
    }]
    _WEBPAGE_TESTS = [{
        # Stream requires Referer
        'url': 'https://conword.io/',
        'info_dict': {
            'id': '3a5d863e-9cd6-447e-b6ef-e289af50b349',
            'ext': 'mp4',
            'title': 'Conword bei der Stadt Köln und Stadt Dortmund',
            'description': '',
            'upload_date': '20231031',
            'duration': 31.0,
            'thumbnail': 'https://video.watchuh.com/3a5d863e-9cd6-447e-b6ef-e289af50b349/thumbnail.jpg',
            'timestamp': 1698783879,
        },
        'params': {'skip_download': True},
    }, {
        # URL requires token and expires
        'url': 'https://www.stockphotos.com/video/moscow-subway-the-train-is-arriving-at-the-park-kultury-station-10017830',
        'info_dict': {
            'id': '0b02fa20-4e8c-4140-8f87-f64d820a3386',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.b-cdn\.net/0b02fa20-4e8c-4140-8f87-f64d820a3386/thumbnail\.jpg',
            'title': 'Moscow subway. The train is arriving at the Park Kultury station.',
            'upload_date': '20240531',
            'duration': 18.0,
            'timestamp': 1717152269,
            'description': '',
        },
        'params': {'skip_download': True},
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        # Smuggle the embedding page's URL along so that _real_extract can
        # send it as the Referer when requesting the player page.
        for embed_url in super()._extract_embed_urls(url, webpage):
            yield smuggle_url(embed_url, {'Referer': url})

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})

        video_id, library_id = self._match_valid_url(url).group('id', 'library_id')
        # Always fetch the /embed/ page, forwarding the smuggled Referer and
        # any token/expires parameters carried by the input URL.
        webpage = self._download_webpage(
            f'https://iframe.mediadelivery.net/embed/{library_id}/{video_id}', video_id,
            headers=traverse_obj(smuggled_data, {'Referer': 'Referer'}),
            query=traverse_obj(parse_qs(url), {'token': 'token', 'expires': 'expires'}))

        # Error pages are recognised by their <title>.
        # NOTE: the title must be bound *before* comparing; writing
        # `if html_title := f() == '403'` would bind the boolean result of the
        # comparison (`:=` has lower precedence than `==`), making the '404'
        # branch below unreachable.
        html_title = self._html_extract_title(webpage, default=None)
        if html_title == '403':
            raise ExtractorError(
                'This video is inaccessible. Setting a Referer header '
                'might be required to access the video', expected=True)
        elif html_title == '404':
            raise ExtractorError('This video does not exist', expected=True)

        headers = {'Referer': url}

        info = traverse_obj(self._parse_html5_media_entries(url, webpage, video_id, _headers=headers), 0) or {}
        formats = info.get('formats') or []
        subtitles = info.get('subtitles') or {}

        original_url = self._search_regex(
            r'(?:var|const|let)\s+originalUrl\s*=\s*["\']([^"\']+)["\']', webpage, 'original url', default=None)
        if url_or_none(original_url):
            # Only offer the source file if a HEAD request confirms it is reachable
            urlh = self._request_webpage(
                HEADRequest(original_url), video_id=video_id, note='Checking original',
                headers=headers, fatal=False, expected_status=(403, 404))
            if urlh and urlh.status == 200:
                formats.append({
                    'url': original_url,
                    'format_id': 'source',
                    'quality': 1,
                    'http_headers': headers,
                    'ext': urlhandle_detect_ext(urlh, default='mp4'),
                    'filesize': int_or_none(urlh.get_header('Content-Length')),
                })

        # MediaCage Streams require activation and pings
        src_url = self._search_regex(
            r'\.setAttribute\([\'"]src[\'"],\s*[\'"]([^\'"]+)[\'"]\)', webpage, 'src url', default=None)
        activation_url = self._search_regex(
            r'loadUrl\([\'"]([^\'"]+/activate)[\'"]', webpage, 'activation url', default=None)
        ping_url = self._search_regex(
            r'loadUrl\([\'"]([^\'"]+/ping)[\'"]', webpage, 'ping url', default=None)
        # Parse the src URL's query string once; both values come from it
        src_query = parse_qs(src_url) if src_url else {}
        secret = traverse_obj(src_query, ('secret', 0))
        context_id = traverse_obj(src_query, ('contextId', 0))
        ping_data = {}
        if src_url and activation_url and ping_url and secret and context_id:
            self._download_webpage(
                activation_url, video_id, headers=headers, note='Downloading activation data')

            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                src_url, video_id, 'mp4', headers=headers, m3u8_id='hls', fatal=False)
            for fmt in fmts:
                fmt.update({
                    'protocol': 'bunnycdn',
                    'http_headers': headers,
                })
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

            # Passed through in the info dict under a private key, alongside
            # the 'bunnycdn' protocol set on the formats above
            ping_data = {
                '_bunnycdn_ping_data': {
                    'url': ping_url,
                    'headers': headers,
                    'secret': secret,
                    'context_id': context_id,
                },
            }

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(webpage, ({find_element(id='main-video', html=True)}, {extract_attributes}, {
                'title': ('data-plyr-config', {json.loads}, 'title', {str}),
                'thumbnail': ('data-poster', {url_or_none}),
            })),
            **ping_data,
            # JSON-LD metadata is merged last, so it overrides the player attributes
            **self._search_json_ld(webpage, video_id, fatal=False),
        }
|
||||
84
yt_dlp/extractor/canalsurmas.py
Normal file
84
yt_dlp/extractor/canalsurmas.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
jwt_decode_hs256,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CanalsurmasIE(InfoExtractor):
    """Extractor for canalsurmas.es video pages.

    Metadata and stream URLs are fetched from the JSON API at ``_API_BASE``,
    which is queried with a JWT access token requested on demand.
    """

    _VALID_URL = r'https?://(?:www\.)?canalsurmas\.es/videos/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.canalsurmas.es/videos/44006-el-gran-queo-1-lora-del-rio-sevilla-20072014',
        'md5': '861f86fdc1221175e15523047d0087ef',
        'info_dict': {
            'id': '44006',
            'ext': 'mp4',
            'title': 'Lora del Río (Sevilla)',
            'description': 'md5:3d9ee40a9b1b26ed8259e6b71ed27b8b',
            'thumbnail': 'https://cdn2.rtva.interactvty.com/content_cards/00f3e8f67b0a4f3b90a4a14618a48b0d.jpg',
            'timestamp': 1648123182,
            'upload_date': '20220324',
        },
    }]
    _API_BASE = 'https://api-rtva.interactvty.com'
    # Access token reused between API calls until it is close to expiry
    _access_token = None

    @staticmethod
    def _is_jwt_expired(token):
        """Return True when the token's ``exp`` claim is under 300 seconds away (or already past)."""
        seconds_left = jwt_decode_hs256(token)['exp'] - time.time()
        return seconds_left < 300

    def _call_api(self, endpoint, video_id, fields=None):
        """Download ``/api/2.0/contents/<endpoint>/<video_id>/`` as JSON.

        An access token is requested first when none is stored yet or the
        stored one is about to expire. ``fields`` (a single name or a list
        of names, as passed by the callers in this class) is sent as the
        comma-joined ``optional_fields`` query parameter; when it is falsy
        no query is sent at all.
        """
        token = self._access_token
        if not token or self._is_jwt_expired(token):
            credentials = {
                'username': 'canalsur_demo',
                'password': 'dsUBXUcI',
            }
            token_response = self._download_json(
                f'{self._API_BASE}/jwt/token/', None,
                'Downloading access token', 'Failed to download access token',
                headers={'Content-Type': 'application/json'},
                data=json.dumps(credentials).encode())
            self._access_token = token_response['access']

        query = None
        if fields:
            query = {'optional_fields': ','.join(variadic(fields))}

        api_url = f'{self._API_BASE}/api/2.0/contents/{endpoint}/{video_id}/'
        return self._download_json(
            api_url, video_id,
            f'Downloading {endpoint} API JSON', f'Failed to download {endpoint} API JSON',
            headers={'Authorization': f'jwtok {self._access_token}'},
            query=query)

    def _real_extract(self, url):
        """Build the info dict from the ``content`` and ``content_resources`` API responses."""
        video_id = self._match_id(url)
        metadata_fields = ['description', 'image', 'duration', 'created_at', 'tags']
        video_info = self._call_api('content', video_id, fields=metadata_fields)
        stream_info = self._call_api('content_resources', video_id, 'media_url')

        formats, subtitles = [], {}
        media_urls = traverse_obj(stream_info, ('results', ..., 'media_url', {url_or_none}))
        for stream_url in media_urls:
            if determine_ext(stream_url) != 'm3u8':
                # Anything that is not an HLS manifest is offered as a direct URL
                formats.append({'url': stream_url})
                continue
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                stream_url, video_id, m3u8_id='hls', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        metadata = traverse_obj(video_info, {
            'title': ('name', {str.strip}),
            'description': ('description', {str}),
            'thumbnail': ('image', {url_or_none}),
            'duration': ('duration', {float_or_none}),
            'timestamp': ('created_at', {parse_iso8601}),
            'tags': ('tags', ..., {str}),
        })
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
|
||||
@@ -1,29 +1,32 @@
|
||||
import base64
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
jwt_decode_hs256,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
replace_extension,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj, trim_str
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
|
||||
@@ -516,9 +519,43 @@ class CBCPlayerPlaylistIE(InfoExtractor):
|
||||
return self.playlist_result(entries(), playlist_id)
|
||||
|
||||
|
||||
class CBCGemIE(InfoExtractor):
|
||||
class CBCGemBaseIE(InfoExtractor):
    """Helpers for reading the gem show catalog API and its item entries."""

    _NETRC_MACHINE = 'cbcgem'
    _GEO_COUNTRIES = ['CA']

    def _call_show_api(self, item_id, display_id=None):
        """Download the catalog JSON for ``item_id``; ``display_id`` (when given) is passed as the video id for the download."""
        api_url = f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{item_id}'
        return self._download_json(
            api_url, display_id or item_id, query={'device': 'web'})

    def _extract_item_info(self, item_info):
        """Map one catalog item to info-dict fields.

        A title of the form ``"<digits>. <rest>"`` is split into an episode
        number and the remaining title. An ``episodeNumber`` value found by
        the traversal below takes precedence over the number parsed from
        the title.
        """
        title = traverse_obj(item_info, ('title', {str}))
        mobj = re.match(r'(?P<episode>\d+)\. (?P<title>.+)', title) if title else None
        if mobj:
            episode_number = int_or_none(mobj.group('episode'))
            title = mobj.group('title')
        else:
            episode_number = None

        info = {'episode_number': episode_number}
        info.update(traverse_obj(item_info, {
            'id': ('url', {str}),
            'episode_id': ('url', {str}),
            'description': ('description', {str}),
            'thumbnail': ('images', 'card', 'url', {url_or_none}, {update_url(query=None)}),
            'episode_number': ('episodeNumber', {int_or_none}),
            'duration': ('metadata', 'duration', {int_or_none}),
            'release_timestamp': ('metadata', 'airDate', {unified_timestamp}),
            'timestamp': ('metadata', 'availabilityDate', {unified_timestamp}),
            'age_limit': ('metadata', 'rating', {trim_str(start='C')}, {parse_age_limit}),
        }))
        # The (possibly shortened) title is used for both fields
        info['episode'] = title
        info['title'] = title
        return info
|
||||
|
||||
|
||||
class CBCGemIE(CBCGemBaseIE):
|
||||
IE_NAME = 'gem.cbc.ca'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]+)'
|
||||
_TESTS = [{
|
||||
# This is a normal, public, TV show video
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||
@@ -529,7 +566,7 @@ class CBCGemIE(InfoExtractor):
|
||||
'description': 'md5:929868d20021c924020641769eb3e7f1',
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
|
||||
'duration': 1324,
|
||||
'categories': ['comedy'],
|
||||
'genres': ['Comédie et humour'],
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season': 'Season 6',
|
||||
'season_number': 6,
|
||||
@@ -537,9 +574,10 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode_number': 1,
|
||||
'episode_id': 'schitts-creek/s06e01',
|
||||
'upload_date': '20210618',
|
||||
'timestamp': 1623988800,
|
||||
'timestamp': 1623974400,
|
||||
'release_date': '20200107',
|
||||
'release_timestamp': 1578427200,
|
||||
'release_timestamp': 1578355200,
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}, {
|
||||
@@ -557,12 +595,13 @@ class CBCGemIE(InfoExtractor):
|
||||
'episode_number': 1,
|
||||
'episode': 'The Cup Runneth Over',
|
||||
'episode_id': 'schitts-creek/s01e01',
|
||||
'duration': 1309,
|
||||
'categories': ['comedy'],
|
||||
'duration': 1308,
|
||||
'genres': ['Comédie et humour'],
|
||||
'upload_date': '20210617',
|
||||
'timestamp': 1623902400,
|
||||
'release_date': '20151124',
|
||||
'release_timestamp': 1448323200,
|
||||
'timestamp': 1623888000,
|
||||
'release_date': '20151123',
|
||||
'release_timestamp': 1448236800,
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}, {
|
||||
@@ -570,82 +609,107 @@ class CBCGemIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
_TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
|
||||
_NETRC_MACHINE = 'cbcgem'
|
||||
_CLIENT_ID = 'fc05b0ee-3865-4400-a3cc-3da82c330c23'
|
||||
_refresh_token = None
|
||||
_access_token = None
|
||||
_claims_token = None
|
||||
|
||||
def _new_claims_token(self, email, password):
|
||||
data = json.dumps({
|
||||
'email': email,
|
||||
'password': password,
|
||||
}).encode()
|
||||
headers = {'content-type': 'application/json'}
|
||||
query = {'apikey': self._TOKEN_API_KEY}
|
||||
resp = self._download_json('https://api.loginradius.com/identity/v2/auth/login',
|
||||
None, data=data, headers=headers, query=query)
|
||||
access_token = resp['access_token']
|
||||
@functools.cached_property
|
||||
def _ropc_settings(self):
|
||||
return self._download_json(
|
||||
'https://services.radio-canada.ca/ott/catalog/v1/gem/settings', None,
|
||||
'Downloading site settings', query={'device': 'web'})['identityManagement']['ropc']
|
||||
|
||||
query = {
|
||||
'access_token': access_token,
|
||||
'apikey': self._TOKEN_API_KEY,
|
||||
'jwtapp': 'jwt',
|
||||
}
|
||||
resp = self._download_json('https://cloud-api.loginradius.com/sso/jwt/api/token',
|
||||
None, headers=headers, query=query)
|
||||
sig = resp['signature']
|
||||
def _is_jwt_expired(self, token):
|
||||
return jwt_decode_hs256(token)['exp'] - time.time() < 300
|
||||
|
||||
data = json.dumps({'jwt': sig}).encode()
|
||||
headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
|
||||
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
|
||||
None, data=data, headers=headers, expected_status=426)
|
||||
cbc_access_token = resp['accessToken']
|
||||
def _call_oauth_api(self, oauth_data, note='Refreshing access token'):
|
||||
response = self._download_json(
|
||||
self._ropc_settings['url'], None, note, data=urlencode_postdata({
|
||||
'client_id': self._CLIENT_ID,
|
||||
**oauth_data,
|
||||
'scope': self._ropc_settings['scopes'],
|
||||
}))
|
||||
self._refresh_token = response['refresh_token']
|
||||
self._access_token = response['access_token']
|
||||
self.cache.store(self._NETRC_MACHINE, 'token_data', [self._refresh_token, self._access_token])
|
||||
|
||||
headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
|
||||
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
|
||||
None, headers=headers, expected_status=426)
|
||||
return resp['claimsToken']
|
||||
def _perform_login(self, username, password):
|
||||
if not self._refresh_token:
|
||||
self._refresh_token, self._access_token = self.cache.load(
|
||||
self._NETRC_MACHINE, 'token_data', default=[None, None])
|
||||
|
||||
def _get_claims_token_expiry(self):
|
||||
# Token is a JWT
|
||||
# JWT is decoded here and 'exp' field is extracted
|
||||
# It is a Unix timestamp for when the token expires
|
||||
b64_data = self._claims_token.split('.')[1]
|
||||
data = base64.urlsafe_b64decode(b64_data + '==')
|
||||
return json.loads(data)['exp']
|
||||
if self._refresh_token and self._access_token:
|
||||
self.write_debug('Using cached refresh token')
|
||||
if not self._claims_token:
|
||||
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
||||
return
|
||||
|
||||
def claims_token_expired(self):
|
||||
exp = self._get_claims_token_expiry()
|
||||
# It will expire in less than 10 seconds, or has already expired
|
||||
return exp - time.time() < 10
|
||||
try:
|
||||
self._call_oauth_api({
|
||||
'grant_type': 'password',
|
||||
'username': username,
|
||||
'password': password,
|
||||
}, note='Logging in')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
def claims_token_valid(self):
|
||||
return self._claims_token is not None and not self.claims_token_expired()
|
||||
def _fetch_access_token(self):
|
||||
if self._is_jwt_expired(self._access_token):
|
||||
try:
|
||||
self._call_oauth_api({
|
||||
'grant_type': 'refresh_token',
|
||||
'refresh_token': self._refresh_token,
|
||||
})
|
||||
except ExtractorError:
|
||||
self._refresh_token, self._access_token = None, None
|
||||
self.cache.store(self._NETRC_MACHINE, 'token_data', [None, None])
|
||||
self.report_warning('Refresh token has been invalidated; retrying with credentials')
|
||||
self._perform_login(*self._get_login_info())
|
||||
|
||||
def _get_claims_token(self, email, password):
|
||||
if not self.claims_token_valid():
|
||||
self._claims_token = self._new_claims_token(email, password)
|
||||
return self._access_token
|
||||
|
||||
def _fetch_claims_token(self):
|
||||
if not self._get_login_info()[0]:
|
||||
return None
|
||||
|
||||
if not self._claims_token or self._is_jwt_expired(self._claims_token):
|
||||
self._claims_token = self._download_json(
|
||||
'https://services.radio-canada.ca/ott/subscription/v2/gem/Subscriber/profile',
|
||||
None, 'Downloading claims token', query={'device': 'web'},
|
||||
headers={'Authorization': f'Bearer {self._fetch_access_token()}'})['claimsToken']
|
||||
self.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token)
|
||||
else:
|
||||
self.write_debug('Using cached claims token')
|
||||
|
||||
return self._claims_token
|
||||
|
||||
def _real_initialize(self):
|
||||
if self.claims_token_valid():
|
||||
return
|
||||
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = self._download_json(
|
||||
f'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}',
|
||||
video_id, expected_status=426)
|
||||
video_id, season_number = self._match_valid_url(url).group('id', 'season')
|
||||
video_info = self._call_show_api(video_id)
|
||||
item_info = traverse_obj(video_info, (
|
||||
'content', ..., 'lineups', ..., 'items',
|
||||
lambda _, v: v['url'] == video_id, any, {require('item info')}))
|
||||
|
||||
email, password = self._get_login_info()
|
||||
if email and password:
|
||||
claims_token = self._get_claims_token(email, password)
|
||||
headers = {'x-claims-token': claims_token}
|
||||
else:
|
||||
headers = {}
|
||||
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
|
||||
headers = {}
|
||||
if claims_token := self._fetch_claims_token():
|
||||
headers['x-claims-token'] = claims_token
|
||||
|
||||
m3u8_info = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/',
|
||||
video_id, headers=headers, query={
|
||||
'appCode': 'gem',
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'multibitrate': 'true',
|
||||
'output': 'json',
|
||||
'tech': 'hls',
|
||||
'manifestVersion': '2',
|
||||
'manifestType': 'desktop',
|
||||
'idMedia': item_info['idMedia'],
|
||||
})
|
||||
|
||||
if m3u8_info.get('errorCode') == 1:
|
||||
self.raise_geo_restricted(countries=['CA'])
|
||||
@@ -671,26 +735,20 @@ class CBCGemIE(InfoExtractor):
|
||||
fmt['preference'] = -2
|
||||
|
||||
return {
|
||||
'season_number': int_or_none(season_number),
|
||||
**traverse_obj(video_info, {
|
||||
'series': ('title', {str}),
|
||||
'season_number': ('structuredMetadata', 'partofSeason', 'seasonNumber', {int_or_none}),
|
||||
'genres': ('structuredMetadata', 'genre', ..., {str}),
|
||||
}),
|
||||
**self._extract_item_info(item_info),
|
||||
'id': video_id,
|
||||
'episode_id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'series': ('series', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'categories': ('category', {str}, all),
|
||||
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class CBCGemPlaylistIE(InfoExtractor):
|
||||
class CBCGemPlaylistIE(CBCGemBaseIE):
|
||||
IE_NAME = 'gem.cbc.ca:playlist'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
@@ -700,70 +758,35 @@ class CBCGemPlaylistIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'schitts-creek/s06',
|
||||
'title': 'Season 6',
|
||||
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season_number': 6,
|
||||
'season': 'Season 6',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/schitts-creek/s06',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
|
||||
|
||||
def _entries(self, season_info):
|
||||
for episode in traverse_obj(season_info, ('items', lambda _, v: v['url'])):
|
||||
yield self.url_result(
|
||||
f'https://gem.cbc.ca/media/{episode["url"]}', CBCGemIE,
|
||||
**self._extract_item_info(episode))
|
||||
|
||||
def _real_extract(self, url):
|
||||
match = self._match_valid_url(url)
|
||||
season_id = match.group('id')
|
||||
show = match.group('show')
|
||||
show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426)
|
||||
season = int(match.group('season'))
|
||||
season_id, show, season = self._match_valid_url(url).group('id', 'show', 'season')
|
||||
show_info = self._call_show_api(show, display_id=season_id)
|
||||
season_info = traverse_obj(show_info, (
|
||||
'content', ..., 'lineups',
|
||||
lambda _, v: v['seasonNumber'] == int(season), any, {require('season info')}))
|
||||
|
||||
season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None)
|
||||
|
||||
if season_info is None:
|
||||
raise ExtractorError(f'Couldn\'t find season {season} of {show}')
|
||||
|
||||
episodes = []
|
||||
for episode in season_info['assets']:
|
||||
episodes.append({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'CBCGem',
|
||||
'url': 'https://gem.cbc.ca/media/' + episode['id'],
|
||||
'id': episode['id'],
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
'thumbnail': episode.get('image'),
|
||||
'series': episode.get('series'),
|
||||
'season_number': episode.get('season'),
|
||||
'season': season_info['title'],
|
||||
'season_id': season_info.get('id'),
|
||||
'episode_number': episode.get('episode'),
|
||||
'episode': episode.get('title'),
|
||||
'episode_id': episode['id'],
|
||||
'duration': episode.get('duration'),
|
||||
'categories': [episode.get('category')],
|
||||
})
|
||||
|
||||
thumbnail = None
|
||||
tn_uri = season_info.get('image')
|
||||
# the-national was observed to use a "data:image/png;base64"
|
||||
# URI for their 'image' value. The image was 1x1, and is
|
||||
# probably just a placeholder, so it is ignored.
|
||||
if tn_uri is not None and not tn_uri.startswith('data:'):
|
||||
thumbnail = tn_uri
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': episodes,
|
||||
'id': season_id,
|
||||
'title': season_info['title'],
|
||||
'description': season_info.get('description'),
|
||||
'thumbnail': thumbnail,
|
||||
'series': show_info.get('title'),
|
||||
'season_number': season_info.get('season'),
|
||||
'season': season_info['title'],
|
||||
}
|
||||
return self.playlist_result(
|
||||
self._entries(season_info), season_id,
|
||||
**traverse_obj(season_info, {
|
||||
'title': ('title', {str}),
|
||||
'season': ('title', {str}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
}), series=traverse_obj(show_info, ('title', {str})))
|
||||
|
||||
|
||||
class CBCGemLiveIE(InfoExtractor):
|
||||
|
||||
@@ -121,10 +121,7 @@ class CDAIE(InfoExtractor):
|
||||
}, **kwargs)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
app_version = random.choice((
|
||||
'1.2.88 build 15306',
|
||||
'1.2.174 build 18469',
|
||||
))
|
||||
app_version = '1.2.255 build 21541'
|
||||
android_version = random.randrange(8, 14)
|
||||
phone_model = random.choice((
|
||||
# x-kom.pl top selling Android smartphones, as of 2022-12-26
|
||||
@@ -190,7 +187,7 @@ class CDAIE(InfoExtractor):
|
||||
meta = self._download_json(
|
||||
f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']
|
||||
|
||||
uploader = traverse_obj(meta, 'author', 'login')
|
||||
uploader = traverse_obj(meta, ('author', 'login', {str}))
|
||||
|
||||
formats = [{
|
||||
'url': quality['file'],
|
||||
|
||||
@@ -59,16 +59,15 @@ class ChaturbateIE(InfoExtractor):
|
||||
'Accept': 'application/json',
|
||||
}, fatal=False, impersonate=True) or {}
|
||||
|
||||
status = response.get('room_status')
|
||||
if status != 'public':
|
||||
if error := self._ERROR_MAP.get(status):
|
||||
raise ExtractorError(error, expected=True)
|
||||
self.report_warning('Falling back to webpage extraction')
|
||||
return None
|
||||
|
||||
m3u8_url = response.get('url')
|
||||
if not m3u8_url:
|
||||
self.raise_geo_restricted()
|
||||
status = response.get('room_status')
|
||||
if error := self._ERROR_MAP.get(status):
|
||||
raise ExtractorError(error, expected=True)
|
||||
if status == 'public':
|
||||
self.raise_geo_restricted()
|
||||
self.report_warning(f'Got status "{status}" from API; falling back to webpage extraction')
|
||||
return None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -21,7 +21,7 @@ class CHZZKLiveIE(InfoExtractor):
|
||||
'channel': '진짜도현',
|
||||
'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
|
||||
'channel_is_verified': False,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1705510344,
|
||||
'upload_date': '20240117',
|
||||
'live_status': 'is_live',
|
||||
@@ -98,7 +98,7 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
'channel': '침착맨',
|
||||
'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
|
||||
'channel_is_verified': False,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'duration': 15577,
|
||||
'timestamp': 1702970505.417,
|
||||
'upload_date': '20231219',
|
||||
@@ -115,7 +115,7 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
'channel': '라디유radiyu',
|
||||
'channel_id': '68f895c59a1043bc5019b5e08c83a5c5',
|
||||
'channel_is_verified': False,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'duration': 95,
|
||||
'timestamp': 1703102631.722,
|
||||
'upload_date': '20231220',
|
||||
@@ -131,12 +131,30 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
'channel': '강지',
|
||||
'channel_id': 'b5ed5db484d04faf4d150aedd362f34b',
|
||||
'channel_is_verified': True,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'duration': 4433,
|
||||
'timestamp': 1703307460.214,
|
||||
'upload_date': '20231223',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# video_status == 'NONE' but is downloadable
|
||||
'url': 'https://chzzk.naver.com/video/6325166',
|
||||
'info_dict': {
|
||||
'id': '6325166',
|
||||
'ext': 'mp4',
|
||||
'title': '와이프 숙제빼주기',
|
||||
'channel': '이 다',
|
||||
'channel_id': '0076a519f147ee9fd0959bf02f9571ca',
|
||||
'channel_is_verified': False,
|
||||
'view_count': int,
|
||||
'duration': 28167,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1742139216.86,
|
||||
'upload_date': '20250316',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -147,11 +165,7 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
|
||||
live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
|
||||
video_status = video_meta.get('vodStatus')
|
||||
if video_status == 'UPLOAD':
|
||||
playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
|
||||
elif video_status == 'ABR_HLS':
|
||||
if video_status == 'ABR_HLS':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
|
||||
video_id, query={
|
||||
@@ -161,10 +175,17 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
'cpl': 'en_US',
|
||||
})
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
|
||||
formats, subtitles = [], {}
|
||||
live_status = 'post_live' if live_status == 'was_live' else None
|
||||
fatal = video_status == 'UPLOAD'
|
||||
playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id, fatal=fatal)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
traverse_obj(playback, ('media', 0, 'path')), video_id, 'mp4', m3u8_id='hls', fatal=fatal)
|
||||
if formats and video_status != 'UPLOAD':
|
||||
self.write_debug(f'Video found with status: "{video_status}"')
|
||||
elif not formats:
|
||||
self.raise_no_formats(
|
||||
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
|
||||
formats, subtitles = [], {}
|
||||
live_status = 'post_live' if live_status == 'was_live' else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -2,7 +2,6 @@ import base64
|
||||
import collections
|
||||
import functools
|
||||
import getpass
|
||||
import hashlib
|
||||
import http.client
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
@@ -30,6 +29,7 @@ from ..compat import (
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
||||
from ..downloader.hls import HlsFD
|
||||
from ..globals import plugin_ies_overrides
|
||||
from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
@@ -78,7 +78,6 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
parse_m3u8_attributes,
|
||||
parse_resolution,
|
||||
sanitize_filename,
|
||||
sanitize_url,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
@@ -100,6 +99,7 @@ from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
from ..utils._utils import _request_dump_filename
|
||||
|
||||
|
||||
class InfoExtractor:
|
||||
@@ -201,6 +201,11 @@ class InfoExtractor:
|
||||
fragment_base_url
|
||||
* "duration" (optional, int or float)
|
||||
* "filesize" (optional, int)
|
||||
* hls_media_playlist_data
|
||||
The M3U8 media playlist data as a string.
|
||||
Only use if the data must be modified during extraction and
|
||||
the native HLS downloader should bypass requesting the URL.
|
||||
Does not apply if ffmpeg is used as external downloader
|
||||
* is_from_start Is a live format that can be downloaded
|
||||
from the start. Boolean
|
||||
* preference Order number of this format. If this field is
|
||||
@@ -1017,23 +1022,6 @@ class InfoExtractor:
|
||||
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
|
||||
expected=True)
|
||||
|
||||
def _request_dump_filename(self, url, video_id, data=None):
    """Build the file name under which a request's response is dumped or loaded.

    The name joins ``video_id``, the MD5 hex digest of ``data`` (when data
    is given) and ``url`` with underscores, is limited to the
    ``trim_file_name`` param (240 when that is unset or falsy), and gets a
    ``.dump`` suffix before being sanitized in restricted mode.
    """
    data_digest = None if data is None else hashlib.md5(data).hexdigest()
    stem = join_nonempty(video_id, data_digest, url, delim='_')
    max_length = self.get_param('trim_file_name') or 240
    if len(stem) > max_length:
        # Cut over-long names and append a hash of the full name in their place
        hash_suffix = '___' + hashlib.md5(stem.encode()).hexdigest()
        stem = stem[:max_length - len(hash_suffix)] + hash_suffix
    filename = sanitize_filename(f'{stem}.dump', restricted=True)
    if os.name != 'nt':
        return filename
    # Working around MAX_PATH limitation on Windows (see
    # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
    absfilepath = os.path.abspath(filename)
    if len(absfilepath) > 259:
        filename = fR'\\?\{absfilepath}'
    return filename
|
||||
|
||||
def __decode_webpage(self, webpage_bytes, encoding, headers):
|
||||
if not encoding:
|
||||
encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes)
|
||||
@@ -1062,7 +1050,9 @@ class InfoExtractor:
|
||||
if self.get_param('write_pages'):
|
||||
if isinstance(url_or_request, Request):
|
||||
data = self._create_request(url_or_request, data).data
|
||||
filename = self._request_dump_filename(urlh.url, video_id, data)
|
||||
filename = _request_dump_filename(
|
||||
urlh.url, video_id, data,
|
||||
trim_length=self.get_param('trim_file_name'))
|
||||
self.to_screen(f'Saving request to {filename}')
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(webpage_bytes)
|
||||
@@ -1123,7 +1113,9 @@ class InfoExtractor:
|
||||
impersonate=None, require_impersonation=False):
|
||||
if self.get_param('load_pages'):
|
||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
|
||||
filename = _request_dump_filename(
|
||||
url_or_request.url, video_id, url_or_request.data,
|
||||
trim_length=self.get_param('trim_file_name'))
|
||||
self.to_screen(f'Loading request from {filename}')
|
||||
try:
|
||||
with open(filename, 'rb') as dumpf:
|
||||
@@ -1854,12 +1846,26 @@ class InfoExtractor:
|
||||
|
||||
@staticmethod
|
||||
def _remove_duplicate_formats(formats):
|
||||
format_urls = set()
|
||||
seen_urls = set()
|
||||
seen_fragment_urls = set()
|
||||
unique_formats = []
|
||||
for f in formats:
|
||||
if f['url'] not in format_urls:
|
||||
format_urls.add(f['url'])
|
||||
fragments = f.get('fragments')
|
||||
if callable(fragments):
|
||||
unique_formats.append(f)
|
||||
|
||||
elif fragments:
|
||||
fragment_urls = frozenset(
|
||||
fragment.get('url') or urljoin(f['fragment_base_url'], fragment['path'])
|
||||
for fragment in fragments)
|
||||
if fragment_urls not in seen_fragment_urls:
|
||||
seen_fragment_urls.add(fragment_urls)
|
||||
unique_formats.append(f)
|
||||
|
||||
elif f['url'] not in seen_urls:
|
||||
seen_urls.add(f['url'])
|
||||
unique_formats.append(f)
|
||||
|
||||
formats[:] = unique_formats
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video', headers={}):
|
||||
@@ -2929,8 +2935,7 @@ class InfoExtractor:
|
||||
segment_duration = None
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
|
||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(
|
||||
float_or_none(period_duration, segment_duration, default=0)))
|
||||
representation_ms_info['total_number'] = math.ceil(float_or_none(period_duration, segment_duration, default=0))
|
||||
representation_ms_info['fragments'] = [{
|
||||
media_location_key: media_template % {
|
||||
'Number': segment_number,
|
||||
@@ -3789,7 +3794,7 @@ class InfoExtractor:
|
||||
def mark_watched(self, *args, **kwargs):
|
||||
if not self.get_param('mark_watched', False):
|
||||
return
|
||||
if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed:
|
||||
if (self.supports_login() and self._get_login_info()[0] is not None) or self._cookies_passed:
|
||||
self._mark_watched(*args, **kwargs)
|
||||
|
||||
def _mark_watched(self, *args, **kwargs):
|
||||
@@ -3949,14 +3954,18 @@ class InfoExtractor:
|
||||
def __init_subclass__(cls, *, plugin_name=None, **kwargs):
|
||||
if plugin_name:
|
||||
mro = inspect.getmro(cls)
|
||||
super_class = cls.__wrapped__ = mro[mro.index(cls) + 1]
|
||||
cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key
|
||||
cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}'
|
||||
next_mro_class = super_class = mro[mro.index(cls) + 1]
|
||||
|
||||
while getattr(super_class, '__wrapped__', None):
|
||||
super_class = super_class.__wrapped__
|
||||
setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
|
||||
_PLUGIN_OVERRIDES[super_class].append(cls)
|
||||
|
||||
if not any(override.PLUGIN_NAME == plugin_name for override in plugin_ies_overrides.value[super_class]):
|
||||
cls.__wrapped__ = next_mro_class
|
||||
cls.PLUGIN_NAME, cls.ie_key = plugin_name, next_mro_class.ie_key
|
||||
cls.IE_NAME = f'{next_mro_class.IE_NAME}+{plugin_name}'
|
||||
|
||||
setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
|
||||
plugin_ies_overrides.value[super_class].append(cls)
|
||||
return super().__init_subclass__(**kwargs)
|
||||
|
||||
|
||||
@@ -4012,6 +4021,3 @@ class UnsupportedURLIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
raise UnsupportedError(url)
|
||||
|
||||
|
||||
_PLUGIN_OVERRIDES = collections.defaultdict(list)
|
||||
|
||||
@@ -1,692 +0,0 @@
|
||||
import base64
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
jwt_decode_hs256,
|
||||
parse_age_limit,
|
||||
parse_count,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class CrunchyrollBaseIE(InfoExtractor):
    """Common base for the Crunchyroll extractors.

    Provides token handling, the generic API call helpers and the
    chapter/stream extraction. Authentication state (`_REFRESH_TOKEN`,
    `_AUTH_HEADERS`, `_AUTH_EXPIRY`, `_IS_PREMIUM`) is always written to
    `CrunchyrollBaseIE` itself, so it is shared by all subclasses and instances.
    """
    _BASE_URL = 'https://www.crunchyroll.com'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
    _SWITCH_USER_AGENT = 'Crunchyroll/1.8.0 Nintendo Switch/12.3.12.0 UE4/4.27'
    _REFRESH_TOKEN = None
    _AUTH_HEADERS = None
    _AUTH_EXPIRY = None
    _API_ENDPOINT = None
    _BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
        't-kdgp2h8c3jub8fn0fq',
        'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
    )).encode()).decode()
    _IS_PREMIUM = None
    # Maps the language part of a URL to the locale sent as the `locale` query parameter
    _LOCALE_LOOKUP = {
        'ar': 'ar-SA',
        'de': 'de-DE',
        '': 'en-US',
        'es': 'es-419',
        'es-es': 'es-ES',
        'fr': 'fr-FR',
        'it': 'it-IT',
        'pt-br': 'pt-BR',
        'pt-pt': 'pt-PT',
        'ru': 'ru-RU',
        'hi': 'hi-IN',
    }

    def _set_auth_info(self, response):
        """Store premium status, auth headers and expiry from a token response."""
        CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
        # Expire 10 seconds early; fall back to 300 seconds if `expires_in` is unusable
        CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)

    def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
        """POST `data` (form-encoded) to the token endpoint and return the parsed JSON.

        An HTTP 403 is re-raised as an ExtractorError with an explanatory
        message; every other error propagates unchanged.
        """
        try:
            return self._download_json(
                f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
                headers=headers, data=urlencode_postdata(data), impersonate=True)
        except ExtractorError as error:
            if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
                raise
            # The response records an impersonate target if one was used for the request
            if target := error.cause.response.extensions.get('impersonate'):
                raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
            raise ExtractorError(
                'Request blocked by Cloudflare. '
                'Install the required impersonation dependency if possible, '
                'or else navigate to Crunchyroll in your browser, '
                'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
                'and your browser\'s User-Agent (with --user-agent)', expected=True)

    def _perform_login(self, username, password):
        """Obtain a refresh token for `username`, from the cache if possible.

        When a refresh token is already set or cached nothing is requested;
        otherwise a password grant is performed and the new refresh token is
        cached. An HTTP 401 is reported as invalid credentials.
        """
        if not CrunchyrollBaseIE._REFRESH_TOKEN:
            CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
        if CrunchyrollBaseIE._REFRESH_TOKEN:
            return

        try:
            login_response = self._request_token(
                headers={'Authorization': self._BASIC_AUTH}, data={
                    'username': username,
                    'password': password,
                    'grant_type': 'password',
                    'scope': 'offline_access',
                }, note='Logging in', errnote='Failed to log in')
        except ExtractorError as error:
            if isinstance(error.cause, HTTPError) and error.cause.status == 401:
                raise ExtractorError('Invalid username and/or password', expected=True)
            raise

        CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
        self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
        self._set_auth_info(login_response)

    def _update_auth(self):
        """Ensure `_AUTH_HEADERS` holds an unexpired token, requesting a new one if needed.

        Uses the refresh token when one is available and an anonymous
        `client_id` grant otherwise. If the request fails with HTTP 400 and
        login credentials are available, the stored refresh token is discarded
        and a fresh login is performed.
        """
        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
            return

        auth_headers = {'Authorization': self._BASIC_AUTH}
        if CrunchyrollBaseIE._REFRESH_TOKEN:
            data = {
                'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
                'grant_type': 'refresh_token',
                'scope': 'offline_access',
            }
        else:
            data = {'grant_type': 'client_id'}
            # Header values are text; convert the UUID object explicitly
            auth_headers['ETP-Anonymous-ID'] = str(uuid.uuid4())
        try:
            auth_response = self._request_token(auth_headers, data)
        except ExtractorError as error:
            username, password = self._get_login_info()
            if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
                raise
            self.to_screen('Refresh token has expired. Re-logging in')
            CrunchyrollBaseIE._REFRESH_TOKEN = None
            self.cache.store(self._NETRC_MACHINE, username, None)
            self._perform_login(username, password)
            return

        self._set_auth_info(auth_response)

    def _locale_from_language(self, language):
        """Return the locale to request: the `metadata` extractor arg if given, else the lookup for `language`."""
        # NOTE(review): only exact keys of _LOCALE_LOOKUP match; a value with a
        # trailing slash or None yields no locale - confirm this is intended
        config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
        return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language)

    def _call_base_api(self, endpoint, internal_id, lang, note=None, query=None):
        """Download JSON from `endpoint` (relative to `_BASE_URL`) with the current auth headers.

        `query` is never modified; a `locale` parameter is added to a copy
        when a locale can be determined for `lang`.
        """
        self._update_auth()

        if not endpoint.startswith('/'):
            endpoint = f'/{endpoint}'

        query = dict(query or {})
        locale = self._locale_from_language(lang)
        if locale:
            query['locale'] = locale

        return self._download_json(
            f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
            headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)

    def _call_api(self, path, internal_id, lang, note='api', query=None):
        """Call the `/content/v2/<_API_ENDPOINT>/` API.

        Returns None on HTTP 404 and raises ExtractorError on an empty response.
        """
        if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
            path = f'/content/v2/{self._API_ENDPOINT}/{path}'

        try:
            result = self._call_base_api(
                path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
        except ExtractorError as error:
            if isinstance(error.cause, HTTPError) and error.cause.status == 404:
                return None
            raise

        if not result:
            raise ExtractorError(f'Unexpected response when downloading {note} JSON')
        return result

    def _extract_chapters(self, internal_id):
        """Build a chapter list from the skip-events JSON, or return None if it is unavailable."""
        # if no skip events are available, a 403 xml error is returned
        skip_events = self._download_json(
            f'https://static.crunchyroll.com/skip-events/production/{internal_id}.json',
            internal_id, note='Downloading chapter info', fatal=False, errnote=False)
        if not skip_events:
            return None

        chapters = []
        for event in ('recap', 'intro', 'credits', 'preview'):
            start = traverse_obj(skip_events, (event, 'start', {float_or_none}))
            end = traverse_obj(skip_events, (event, 'end', {float_or_none}))
            # some chapters have no start and/or ending time, they will just be ignored
            if start is None or end is None:
                continue
            chapters.append({'title': event.capitalize(), 'start_time': start, 'end_time': end})

        return chapters

    def _extract_stream(self, identifier, display_id=None):
        """Return `(formats, subtitles)` for the stream `identifier`.

        `display_id` defaults to `identifier`. With `ignore_no_formats_error`
        set, a failed stream info request is reported as a warning and
        `([], {})` is returned. HTTP 420 is reported as a rate-limit error.
        """
        if not display_id:
            display_id = identifier

        self._update_auth()
        headers = {**CrunchyrollBaseIE._AUTH_HEADERS, 'User-Agent': self._SWITCH_USER_AGENT}
        try:
            stream_response = self._download_json(
                f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
                display_id, note='Downloading stream info', errnote='Failed to download stream info', headers=headers)
        except ExtractorError as error:
            if self.get_param('ignore_no_formats_error'):
                self.report_warning(error.orig_msg)
                return [], {}
            elif isinstance(error.cause, HTTPError) and error.cause.status == 420:
                raise ExtractorError(
                    'You have reached the rate-limit for active streams; try again later', expected=True)
            raise

        # Keyed by hardsub language; '' is the stream without hardsubs.
        # Values are (format_id, hardsub_lang, manifest_url)
        available_formats = {'': ('', '', stream_response['url'])}
        for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
            available_formats[hardsub_lang] = (f'hardsub-{hardsub_lang}', hardsub_lang, stream['url'])

        # 'none' selects the stream without hardsubs and is also the default
        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
        hardsub_langs = [lang for lang in available_formats if lang]
        if hardsub_langs and 'all' not in requested_hardsubs:
            full_format_langs = set(requested_hardsubs)
            self.to_screen(f'Available hardsub languages: {", ".join(hardsub_langs)}')
            self.to_screen(
                'To extract formats of a hardsub language, use '
                '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
                'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
                only_once=True)
        else:
            full_format_langs = set(map(str.lower, available_formats))

        audio_locale = traverse_obj(stream_response, ('audioLocale', {str}))
        # Reversed so that earlier requested languages get a higher quality value
        hardsub_preference = qualities(requested_hardsubs[::-1])
        formats, subtitles = [], {}
        for format_id, hardsub_lang, stream_url in available_formats.values():
            if hardsub_lang.lower() in full_format_langs:
                adaptive_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
                    stream_url, display_id, mpd_id=format_id, headers=CrunchyrollBaseIE._AUTH_HEADERS,
                    fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
                self._merge_subtitles(dash_subs, target=subtitles)
            else:
                continue  # XXX: Update this if meta mpd formats work; will be tricky with token invalidation
            for f in adaptive_formats:
                if f.get('acodec') != 'none':
                    f['language'] = audio_locale
                f['quality'] = hardsub_preference(hardsub_lang.lower())
            formats.extend(adaptive_formats)

        for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
            subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))

        # Invalidate stream token to avoid rate-limit
        error_msg = 'Unable to invalidate stream token; you may experience rate-limiting'
        if stream_token := stream_response.get('token'):
            self._request_webpage(Request(
                f'https://cr-play-service.prd.crunchyrollsvc.com/v1/token/{identifier}/{stream_token}/inactive',
                headers=headers, method='PATCH'), display_id, 'Invalidating stream token', error_msg, fatal=False)
        else:
            self.report_warning(error_msg)

        return formats, subtitles
|
||||
|
||||
|
||||
class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
    """Base for extractors that use the signed `/cms/v2` API.

    The signed policy (query parameters, bucket and expiry) is stored on
    `CrunchyrollCmsBaseIE` itself and re-fetched once it has expired.
    """
    _API_ENDPOINT = 'cms'
    _CMS_EXPIRY = None

    def _call_cms_api_signed(self, path, internal_id, lang, note='api'):
        """Call the signed cms API at `path`, fetching a signed policy first if needed."""
        cms = CrunchyrollCmsBaseIE

        policy_is_valid = bool(cms._CMS_EXPIRY) and cms._CMS_EXPIRY > time_seconds()
        if not policy_is_valid:
            index = self._call_base_api('index/v2', None, lang, 'Retrieving signed policy')
            cms_web = index['cms_web']
            cms._CMS_QUERY = {
                'Policy': cms_web['policy'],
                'Signature': cms_web['signature'],
                'Key-Pair-Id': cms_web['key_pair_id'],
            }
            cms._CMS_BUCKET = cms_web['bucket']
            # Treat the policy as expired 10 seconds early
            cms._CMS_EXPIRY = parse_iso8601(cms_web['expires']) - 10

        # Relative paths are resolved inside the bucket of the signed policy
        signed_path = path if path.startswith('/cms/v2') else f'/cms/v2{cms._CMS_BUCKET}/{path}'
        api_note = f'Downloading {note} JSON (signed cms)'
        return self._call_base_api(signed_path, internal_id, lang, api_note, query=cms._CMS_QUERY)
|
||||
|
||||
|
||||
class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
    """Extractor for Crunchyroll `watch/<id>` pages: episodes, movies and movie listings."""
    IE_NAME = 'crunchyroll'
    _VALID_URL = r'''(?x)
        https?://(?:beta\.|www\.)?crunchyroll\.com/
        (?:(?P<lang>\w{2}(?:-\w{2})?)/)?
        watch/(?!concert|musicvideo)(?P<id>\w+)'''
    _TESTS = [{
        # Premium only
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
        'info_dict': {
            'id': 'GY2P1Q98Y',
            'ext': 'mp4',
            'duration': 1380.241,
            'timestamp': 1459632600,
            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
            'title': 'World Trigger Episode 73 – To the Future',
            'upload_date': '20160402',
            'series': 'World Trigger',
            'series_id': 'GR757DMKY',
            'season': 'World Trigger',
            'season_id': 'GR9P39NJ6',
            'season_number': 1,
            'episode': 'To the Future',
            'episode_number': 73,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'chapters': 'count:2',
            'age_limit': 14,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': 'm3u8',
            'extractor_args': {'crunchyrollbeta': {'hardsub': ['de-DE']}},
            'format': 'bv[format_id~=hardsub]',
        },
    }, {
        # Premium only
        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
        'info_dict': {
            'id': 'GYE5WKQGR',
            'ext': 'mp4',
            'duration': 366.459,
            'timestamp': 1476788400,
            'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
            'title': 'SHELTER – Porter Robinson presents Shelter the Animation',
            'upload_date': '20161018',
            'series': 'SHELTER',
            'series_id': 'GYGG09WWY',
            'season': 'SHELTER',
            'season_id': 'GR09MGK4R',
            'season_number': 1,
            'episode': 'Porter Robinson presents Shelter the Animation',
            'episode_number': 0,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'age_limit': 14,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://www.crunchyroll.com/watch/GJWU2VKK3/cherry-blossom-meeting-and-a-coming-blizzard',
        'info_dict': {
            'id': 'GJWU2VKK3',
            'ext': 'mp4',
            'duration': 1420.054,
            'description': 'md5:2d1c67c0ec6ae514d9c30b0b99a625cd',
            'title': 'The Ice Guy and His Cool Female Colleague Episode 1 – Cherry Blossom Meeting and a Coming Blizzard',
            'series': 'The Ice Guy and His Cool Female Colleague',
            'series_id': 'GW4HM75NP',
            'season': 'The Ice Guy and His Cool Female Colleague',
            'season_id': 'GY9PC21VE',
            'season_number': 1,
            'episode': 'Cherry Blossom Meeting and a Coming Blizzard',
            'episode_number': 1,
            'chapters': 'count:2',
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'timestamp': 1672839000,
            'upload_date': '20230104',
            'age_limit': 14,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/GM8F313NQ',
        'info_dict': {
            'id': 'GM8F313NQ',
            'ext': 'mp4',
            'title': 'Garakowa -Restore the World-',
            'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
            'duration': 3996.104,
            'age_limit': 13,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'no longer exists',
    }, {
        'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
        'info_dict': {
            'id': 'G62PEZ2E6',
            'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
            'age_limit': 13,
            'duration': 65.138,
            'title': 'Garakowa -Restore the World-',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
        'only_matching': True,
    }, {
        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
        'only_matching': True,
    }]
    # We want to support lazy playlist filtering and movie listings cannot be inside a playlist
    _RETURN_TYPE = 'video'

    def _real_extract(self, url):
        """Extract an episode or movie, or a playlist for a movie listing.

        Raises ExtractorError when the object cannot be found or has an
        unknown type. Formats are only requested when the content is not
        premium-only or the account is premium.
        """
        lang, internal_id = self._match_valid_url(url).group('lang', 'id')
        # `lang` is captured without its trailing slash and is None when the URL
        # has no language part, so build the path prefix for generated URLs explicitly
        lang_prefix = f'{lang}/' if lang else ''

        # We need to use unsigned API call to allow ratings query string
        response = traverse_obj(self._call_api(
            f'objects/{internal_id}', internal_id, lang, 'object info', {'ratings': 'true'}), ('data', 0, {dict}))
        if not response:
            raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)

        object_type = response.get('type')
        if object_type == 'episode':
            result = self._transform_episode_response(response)

        elif object_type == 'movie':
            result = self._transform_movie_response(response)

        elif object_type == 'movie_listing':
            first_movie_id = traverse_obj(response, ('movie_listing_metadata', 'first_movie_id'))
            if not self._yes_playlist(internal_id, first_movie_id):
                return self.url_result(f'{self._BASE_URL}/{lang_prefix}watch/{first_movie_id}', CrunchyrollBetaIE, first_movie_id)

            def entries():
                movies = self._call_api(f'movie_listings/{internal_id}/movies', internal_id, lang, 'movie list')
                for movie_response in traverse_obj(movies, ('data', ...)):
                    yield self.url_result(
                        f'{self._BASE_URL}/{lang_prefix}watch/{movie_response["id"]}',
                        CrunchyrollBetaIE, **self._transform_movie_response(movie_response))

            return self.playlist_result(entries(), **self._transform_movie_response(response))

        else:
            raise ExtractorError(f'Unknown object type {object_type}')

        if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
            message = f'This {object_type} is for premium members only'
            # A refresh token means the user is logged in, so logging in would not help
            if CrunchyrollBaseIE._REFRESH_TOKEN:
                self.raise_no_formats(message, expected=True, video_id=internal_id)
            else:
                self.raise_login_required(message, method='password', metadata_available=True)
        else:
            result['formats'], result['subtitles'] = self._extract_stream(internal_id)

        result['chapters'] = self._extract_chapters(internal_id)

        def calculate_count(item):
            # Join the displayed number with its unit (if any) before parsing
            return parse_count(''.join((item['displayed'], item.get('unit') or '')))

        result.update(traverse_obj(response, ('rating', {
            'like_count': ('up', {calculate_count}),
            'dislike_count': ('down', {calculate_count}),
        })))

        return result

    @staticmethod
    def _transform_episode_response(data):
        """Map an episode API object to info dict fields.

        Metadata is read from `episode_metadata` if it is a dict, otherwise
        from `data` itself.
        """
        metadata = traverse_obj(data, (('episode_metadata', None), {dict}), get_all=False) or {}
        return {
            'id': data['id'],
            'title': ' \u2013 '.join((
                ('{}{}'.format(
                    format_field(metadata, 'season_title'),
                    format_field(metadata, 'episode', ' Episode %s'))),
                format_field(data, 'title'))),
            **traverse_obj(data, {
                'episode': ('title', {str}),
                'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
                'thumbnails': ('images', 'thumbnail', ..., ..., {
                    'url': ('source', {url_or_none}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                }),
            }),
            **traverse_obj(metadata, {
                'duration': ('duration_ms', {float_or_none(scale=1000)}),
                'timestamp': ('upload_date', {parse_iso8601}),
                'series': ('series_title', {str}),
                'series_id': ('series_id', {str}),
                'season': ('season_title', {str}),
                'season_id': ('season_id', {str}),
                'season_number': ('season_number', ({int}, {float_or_none})),
                'episode_number': ('sequence_number', ({int}, {float_or_none})),
                'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
                'language': ('audio_locale', {str}),
            }, get_all=False),
        }

    @staticmethod
    def _transform_movie_response(data):
        """Map a movie or movie listing API object to info dict fields.

        Metadata is read from `movie_metadata` or `movie_listing_metadata` if
        one is a dict, otherwise from `data` itself.
        """
        metadata = traverse_obj(data, (('movie_metadata', 'movie_listing_metadata', None), {dict}), get_all=False) or {}
        return {
            'id': data['id'],
            **traverse_obj(data, {
                'title': ('title', {str}),
                'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
                'thumbnails': ('images', 'thumbnail', ..., ..., {
                    'url': ('source', {url_or_none}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                }),
            }),
            **traverse_obj(metadata, {
                'duration': ('duration_ms', {float_or_none(scale=1000)}),
                'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
            }),
        }
|
||||
|
||||
|
||||
class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE):
    """Extractor for Crunchyroll `series/<id>` pages; yields one entry per episode."""
    IE_NAME = 'crunchyroll:playlist'
    _VALID_URL = r'''(?x)
        https?://(?:beta\.|www\.)?crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        series/(?P<id>\w+)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
        'info_dict': {
            'id': 'GY19NQ2QR',
            'title': 'Girl Friend BETA',
            'description': 'md5:99c1b22ee30a74b536a8277ced8eb750',
            # XXX: `thumbnail` does not get set from `thumbnails` in playlist
            # 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'age_limit': 14,
        },
        'playlist_mincount': 10,
    }, {
        'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of all episodes of all seasons of the series."""
        mobj = self._match_valid_url(url)
        lang = mobj.group('lang')
        internal_id = mobj.group('id')

        # Series level metadata comes from the unsigned API
        series_response = self._call_api(f'series/{internal_id}', internal_id, lang, 'series')
        series_info = traverse_obj(series_response, ('data', 0, {
            'title': ('title', {str}),
            'description': ('description', {lambda x: x.replace(r'\r\n', '\n')}),
            'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
            'thumbnails': ('images', ..., ..., ..., {
                'url': ('source', {url_or_none}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            }),
        }))

        def entries():
            # Seasons and episodes are listed lazily through the signed cms API
            seasons = self._call_cms_api_signed(
                f'seasons?series_id={internal_id}', internal_id, lang, 'seasons')
            for season in traverse_obj(seasons, ('items', ..., {dict})):
                season_id = season['id']
                episodes = self._call_cms_api_signed(
                    f'episodes?season_id={season_id}', season_id, lang, 'episode list')
                for episode in traverse_obj(episodes, ('items', ..., {dict})):
                    # `lang` already carries its trailing slash (or is empty)
                    episode_url = f'{self._BASE_URL}/{lang}watch/{episode["id"]}'
                    episode_info = CrunchyrollBetaIE._transform_episode_response(episode)
                    yield self.url_result(episode_url, CrunchyrollBetaIE, **episode_info)

        return self.playlist_result(entries(), internal_id, **series_info)
|
||||
|
||||
|
||||
class CrunchyrollMusicIE(CrunchyrollBaseIE):
    """Extractor for Crunchyroll concert and music video pages."""
    IE_NAME = 'crunchyroll:music'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
        'info_dict': {
            'ext': 'mp4',
            'id': 'MV5B02C79',
            'display_id': 'egaono-hana',
            'title': 'Egaono Hana',
            'track': 'Egaono Hana',
            'artists': ['Goose house'],
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'genres': ['J-Pop'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
        'info_dict': {
            'ext': 'mp4',
            'id': 'MV88BB7F2C',
            'display_id': 'crossing-field',
            'title': 'Crossing Field',
            'track': 'Crossing Field',
            'artists': ['LiSA'],
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'genres': ['Anime'],
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'no longer exists',
    }, {
        'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
        'info_dict': {
            'ext': 'mp4',
            'id': 'MC2E2AC135',
            'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
            'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
            'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
            'artists': ['LiSA'],
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'description': 'md5:747444e7e6300907b7a43f0a0503072e',
            'genres': ['J-Pop'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
        'only_matching': True,
    }, {
        'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
        'only_matching': True,
    }, {
        'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
        'only_matching': True,
    }]
    _API_ENDPOINT = 'music'

    def _real_extract(self, url):
        """Extract a concert or music video; formats are skipped for premium-only media without premium."""
        mobj = self._match_valid_url(url)
        lang, internal_id, object_type = mobj.group('lang'), mobj.group('id'), mobj.group('type')

        # API path and progress note for each URL type
        api_info = {
            'concert': ('concerts', 'concert info'),
            'musicvideo': ('music_videos', 'music video info'),
        }
        path, name = api_info[object_type]

        api_response = self._call_api(f'{path}/{internal_id}', internal_id, lang, name)
        response = traverse_obj(api_response, ('data', 0, {dict}))
        if not response:
            raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)

        result = self._transform_music_response(response)

        if self._IS_PREMIUM or not response.get('isPremiumOnly'):
            formats, _ = self._extract_stream(f'music/{internal_id}', internal_id)
            result['formats'] = formats
            return result

        message = f'This {response.get("type") or "media"} is for premium members only'
        # A refresh token means the user is logged in, so logging in would not help
        if CrunchyrollBaseIE._REFRESH_TOKEN:
            self.raise_no_formats(message, expected=True, video_id=internal_id)
        else:
            self.raise_login_required(message, method='password', metadata_available=True)
        return result

    @staticmethod
    def _transform_music_response(data):
        """Map a concert or music video API object to info dict fields."""
        thumbnail_fields = {
            'url': ('source', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
        }
        field_paths = {
            'display_id': 'slug',
            'title': 'title',
            'track': 'title',
            'artists': ('artist', 'name', all),
            # An empty description becomes None
            'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
            'thumbnails': ('images', ..., ..., thumbnail_fields),
            'genres': ('genres', ..., 'displayValue'),
            'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
        }
        return {'id': data['id'], **traverse_obj(data, field_paths)}
|
||||
|
||||
|
||||
class CrunchyrollArtistIE(CrunchyrollBaseIE):
    """Extractor for Crunchyroll artist pages; yields the artist's concerts and music videos."""
    IE_NAME = 'crunchyroll:artist'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        artist/(?P<id>\w{10})'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/artist/MA179CB50D',
        'info_dict': {
            'id': 'MA179CB50D',
            'title': 'LiSA',
            'genres': ['Anime', 'J-Pop', 'Rock'],
            'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
        },
        'playlist_mincount': 83,
    }, {
        'url': 'https://www.crunchyroll.com/artist/MA179CB50D/lisa',
        'only_matching': True,
    }]
    _API_ENDPOINT = 'music'

    def _real_extract(self, url):
        """Return a playlist of the artist's concerts followed by their music videos.

        Raises ExtractorError when no artist data is returned.
        """
        lang, internal_id = self._match_valid_url(url).group('lang', 'id')
        response = traverse_obj(self._call_api(
            f'artists/{internal_id}', internal_id, lang, 'artist info'), ('data', 0))
        # `_call_api` returns None on HTTP 404; fail with a clear message
        # instead of a TypeError when indexing the missing response
        if not response:
            raise ExtractorError(f'No artist with id {internal_id} could be found (possibly region locked?)', expected=True)

        def entries():
            for attribute, path in [('concerts', 'concert'), ('videos', 'musicvideo')]:
                for media_id in traverse_obj(response, (attribute, ...)):
                    yield self.url_result(f'{self._BASE_URL}/watch/{path}/{media_id}', CrunchyrollMusicIE, media_id)

        return self.playlist_result(entries(), **self._transform_artist_response(response))

    @staticmethod
    def _transform_artist_response(data):
        """Map an artist API object to playlist info dict fields."""
        return {
            'id': data['id'],
            **traverse_obj(data, {
                'title': 'name',
                'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
                'thumbnails': ('images', ..., ..., {
                    'url': ('source', {url_or_none}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                }),
                'genres': ('genres', ..., 'displayValue'),
            }),
        }
|
||||
@@ -1,12 +1,9 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CultureUnpluggedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cultureunplugged\.com/documentary/watch-online/play/(?P<id>\d+)(?:/(?P<display_id>[^/]+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?cultureunplugged\.com/(?:documentary/watch-online/)?play/(?P<id>\d+)(?:/(?P<display_id>[^/#?]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662/The-Next--Best-West',
|
||||
'md5': 'ac6c093b089f7d05e79934dcb3d228fc',
|
||||
@@ -15,12 +12,25 @@ class CultureUnpluggedIE(InfoExtractor):
|
||||
'display_id': 'The-Next--Best-West',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Next, Best West',
|
||||
'description': 'md5:0423cd00833dea1519cf014e9d0903b1',
|
||||
'description': 'md5:770033a3b7c2946a3bcfb7f1c6fb7045',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'creator': 'Coldstream Creative',
|
||||
'creators': ['Coldstream Creative'],
|
||||
'duration': 2203,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cultureunplugged.com/play/2833/Koi-Sunta-Hai--Journeys-with-Kumar---Kabir--Someone-is-Listening-',
|
||||
'md5': 'dc2014bc470dfccba389a1c934fa29fa',
|
||||
'info_dict': {
|
||||
'id': '2833',
|
||||
'display_id': 'Koi-Sunta-Hai--Journeys-with-Kumar---Kabir--Someone-is-Listening-',
|
||||
'ext': 'mp4',
|
||||
'title': 'Koi Sunta Hai: Journeys with Kumar & Kabir (Someone is Listening)',
|
||||
'description': 'md5:fa94ac934927c98660362b8285b2cda5',
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://s3.amazonaws.com/cdn.cultureunplugged.com/thumbnails_16_9/lg/2833.jpg',
|
||||
'creators': ['Srishti'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662',
|
||||
'only_matching': True,
|
||||
@@ -31,9 +41,6 @@ class CultureUnpluggedIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
# request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request
|
||||
self._request_webpage(HEADRequest(
|
||||
'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id)
|
||||
movie_data = self._download_json(
|
||||
f'http://www.cultureunplugged.com/movie-data/cu-{video_id}.json', display_id)
|
||||
|
||||
|
||||
@@ -1,35 +1,40 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
IE_NAME = 'cwtv'
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch|guid)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cwtv.com/shows/all-american-homecoming/ready-or-not/?play=d848488f-f62a-40fd-af1f-6440b1821aab',
|
||||
'url': 'https://www.cwtv.com/shows/continuum/a-stitch-in-time/?play=9149a1e1-4cb2-46d7-81b2-47d35bbd332b',
|
||||
'info_dict': {
|
||||
'id': 'd848488f-f62a-40fd-af1f-6440b1821aab',
|
||||
'id': '9149a1e1-4cb2-46d7-81b2-47d35bbd332b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ready Or Not',
|
||||
'description': 'Simone is concerned about changes taking place at Bringston; JR makes a decision about his future.',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 2547,
|
||||
'timestamp': 1720519200,
|
||||
'title': 'A Stitch in Time',
|
||||
'description': r're:(?s)City Protective Services officer Kiera Cameron is transported from 2077.+',
|
||||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
'duration': 2632,
|
||||
'timestamp': 1736928000,
|
||||
'uploader': 'CWTV',
|
||||
'chapters': 'count:6',
|
||||
'series': 'All American: Homecoming',
|
||||
'season_number': 3,
|
||||
'chapters': 'count:5',
|
||||
'series': 'Continuum',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'age_limit': 0,
|
||||
'upload_date': '20240709',
|
||||
'season': 'Season 3',
|
||||
'age_limit': 14,
|
||||
'upload_date': '20250115',
|
||||
'season': 'Season 1',
|
||||
'episode': 'Episode 1',
|
||||
},
|
||||
'params': {
|
||||
@@ -42,7 +47,7 @@ class CWTVIE(InfoExtractor):
|
||||
'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'ext': 'mp4',
|
||||
'title': 'Legends of Yesterday',
|
||||
'description': 'Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote location to keep them hidden from Vandal Savage while they figure out how to defeat him.',
|
||||
'description': r're:(?s)Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote.+',
|
||||
'duration': 2665,
|
||||
'series': 'Arrow',
|
||||
'season_number': 4,
|
||||
@@ -71,7 +76,7 @@ class CWTVIE(InfoExtractor):
|
||||
'timestamp': 1444107300,
|
||||
'age_limit': 14,
|
||||
'uploader': 'CWTV',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
'chapters': 'count:4',
|
||||
'episode': 'Episode 20',
|
||||
'season': 'Season 11',
|
||||
@@ -89,14 +94,17 @@ class CWTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.cwtv.com/movies/play/?guid=0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
f'https://images.cwtv.com/feed/mobileapp/video-meta/apiversion_12/guid_{video_id}', video_id)
|
||||
if data.get('result') != 'ok':
|
||||
raise ExtractorError(data['msg'], expected=True)
|
||||
f'https://images.cwtv.com/feed/app-2/video-meta/apiversion_22/device_android/guid_{video_id}', video_id)
|
||||
if traverse_obj(data, 'result') != 'ok':
|
||||
raise ExtractorError(traverse_obj(data, (('error_msg', 'msg'), {str}, any)), expected=True)
|
||||
video_data = data['video']
|
||||
title = video_data['title']
|
||||
mpx_url = update_url_query(
|
||||
@@ -123,3 +131,50 @@ class CWTVIE(InfoExtractor):
|
||||
'ie_key': 'ThePlatform',
|
||||
'thumbnail': video_data.get('large_thumbnail'),
|
||||
}
|
||||
|
||||
|
||||
class CWTVMovieIE(InfoExtractor):
    """Resolve a cwtv.com movie landing page to its video GUID and hand off to CWTVIE."""

    IE_NAME = 'cwtv:movie'
    _VALID_URL = r'https?://(?:www\.)?cwtv\.com/shows/(?P<id>[\w-]+)/?\?(?:[^#]+&)?viewContext=Movies'
    _TESTS = [{
        'url': 'https://www.cwtv.com/shows/the-crush/?viewContext=Movies+Swimlane',
        'info_dict': {
            'id': '0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c',
            'ext': 'mp4',
            'title': 'The Crush',
            'upload_date': '20241112',
            'description': 'md5:1549acd90dff4a8273acd7284458363e',
            'chapters': 'count:9',
            'timestamp': 1731398400,
            'age_limit': 16,
            'duration': 5337,
            'series': 'The Crush',
            'season': 'Season 1',
            'uploader': 'CWTV',
            'season_number': 1,
            'episode': 'Episode 1',
            'episode_number': 1,
            'thumbnail': r're:https?://.+\.jpe?g',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]
    _UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'

    def _real_extract(self, url):
        """Find the movie's GUID on the landing page and delegate to CWTVIE.

        The GUID is looked up first in the `video_id` query parameter of the
        app-link meta tags, then in the inline `CWTV.Site` JavaScript values.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Try the iOS app link first, then the Android one
        app_url = None
        for meta_name in ('al:ios:url', 'al:android:url'):
            app_url = self._html_search_meta(meta_name, webpage, default=None)
            if app_url:
                break

        def uuid_match(candidate):
            # Only accept values that are a well-formed UUID in their entirety
            return re.fullmatch(self._UUID_RE, candidate)

        video_id = traverse_obj(parse_qs(app_url), ('video_id', 0, {uuid_match}, 0))
        if not video_id:
            guid_patterns = [
                rf'CWTV\.Site\.curPlayingGUID\s*=\s*["\']({self._UUID_RE})',
                rf'CWTV\.Site\.viewInAppURL\s*=\s*["\']/shows/[\w-]+/watch-in-app/\?play=({self._UUID_RE})',
            ]
            video_id = self._search_regex(guid_patterns, webpage, 'video ID')

        episode_url = f'https://www.cwtv.com/shows/{display_id}/{display_id}/?play={video_id}'
        return self.url_result(episode_url, CWTVIE, video_id)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import functools
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
@@ -51,6 +52,15 @@ class DacastVODIE(DacastBaseIE):
|
||||
'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, { # /uspaes/ in hls_url
|
||||
'url': 'https://iframe.dacast.com/vod/f9823fc6-faba-b98f-0d00-4a7b50a58c5b/348c5c84-b6af-4859-bb9d-1d01009c795b',
|
||||
'info_dict': {
|
||||
'id': '348c5c84-b6af-4859-bb9d-1d01009c795b',
|
||||
'ext': 'mp4',
|
||||
'title': 'pl1-edyta-rubas-211124.mp4',
|
||||
'uploader_id': 'f9823fc6-faba-b98f-0d00-4a7b50a58c5b',
|
||||
'thumbnail': 'https://universe-files.dacast.com/4d0bd042-a536-752d-fc34-ad2fa44bbcbb.png',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/',
|
||||
@@ -74,6 +84,15 @@ class DacastVODIE(DacastBaseIE):
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
@functools.cached_property
def _usp_signing_secret(self):
    """Return the USP signing secret scraped from the Dacast player JS.

    Falls back to a hardcoded value when the secret cannot be found.
    """
    player_script = self._download_webpage(
        'https://player.dacast.com/js/player.js', None, 'Downloading player JS')
    scraped_secret = self._search_regex(
        r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_script,
        'usp signing secret', group='secret', fatal=False)
    if scraped_secret:
        return scraped_secret
    # Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
    return 'odnInCGqhvtyRTtIiddxtuRtawYYICZP'
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
|
||||
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
|
||||
@@ -94,10 +113,10 @@ class DacastVODIE(DacastBaseIE):
|
||||
if 'DRM_EXT' in hls_url:
|
||||
self.report_drm(video_id)
|
||||
elif '/uspaes/' in hls_url:
|
||||
# From https://player.dacast.com/js/player.js
|
||||
# Ref: https://player.dacast.com/js/player.js
|
||||
ts = int(time.time())
|
||||
signature = hashlib.sha1(
|
||||
f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw').digest().hex()
|
||||
f'{10413792000 - ts}{ts}{self._usp_signing_secret}'.encode()).digest().hex()
|
||||
hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}'
|
||||
|
||||
for retry in self.RetryManager():
|
||||
|
||||
@@ -100,7 +100,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'''(?ix)
|
||||
https?://
|
||||
(?:https?:)?//
|
||||
(?:
|
||||
dai\.ly/|
|
||||
(?:
|
||||
@@ -116,7 +116,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
(?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
IE_NAME = 'dailymotion'
|
||||
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
||||
_EMBED_REGEX = [rf'(?ix)<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)["\'](?P<url>{_VALID_URL[5:]})']
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
|
||||
'md5': '074b95bdee76b9e3654137aee9c79dfe',
|
||||
@@ -261,6 +261,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https://\w+.dmcdn.net/v/WnEY61cmvMxt2Fi6d/x1080',
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
|
||||
@@ -288,6 +289,44 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne',
|
||||
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xry80.html?video=x8vu47w
|
||||
'url': 'https://www.metatube.com/en/videos/546765/This-frogs-decorates-Christmas-tree/',
|
||||
'info_dict': {
|
||||
'id': 'x8vu47w',
|
||||
'ext': 'mp4',
|
||||
'like_count': int,
|
||||
'uploader': 'Metatube',
|
||||
'thumbnail': r're:https://\w+.dmcdn.net/v/W1G_S1coGSFTfkTeR/x1080',
|
||||
'upload_date': '20240326',
|
||||
'view_count': int,
|
||||
'timestamp': 1711496732,
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'x2xpy74',
|
||||
'title': 'Está lindas ranitas ponen su arbolito',
|
||||
'duration': 28,
|
||||
'description': 'Que lindura',
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
# //geo.dailymotion.com/player/xysxq.html?video=k2Y4Mjp7krAF9iCuINM
|
||||
'url': 'https://lcp.fr/programmes/avant-la-catastrophe-la-naissance-de-la-dictature-nazie-1933-1936-346819',
|
||||
'info_dict': {
|
||||
'id': 'k2Y4Mjp7krAF9iCuINM',
|
||||
'ext': 'mp4',
|
||||
'title': 'Avant la catastrophe la naissance de la dictature nazie 1933 -1936',
|
||||
'description': 'md5:7b620d5e26edbe45f27bbddc1c0257c1',
|
||||
'uploader': 'LCP Assemblée nationale',
|
||||
'uploader_id': 'xbz33d',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
'duration': 3220,
|
||||
'thumbnail': 'https://s1.dmcdn.net/v/Xvumk1djJBUZfjj2a/x1080',
|
||||
'tags': [],
|
||||
'timestamp': 1739919947,
|
||||
'upload_date': '20250218',
|
||||
},
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_COMMON_MEDIA_FIELDS = '''description
|
||||
@@ -302,7 +341,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for mobj in re.finditer(
|
||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
yield 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
|
||||
@@ -1,142 +0,0 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
)
|
||||
|
||||
|
||||
class DeezerBaseInfoExtractor(InfoExtractor):
    """Shared page-download and JSON-state parsing for the Deezer extractors."""

    def get_data(self, url):
        """Download a Deezer page and return `(id, webpage, parsed_state)`.

        Raises ExtractorError (expected) when the page carries a
        geoblocking notice.
        """
        if not self.get_param('test'):
            self.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')

        data_id = self._match_valid_url(url).group('id')
        webpage = self._download_webpage(url, data_id)

        blocked_notice = self._html_search_regex(
            r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
            default=None)
        if blocked_notice is not None:
            raise ExtractorError(
                f'Deezer said: {blocked_notice}', expected=True)

        # Two page layouts are recognised; the first matching pattern wins
        state_patterns = (
            r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>',
            r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n',
        )
        raw_state = self._search_regex(state_patterns, webpage, 'data JSON')
        return data_id, webpage, json.loads(raw_state)
|
||||
|
||||
|
||||
class DeezerPlaylistIE(DeezerBaseInfoExtractor):
    """Extract a Deezer playlist as a list of 30-second preview entries."""

    _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?playlist/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.deezer.com/playlist/176747451',
        'info_dict': {
            'id': '176747451',
            'title': 'Best!',
            'uploader': 'anonymous',
            'thumbnail': r're:^https?://(e-)?cdns-images\.dzcdn\.net/images/cover/.*\.jpg$',
        },
        'playlist_count': 29,
    }

    def _real_extract(self, url):
        """Build the playlist result from the page state returned by get_data().

        Missing or null `DATA`, `SONGS`, `ARTISTS` and `MEDIA` values are
        treated as empty instead of raising, and the thumbnail is optional.
        """
        playlist_id, webpage, data = self.get_data(url)

        # `dict.get(key, {})` still yields None when the key is present but null
        playlist_data = data.get('DATA') or {}
        playlist_title = playlist_data.get('TITLE')
        playlist_uploader = playlist_data.get('PARENT_USERNAME')
        # The thumbnail is optional metadata; do not abort extraction without it
        playlist_thumbnail = self._search_regex(
            r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage,
            'playlist thumbnail', default=None)

        entries = []
        for s in (data.get('SONGS') or {}).get('data') or []:
            # Guard against a missing, null or empty MEDIA list
            media = s.get('MEDIA') or [{}]
            formats = [{
                'format_id': 'preview',
                'url': media[0].get('HREF'),
                'preference': -100,  # Only the first 30 seconds
                'ext': 'mp3',
            }]
            # Skip artists without a name so that str.join never sees None
            artists = ', '.join(orderedSet(
                a['ART_NAME'] for a in s.get('ARTISTS') or [] if a.get('ART_NAME')))
            entries.append({
                'id': s.get('SNG_ID'),
                'duration': int_or_none(s.get('DURATION')),
                'title': '{} - {}'.format(artists, s.get('SNG_TITLE')),
                'uploader': s.get('ART_NAME'),
                'uploader_id': s.get('ART_ID'),
                'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
                'formats': formats,
            })

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'uploader': playlist_uploader,
            'thumbnail': playlist_thumbnail,
            'entries': entries,
        }
|
||||
|
||||
|
||||
class DeezerAlbumIE(DeezerBaseInfoExtractor):
    """Extract a Deezer album as a list of 30-second preview entries."""

    _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?album/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.deezer.com/fr/album/67505622',
        'info_dict': {
            'id': '67505622',
            'title': 'Last Week',
            'uploader': 'Home Brew',
            'thumbnail': r're:^https?://(e-)?cdns-images\.dzcdn\.net/images/cover/.*\.jpg$',
        },
        'playlist_count': 7,
    }

    def _real_extract(self, url):
        """Build the album result from the page state returned by get_data().

        Missing or null `DATA`, `SONGS`, `ARTISTS` and `MEDIA` values are
        treated as empty instead of raising, and the thumbnail is optional.
        """
        album_id, webpage, data = self.get_data(url)

        # `dict.get(key, {})` still yields None when the key is present but null
        album_data = data.get('DATA') or {}
        album_title = album_data.get('ALB_TITLE')
        album_uploader = album_data.get('ART_NAME')
        # The thumbnail is optional metadata; do not abort extraction without it
        album_thumbnail = self._search_regex(
            r'<img id="naboo_album_image".*?src="([^"]+)"', webpage,
            'album thumbnail', default=None)

        entries = []
        for s in (data.get('SONGS') or {}).get('data') or []:
            # Guard against a missing, null or empty MEDIA list
            media = s.get('MEDIA') or [{}]
            formats = [{
                'format_id': 'preview',
                'url': media[0].get('HREF'),
                'preference': -100,  # Only the first 30 seconds
                'ext': 'mp3',
            }]
            # Skip artists without a name so that str.join never sees None
            artists = ', '.join(orderedSet(
                a['ART_NAME'] for a in s.get('ARTISTS') or [] if a.get('ART_NAME')))
            entries.append({
                'id': s.get('SNG_ID'),
                'duration': int_or_none(s.get('DURATION')),
                'title': '{} - {}'.format(artists, s.get('SNG_TITLE')),
                'uploader': s.get('ART_NAME'),
                'uploader_id': s.get('ART_ID'),
                'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
                'formats': formats,
                'track': s.get('SNG_TITLE'),
                'track_number': int_or_none(s.get('TRACK_NUMBER')),
                'track_id': s.get('SNG_ID'),
                'artist': album_uploader,
                'album': album_title,
                'album_artist': album_uploader,
            })

        return {
            '_type': 'playlist',
            'id': album_id,
            'title': album_title,
            'uploader': album_uploader,
            'thumbnail': album_thumbnail,
            'entries': entries,
        }
|
||||
130
yt_dlp/extractor/digiview.py
Normal file
130
yt_dlp/extractor/digiview.py
Normal file
@@ -0,0 +1,130 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import clean_html, int_or_none, traverse_obj, url_or_none, urlencode_postdata
|
||||
|
||||
|
||||
class DigiviewIE(InfoExtractor):
    """Resolve a ladigitale.dev Digiview link to the YouTube video it wraps."""

    _VALID_URL = r'https?://(?:www\.)?ladigitale\.dev/digiview/#/v/(?P<id>[0-9a-f]+)'
    _TESTS = [{
        # normal video
        'url': 'https://ladigitale.dev/digiview/#/v/67a8e50aee2ec',
        'info_dict': {
            'id': '67a8e50aee2ec',
            'ext': 'mp4',
            'title': 'Big Buck Bunny 60fps 4K - Official Blender Foundation Short Film',
            'thumbnail': 'https://i.ytimg.com/vi/aqz-KE-bpKQ/hqdefault.jpg',
            'upload_date': '20141110',
            'playable_in_embed': True,
            'duration': 635,
            'view_count': int,
            'comment_count': int,
            'channel': 'Blender',
            'license': 'Creative Commons Attribution license (reuse allowed)',
            'like_count': int,
            'tags': 'count:8',
            'live_status': 'not_live',
            'channel_id': 'UCSMOQeBJ2RAnuFungnQOxLg',
            'channel_follower_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCSMOQeBJ2RAnuFungnQOxLg',
            'uploader_id': '@BlenderOfficial',
            'description': 'md5:8f3ed18a53a1bb36cbb3b70a15782fd0',
            'categories': ['Film & Animation'],
            'channel_is_verified': True,
            'heatmap': 'count:100',
            'section_end': 635,
            'uploader': 'Blender',
            'timestamp': 1415628355,
            'uploader_url': 'https://www.youtube.com/@BlenderOfficial',
            'age_limit': 0,
            'section_start': 0,
            'availability': 'public',
        },
    }, {
        # cut video
        'url': 'https://ladigitale.dev/digiview/#/v/67a8e51d0dd58',
        'info_dict': {
            'id': '67a8e51d0dd58',
            'ext': 'mp4',
            'title': 'Big Buck Bunny 60fps 4K - Official Blender Foundation Short Film',
            'thumbnail': 'https://i.ytimg.com/vi/aqz-KE-bpKQ/hqdefault.jpg',
            'upload_date': '20141110',
            'playable_in_embed': True,
            'duration': 5,
            'view_count': int,
            'comment_count': int,
            'channel': 'Blender',
            'license': 'Creative Commons Attribution license (reuse allowed)',
            'like_count': int,
            'tags': 'count:8',
            'live_status': 'not_live',
            'channel_id': 'UCSMOQeBJ2RAnuFungnQOxLg',
            'channel_follower_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCSMOQeBJ2RAnuFungnQOxLg',
            'uploader_id': '@BlenderOfficial',
            'description': 'md5:8f3ed18a53a1bb36cbb3b70a15782fd0',
            'categories': ['Film & Animation'],
            'channel_is_verified': True,
            'heatmap': 'count:100',
            'section_end': 10,
            'uploader': 'Blender',
            'timestamp': 1415628355,
            'uploader_url': 'https://www.youtube.com/@BlenderOfficial',
            'age_limit': 0,
            'section_start': 5,
            'availability': 'public',
        },
    }, {
        # changed title
        'url': 'https://ladigitale.dev/digiview/#/v/67a8ea5644d7a',
        'info_dict': {
            'id': '67a8ea5644d7a',
            'ext': 'mp4',
            'title': 'Big Buck Bunny (with title changed)',
            'thumbnail': 'https://i.ytimg.com/vi/aqz-KE-bpKQ/hqdefault.jpg',
            'upload_date': '20141110',
            'playable_in_embed': True,
            'duration': 5,
            'view_count': int,
            'comment_count': int,
            'channel': 'Blender',
            'license': 'Creative Commons Attribution license (reuse allowed)',
            'like_count': int,
            'tags': 'count:8',
            'live_status': 'not_live',
            'channel_id': 'UCSMOQeBJ2RAnuFungnQOxLg',
            'channel_follower_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCSMOQeBJ2RAnuFungnQOxLg',
            'uploader_id': '@BlenderOfficial',
            'description': 'md5:8f3ed18a53a1bb36cbb3b70a15782fd0',
            'categories': ['Film & Animation'],
            'channel_is_verified': True,
            'heatmap': 'count:100',
            'section_end': 15,
            'uploader': 'Blender',
            'timestamp': 1415628355,
            'uploader_url': 'https://www.youtube.com/@BlenderOfficial',
            'age_limit': 0,
            'section_start': 10,
            'availability': 'public',
        },
    }]

    def _real_extract(self, url):
        """Look up the Digiview record and delegate to YoutubeIE.

        The record's own title, description, thumbnail, view count and
        section bounds are passed along as url_transparent overrides.
        """
        video_id = self._match_id(url)
        record = self._download_json(
            'https://ladigitale.dev/digiview/inc/recuperer_video.php', video_id,
            data=urlencode_postdata({'id': video_id}))

        # 'videoId' is mandatory: without it there is nothing to delegate to
        youtube_url = f'https://www.youtube.com/watch?v={record["videoId"]}'

        # Map the record's (French) field names onto info-dict fields
        overrides = traverse_obj(record, {
            'section_start': ('debut', {int_or_none}),
            'section_end': ('fin', {int_or_none}),
            'description': ('description', {clean_html}, filter),
            'title': ('titre', {str}),
            'thumbnail': ('vignette', {url_or_none}),
            'view_count': ('vues', {int_or_none}),
        })
        return self.url_result(
            youtube_url, YoutubeIE, video_id, url_transparent=True, **overrides)
|
||||
@@ -1,10 +1,24 @@
|
||||
from .zdf import ZDFIE
|
||||
from .zdf import ZDFBaseIE
|
||||
|
||||
|
||||
class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE
|
||||
class DreiSatIE(ZDFBaseIE):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.3sat.de/dokumentation/reise/traumziele-suedostasiens-die-philippinen-und-vietnam-102.html',
|
||||
'info_dict': {
|
||||
'id': '231124_traumziele_philippinen_und_vietnam_dokreise',
|
||||
'ext': 'mp4',
|
||||
'title': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
|
||||
'description': 'md5:26329ce5197775b596773b939354079d',
|
||||
'duration': 2625.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~2400x1350?cb=1699870351148',
|
||||
'episode': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
|
||||
'episode_id': 'POS_cc7ff51c-98cf-4d12-b99d-f7a551de1c95',
|
||||
'timestamp': 1738593000,
|
||||
'upload_date': '20250203',
|
||||
},
|
||||
}, {
|
||||
# Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
|
||||
'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
|
||||
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
|
||||
@@ -17,6 +31,7 @@ class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1608604200,
|
||||
'upload_date': '20201222',
|
||||
},
|
||||
'skip': '410 Gone',
|
||||
}, {
|
||||
'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
|
||||
'info_dict': {
|
||||
@@ -30,6 +45,7 @@ class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
|
||||
'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
|
||||
@@ -39,3 +55,14 @@ class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE
|
||||
'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract via the page's player when available, else via `_extract_mobile`.

    Both the page download and the player lookup are non-fatal; if either
    yields nothing, extraction falls through to `_extract_mobile`.
    """
    video_id = self._match_id(url)

    webpage = self._download_webpage(url, video_id, fatal=False)
    # Only look for a player when the page was actually retrieved
    player = self._extract_player(webpage, url, fatal=False) if webpage else None
    if not player:
        return self._extract_mobile(video_id)

    return self._extract_regular(url, player, video_id)
|
||||
|
||||
@@ -48,32 +48,30 @@ class DropboxIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
fn = urllib.parse.unquote(url_basename(url))
|
||||
title = os.path.splitext(fn)[0]
|
||||
password = self.get_param('videopassword')
|
||||
content_id = None
|
||||
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if '/sm/password' in part:
|
||||
webpage = self._download_webpage(
|
||||
update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
|
||||
content_id = self._search_regex(r'content_id=([\w.+=/-]+)', part, 'content ID')
|
||||
break
|
||||
|
||||
if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
if password:
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
|
||||
data=urlencode_postdata({
|
||||
'is_xhr': 'true',
|
||||
't': self._get_cookies('https://www.dropbox.com')['t'].value,
|
||||
'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
|
||||
'password': password,
|
||||
'url': url,
|
||||
}))
|
||||
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Invalid password', expected=True)
|
||||
elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
if content_id:
|
||||
password = self.get_param('videopassword')
|
||||
if not password:
|
||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
|
||||
data=urlencode_postdata({
|
||||
'is_xhr': 'true',
|
||||
't': self._get_cookies('https://www.dropbox.com')['t'].value,
|
||||
'content_id': content_id,
|
||||
'password': password,
|
||||
'url': update_url(url, scheme='', netloc=''),
|
||||
}))
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Invalid password', expected=True)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
@@ -84,7 +82,7 @@ class DropboxIE(InfoExtractor):
|
||||
has_anonymous_download = self._search_regex(
|
||||
r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
|
||||
transcode_url = self._search_regex(
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
|
||||
r'\n.?(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
|
||||
if not transcode_url:
|
||||
continue
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
|
||||
|
||||
@@ -135,7 +135,7 @@ class DropoutIE(InfoExtractor):
|
||||
self.raise_login_required(method='any')
|
||||
raise ExtractorError(login_err, expected=True)
|
||||
|
||||
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
|
||||
embed_url = self._html_search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
watch_info = get_element_by_id('watch-info', webpage) or ''
|
||||
|
||||
|
||||
51
yt_dlp/extractor/drtalks.py
Normal file
51
yt_dlp/extractor/drtalks.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DrTalksIE(InfoExtractor):
    """Resolve a drtalks.com video page to its Brightcove video."""

    _VALID_URL = r'https?://(?:www\.)?drtalks\.com/videos/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://drtalks.com/videos/six-pillars-of-resilience-tools-for-managing-stress-and-flourishing/',
        'info_dict': {
            'id': '6366193757112',
            'ext': 'mp4',
            'uploader_id': '6314452011001',
            'tags': ['resilience'],
            'description': 'md5:9c6805aee237ee6de8052461855b9dda',
            'timestamp': 1734546659,
            'thumbnail': 'https://drtalks.com/wp-content/uploads/2024/12/Episode-82-Eva-Selhub-DrTalks-Thumbs.jpg',
            'title': 'Six Pillars of Resilience: Tools for Managing Stress and Flourishing',
            'duration': 2800.682,
            'upload_date': '20241218',
        },
    }, {
        'url': 'https://drtalks.com/videos/the-pcos-puzzle-mastering-metabolic-health-with-marcelle-pick/',
        'info_dict': {
            'id': '6364699891112',
            'ext': 'mp4',
            'title': 'The PCOS Puzzle: Mastering Metabolic Health with Marcelle Pick',
            'description': 'md5:e87cbe00ca50135d5702787fc4043aaa',
            'thumbnail': 'https://drtalks.com/wp-content/uploads/2024/11/Episode-34-Marcelle-Pick-OBGYN-NP-DrTalks.jpg',
            'duration': 3515.2,
            'tags': ['pcos'],
            'upload_date': '20241114',
            'timestamp': 1731592119,
            'uploader_id': '6314452011001',
        },
    }]

    def _real_extract(self, url):
        """Read the video record from the page's Next.js data and delegate to BrightcoveNewIE.

        The page's title, summary and featured image are passed along as
        url_transparent overrides.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        nextjs_data = self._search_nextjs_data(webpage, video_id)
        video_record = nextjs_data['props']['pageProps']['data']['video']
        brightcove_url = video_record['videos']['brightcoveVideoLink']

        overrides = traverse_obj(video_record, {
            'title': ('title', {str}),
            # 'summury' is the key this code reads from the page data; keep as-is
            'description': ('videos', 'summury', {str}),
            'thumbnail': ('featuredImage', 'node', 'sourceUrl', {url_or_none}),
        })
        return self.url_result(
            brightcove_url, BrightcoveNewIE, video_id,
            url_transparent=True, **overrides)
|
||||
@@ -5,15 +5,16 @@ from ..utils import (
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
|
||||
|
||||
class DuoplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?'
|
||||
_VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)(?:[/?#]|$)'
|
||||
_TESTS = [{
|
||||
'note': 'Siberi võmm S02E12',
|
||||
'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
|
||||
@@ -34,15 +35,16 @@ class DuoplayIE(InfoExtractor):
|
||||
'episode_number': 12,
|
||||
'episode_id': '24',
|
||||
},
|
||||
'skip': 'No video found',
|
||||
}, {
|
||||
'note': 'Empty title',
|
||||
'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
|
||||
'md5': '6aca68be71112314738dd17cced7f8bf',
|
||||
'md5': 'cba9f5dabf2582b224d80ac44fb80e47',
|
||||
'info_dict': {
|
||||
'id': '17_14',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ühikarotid',
|
||||
'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
|
||||
'title': 'Episode 14',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'description': 'md5:4719b418e058c209def41d48b601276e',
|
||||
'upload_date': '20100916',
|
||||
'timestamp': 1284661800,
|
||||
@@ -52,6 +54,8 @@ class DuoplayIE(InfoExtractor):
|
||||
'season_number': 2,
|
||||
'episode_id': '14',
|
||||
'release_year': 2010,
|
||||
'episode': 'Episode 14',
|
||||
'episode_number': 14,
|
||||
},
|
||||
}, {
|
||||
'note': 'Movie without expiry',
|
||||
@@ -68,10 +72,32 @@ class DuoplayIE(InfoExtractor):
|
||||
'timestamp': 1671054000,
|
||||
'release_year': 2018,
|
||||
},
|
||||
'skip': 'No video found',
|
||||
}, {
|
||||
'note': 'Episode url without show name',
|
||||
'url': 'https://duoplay.ee/9644?ep=185',
|
||||
'md5': '63f324b4fe2dbd8194dca16a6d52184a',
|
||||
'info_dict': {
|
||||
'id': '9644_185',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 185',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'description': 'md5:ed25ba4e9e5d54bc291a4a0cdd241467',
|
||||
'upload_date': '20241120',
|
||||
'timestamp': 1732077000,
|
||||
'episode': 'Episode 63',
|
||||
'episode_id': '185',
|
||||
'episode_number': 63,
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'series': 'Telehommik',
|
||||
'series_id': '9644',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
telecast_id, episode = self._match_valid_url(url).group('id', 'ep')
|
||||
telecast_id = self._match_id(url)
|
||||
episode = traverse_obj(parse_qs(url), ('ep', 0, {int_or_none}, {str_or_none}))
|
||||
video_id = join_nonempty(telecast_id, episode, delim='_')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_player = try_call(lambda: extract_attributes(
|
||||
@@ -79,25 +105,33 @@ class DuoplayIE(InfoExtractor):
|
||||
if not video_player or not video_player.get('manifest-url'):
|
||||
raise ExtractorError('No video found', expected=True)
|
||||
|
||||
manifest_url = video_player['manifest-url']
|
||||
session_token = self._download_json(
|
||||
'https://sts.postimees.ee/session/register', video_id, 'Registering session',
|
||||
'Unable to register session', headers={
|
||||
'Accept': 'application/json',
|
||||
'X-Original-URI': manifest_url,
|
||||
})['session']
|
||||
|
||||
episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(video_player['manifest-url'], video_id, 'mp4'),
|
||||
'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4', query={'s': session_token}),
|
||||
**traverse_obj(episode_attr, {
|
||||
'title': 'title',
|
||||
'description': 'synopsis',
|
||||
'title': ('title', {str}),
|
||||
'description': ('synopsis', {str}),
|
||||
'thumbnail': ('images', 'original'),
|
||||
'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}),
|
||||
'cast': ('cast', {lambda x: x.split(', ')}),
|
||||
'cast': ('cast', filter, {lambda x: x.split(', ')}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
}),
|
||||
**(traverse_obj(episode_attr, {
|
||||
'title': (None, ('subtitle', ('episode_nr', {lambda x: f'Episode {x}' if x else None}))),
|
||||
'series': 'title',
|
||||
'title': (None, (('subtitle', {str}, filter), {value(f'Episode {episode}' if episode else None)})),
|
||||
'series': ('title', {str}),
|
||||
'series_id': ('telecast_id', {str_or_none}),
|
||||
'season_number': ('season_id', {int_or_none}),
|
||||
'episode': 'subtitle',
|
||||
'episode': ('subtitle', {str}, filter),
|
||||
'episode_number': ('episode_nr', {int_or_none}),
|
||||
'episode_id': ('episode_id', {str_or_none}),
|
||||
}, get_all=False) if episode_attr.get('category') != 'movies' else {}),
|
||||
|
||||
@@ -162,7 +162,7 @@ class DVTVIE(InfoExtractor):
|
||||
items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
|
||||
if items:
|
||||
return self.playlist_result(
|
||||
[self._parse_video_metadata(i, video_id, timestamp) for i in items],
|
||||
(self._parse_video_metadata(i, video_id, timestamp) for i in items),
|
||||
video_id, self._html_search_meta('twitter:title', webpage))
|
||||
|
||||
item = self._search_regex(
|
||||
|
||||
155
yt_dlp/extractor/eggs.py
Normal file
155
yt_dlp/extractor/eggs.py
Normal file
@@ -0,0 +1,155 @@
|
||||
import secrets
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class EggsBaseIE(InfoExtractor):
    """Common base for the eggs.mu extractors: API access and track-metadata parsing."""

    # Static request headers. Treated as a read-only template: the random
    # ``deviceId`` is added to an instance-level copy in ``_real_initialize``.
    _API_HEADERS = {
        'Accept': '*/*',
        'apVersion': '8.2.00',
        'deviceName': 'Android',
    }

    def _real_initialize(self):
        """Attach a freshly generated ``deviceId`` to this extractor's API headers."""
        # Rebind on the instance instead of writing into the class-level dict:
        # mutating the shared dict would leak each new deviceId into every
        # other EggsBaseIE subclass and instance.
        self._API_HEADERS = {
            **self._API_HEADERS,
            'deviceId': secrets.token_hex(8),
        }

    def _call_api(self, endpoint, video_id):
        """Download and return the JSON document at ``v1/<endpoint>`` using the API headers."""
        return self._download_json(
            f'https://app-front-api.eggs.mu/v1/{endpoint}', video_id,
            headers=self._API_HEADERS)

    def _extract_music_info(self, data):
        """Convert one track object from the API into an extraction result.

        Returns a URL result delegating to YoutubeIE when the track carries a
        valid ``youtubeUrl``; otherwise an audio-only info dict built from the
        track's own fields.
        """
        if yt_url := traverse_obj(data, ('youtubeUrl', {url_or_none})):
            return self.url_result(yt_url, ie=YoutubeIE)

        artist_name = traverse_obj(data, ('artist', 'artistName', {str_or_none}))
        music_id = traverse_obj(data, ('musicId', {str_or_none}))
        # The canonical page URL can only be rebuilt when both parts are known
        webpage_url = None
        if artist_name and music_id:
            webpage_url = f'https://eggs.mu/artist/{artist_name}/song/{music_id}'

        return {
            'id': music_id,
            'vcodec': 'none',
            'webpage_url': webpage_url,
            # Always attribute entries to the single-track extractor, even when
            # this helper is invoked from another subclass
            'extractor_key': EggsIE.ie_key(),
            'extractor': EggsIE.IE_NAME,
            **traverse_obj(data, {
                'title': ('musicTitle', {str}),
                'url': ('musicDataPath', {url_or_none}),
                'uploader': ('artist', 'displayName', {str}),
                'uploader_id': ('artist', 'artistId', {str_or_none}),
                'thumbnail': ('imageDataPath', {url_or_none}),
                'view_count': ('numberOfMusicPlays', {int_or_none}),
                'like_count': ('numberOfLikes', {int_or_none}),
                'comment_count': ('numberOfComments', {int_or_none}),
                'composers': ('composer', {str}, all),
                'tags': ('tags', ..., {str}),
                'timestamp': ('releaseDate', {parse_iso8601}),
                'artist': ('artist', 'displayName', {str}),
            })}
|
||||
|
||||
|
||||
class EggsIE(EggsBaseIE):
    """Extractor for a single eggs.mu song page."""

    IE_NAME = 'eggs:single'
    _VALID_URL = r'https?://eggs\.mu/artist/[^/?#]+/song/(?P<id>[\da-f-]+)'

    _TESTS = [{
        'url': 'https://eggs.mu/artist/32_sunny_girl/song/0e95fd1d-4d61-4d5b-8b18-6092c551da90',
        'info_dict': {
            'id': '0e95fd1d-4d61-4d5b-8b18-6092c551da90',
            'ext': 'm4a',
            'title': 'シネマと信号',
            'uploader': 'Sunny Girl',
            'thumbnail': r're:https?://.*\.jpg(?:\?.*)?$',
            'uploader_id': '1607',
            'like_count': int,
            'timestamp': 1731327327,
            'composers': ['橘高連太郎'],
            'view_count': int,
            'comment_count': int,
            'artists': ['Sunny Girl'],
            'upload_date': '20241111',
            'tags': ['SunnyGirl', 'シネマと信号'],
        },
    }, {
        'url': 'https://eggs.mu/artist/KAMO_3pband/song/1d4bc45f-1af6-47a9-8b30-a70cae350b4f',
        'info_dict': {
            'id': '80cLKA2wnoA',
            'ext': 'mp4',
            'title': 'KAMO「いい女だから」Audio',
            'uploader': 'KAMO',
            'live_status': 'not_live',
            'channel_id': 'UCsHLBw2__5Q9y55skXPotOg',
            'channel_follower_count': int,
            'description': 'md5:d260da711ecbec3e720293dc11401b87',
            'availability': 'public',
            'uploader_id': '@KAMO_band',
            'upload_date': '20240925',
            'thumbnail': 'https://i.ytimg.com/vi/80cLKA2wnoA/maxresdefault.jpg',
            'comment_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCsHLBw2__5Q9y55skXPotOg',
            'view_count': int,
            'duration': 151,
            'like_count': int,
            'channel': 'KAMO',
            'playable_in_embed': True,
            'uploader_url': 'https://www.youtube.com/@KAMO_band',
            'tags': [],
            'timestamp': 1727271121,
            'age_limit': 0,
            'categories': ['People & Blogs'],
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Look the song up in the API by the id taken from the URL and parse it."""
        track_id = self._match_id(url)
        return self._extract_music_info(
            self._call_api(f'musics/{track_id}', track_id))
|
||||
|
||||
def _real_extract(self, url):
|
||||
song_id = self._match_id(url)
|
||||
json_data = self._call_api(f'musics/{song_id}', song_id)
|
||||
return self._extract_music_info(json_data)
|
||||
|
||||
|
||||
class EggsArtistIE(EggsBaseIE):
    """Extractor that turns an eggs.mu artist page into a playlist of the artist's songs."""

    IE_NAME = 'eggs:artist'
    _VALID_URL = r'https?://eggs\.mu/artist/(?P<id>\w+)/?(?:[?#&]|$)'

    _TESTS = [{
        'url': 'https://eggs.mu/artist/32_sunny_girl',
        'info_dict': {
            'id': '32_sunny_girl',
            'thumbnail': 'https://image-pro.eggs.mu/profile/1607.jpeg?updated_at=2024-04-03T20%3A06%3A00%2B09%3A00',
            'description': 'Muddy Mine / 東京高田馬場CLUB PHASE / Gt.Vo 橘高 連太郎 / Ba.Cho 小野 ゆうき / Dr 大森 りゅうひこ',
            'title': 'Sunny Girl',
        },
        'playlist_mincount': 18,
    }, {
        'url': 'https://eggs.mu/artist/KAMO_3pband',
        'info_dict': {
            'id': 'KAMO_3pband',
            'description': '川崎発3ピースバンド',
            'thumbnail': 'https://image-pro.eggs.mu/profile/35217.jpeg?updated_at=2024-11-27T16%3A31%3A50%2B09%3A00',
            'title': 'KAMO',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Fetch the artist profile and song list, then assemble the playlist."""
        artist_id = self._match_id(url)
        # Profile first, then tracks: same request order as before
        profile = self._call_api(f'artists/{artist_id}', artist_id)
        tracks = self._call_api(f'artists/{artist_id}/musics', artist_id)

        entries = traverse_obj(tracks, ('data', ..., {dict}, {self._extract_music_info}))
        playlist_meta = traverse_obj(profile, {
            'title': ('displayName', {str}),
            'description': ('profile', {str}),
            'thumbnail': ('imageDataPath', {url_or_none}),
        })
        return self.playlist_result(entries, playlist_id=artist_id, **playlist_meta)
|
||||
@@ -1,28 +1,37 @@
|
||||
import contextlib
|
||||
import inspect
|
||||
import os
|
||||
|
||||
from ..plugins import load_plugins
|
||||
from ..globals import LAZY_EXTRACTORS
|
||||
from ..globals import extractors as _extractors_context
|
||||
|
||||
# NB: Must be before other imports so that plugins can be correctly injected
|
||||
_PLUGIN_CLASSES = load_plugins('extractor', 'IE')
|
||||
_CLASS_LOOKUP = None
|
||||
if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
|
||||
LAZY_EXTRACTORS.value = False
|
||||
else:
|
||||
try:
|
||||
from .lazy_extractors import _CLASS_LOOKUP
|
||||
LAZY_EXTRACTORS.value = True
|
||||
except ImportError:
|
||||
LAZY_EXTRACTORS.value = None
|
||||
|
||||
_LAZY_LOADER = False
|
||||
if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
|
||||
with contextlib.suppress(ImportError):
|
||||
from .lazy_extractors import * # noqa: F403
|
||||
from .lazy_extractors import _ALL_CLASSES
|
||||
_LAZY_LOADER = True
|
||||
if not _CLASS_LOOKUP:
|
||||
from . import _extractors
|
||||
|
||||
if not _LAZY_LOADER:
|
||||
from ._extractors import * # noqa: F403
|
||||
_ALL_CLASSES = [ # noqa: F811
|
||||
klass
|
||||
for name, klass in globals().items()
|
||||
_CLASS_LOOKUP = {
|
||||
name: value
|
||||
for name, value in inspect.getmembers(_extractors)
|
||||
if name.endswith('IE') and name != 'GenericIE'
|
||||
]
|
||||
_ALL_CLASSES.append(GenericIE) # noqa: F405
|
||||
}
|
||||
_CLASS_LOOKUP['GenericIE'] = _extractors.GenericIE
|
||||
|
||||
globals().update(_PLUGIN_CLASSES)
|
||||
_ALL_CLASSES[:0] = _PLUGIN_CLASSES.values()
|
||||
# We want to append to the main lookup
|
||||
_current = _extractors_context.value
|
||||
for name, ie in _CLASS_LOOKUP.items():
|
||||
_current.setdefault(name, ie)
|
||||
|
||||
from .common import _PLUGIN_OVERRIDES # noqa: F401
|
||||
|
||||
def __getattr__(name):
    """Resolve ``name`` through ``_CLASS_LOOKUP``.

    Raises AttributeError when the lookup has no (truthy) entry for ``name``.
    """
    found = _CLASS_LOOKUP.get(name)
    if found:
        return found
    raise AttributeError(f'module {__name__} has no attribute {name}')
|
||||
|
||||
@@ -50,7 +50,7 @@ class FacebookIE(InfoExtractor):
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
events/(?:[^/]+/)?|
|
||||
groups/[^/]+/(?:permalink|posts)/|
|
||||
groups/[^/]+/(?:permalink|posts)/(?:[\da-f]+/)?|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
@@ -410,6 +410,9 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'Comitato Liberi Pensatori',
|
||||
'uploader_id': '100065709540881',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
class FirstTVIE(InfoExtractor):
|
||||
IE_NAME = '1tv'
|
||||
IE_DESC = 'Первый канал'
|
||||
_VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:sport)?1tv\.ru/(?:[^/?#]+/)+(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# single format
|
||||
@@ -52,6 +52,9 @@ class FirstTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sport1tv.ru/sport/chempionat-rossii-po-figurnomu-kataniyu-2025',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
@@ -5,8 +6,10 @@ from .common import InfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
filter_dict,
|
||||
format_field,
|
||||
int_or_none,
|
||||
@@ -16,7 +19,7 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
@@ -29,6 +32,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
|
||||
class FranceTVIE(InfoExtractor):
|
||||
IE_NAME = 'francetv'
|
||||
_VALID_URL = r'francetv:(?P<id>[^@#]+)'
|
||||
_GEO_COUNTRIES = ['FR']
|
||||
_GEO_BYPASS = False
|
||||
@@ -248,18 +252,19 @@ class FranceTVIE(InfoExtractor):
|
||||
|
||||
|
||||
class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetv:site'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||
'info_dict': {
|
||||
'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba',
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', # old: c5bda21d-2c6f-4470-8849-3d8327adb2ba'
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'timestamp': 1514118300,
|
||||
'duration': 2880,
|
||||
'timestamp': 1502623500,
|
||||
'duration': 2580,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20171224',
|
||||
'upload_date': '20170813',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -282,6 +287,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1441,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# geo-restricted livestream (workflow == 'token-akamai')
|
||||
'url': 'https://www.france.tv/france-4/direct.html',
|
||||
@@ -336,19 +342,33 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.goplay
|
||||
def _find_json(self, s):
    """Search ``s`` for a ``<word>:`` prefix followed by a JSON array.

    Delegates to ``_search_json`` with ``default=None`` as the fallback value.
    """
    prefix_pattern = r'\w+\s*:\s*'
    array_pattern = r'\[(?s:.+)\]'
    return self._search_json(
        prefix_pattern, s, 'next js data', None,
        contains_pattern=array_pattern, default=None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'video id', default=None, group='id')
|
||||
nextjs_data = traverse_obj(
|
||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
||||
(..., {json.loads}, ..., {self._find_json}, ..., 'children', ..., ..., 'children', ..., ..., 'children'))
|
||||
|
||||
if traverse_obj(nextjs_data, (..., ..., 'children', ..., 'isLive', {bool}, any)):
|
||||
# For livestreams we need the id of the stream instead of the currently airing episode id
|
||||
video_id = traverse_obj(nextjs_data, (
|
||||
..., ..., 'children', ..., 'children', ..., 'children', ..., 'children', ..., ...,
|
||||
'children', ..., ..., 'children', ..., ..., 'children', (..., (..., ...)),
|
||||
'options', 'id', {str}, any))
|
||||
else:
|
||||
video_id = traverse_obj(nextjs_data, (
|
||||
..., ..., ..., 'children',
|
||||
lambda _, v: v['video']['url'] == urllib.parse.urlparse(url).path,
|
||||
'video', ('playerReplayId', 'siId'), {str}, any))
|
||||
|
||||
if not video_id:
|
||||
video_id = self._html_search_regex(
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@"]+@[^"]+)"',
|
||||
webpage, 'video ID')
|
||||
raise ExtractorError('Unable to extract video ID')
|
||||
|
||||
return self._make_url_result(video_id, url=url)
|
||||
|
||||
@@ -441,11 +461,16 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
self.url_result(dailymotion_url, DailymotionIE.ie_key())
|
||||
for dailymotion_url in dailymotion_urls])
|
||||
|
||||
video_id = self._search_regex(
|
||||
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
||||
r'id-video=([^@]+@[^"]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||
webpage, 'video id')
|
||||
video_id = (
|
||||
traverse_obj(webpage, (
|
||||
{find_element(tag='button', attr='data-cy', value='francetv-player-wrapper', html=True)},
|
||||
{extract_attributes}, 'id'))
|
||||
or self._search_regex(
|
||||
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
||||
r'id-video=([^@]+@[^"]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||
webpage, 'video id')
|
||||
)
|
||||
|
||||
return self._make_url_result(video_id, url=url)
|
||||
|
||||
@@ -1,349 +0,0 @@
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
make_archive_id,
|
||||
orderedSet,
|
||||
qualities,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class FunimationBaseIE(InfoExtractor):
    """Shared region detection and login for the Funimation extractors."""

    _NETRC_MACHINE = 'funimation'
    # Both values are cached on the base class so they are shared by all subclasses
    _REGION = None
    _TOKEN = None

    def _get_region(self):
        """Return the region from the cookie, the geo-bypass option, the geo API, or 'US'."""
        cookie = self._get_cookies('https://www.funimation.com').get('region')
        if cookie:
            region = cookie.value
        else:
            region = self.get_param('geo_bypass_country')
        if region:
            return region

        # Nothing configured locally: ask the geo service (non-fatal)
        geo_info = self._download_json(
            'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False,
            note='Checking geo-location', errnote='Unable to fetch geo-location information')
        return traverse_obj(geo_info, 'region') or 'US'

    def _perform_login(self, username, password):
        """Log in once and store the token on FunimationBaseIE.

        A 401 response is re-raised as an expected ExtractorError carrying the
        server-provided ``error`` text; every other failure propagates unchanged.
        """
        if self._TOKEN:
            return

        credentials = urlencode_postdata({
            'username': username,
            'password': password,
        })
        try:
            response = self._download_json(
                'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
                None, 'Logging in', data=credentials)
        except ExtractorError as e:
            if not (isinstance(e.cause, HTTPError) and e.cause.status == 401):
                raise
            message = self._parse_json(e.cause.response.read().decode(), None)['error']
            raise ExtractorError(message, expected=True)

        FunimationBaseIE._TOKEN = response['token']
|
||||
|
||||
|
||||
class FunimationPageIE(FunimationBaseIE):
    """Resolve a Funimation show/episode page URL to the matching player URL."""

    IE_NAME = 'funimation:page'
    _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:(?P<lang>[^/]+)/)?(?:shows|v)/(?P<show>[^/]+)/(?P<episode>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
        'info_dict': {
            'id': '210050',
            'ext': 'mp4',
            'title': 'Broadcast Dub Preview',
            # Other metadata is tested in FunimationIE
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'add_ie': ['Funimation'],
    }, {
        # Not available in US
        'url': 'https://www.funimation.com/shows/hacksign/role-play/',
        'only_matching': True,
    }, {
        # with lang code
        'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
        'only_matching': True,
    }, {
        'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
        'only_matching': True,
    }, {
        'url': 'https://www.funimation.com/v/a-certain-scientific-railgun/super-powered-level-5',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Detect the region once and cache it on FunimationBaseIE."""
        if not self._REGION:
            FunimationBaseIE._REGION = self._get_region()

    def _real_extract(self, url):
        """Look up the episode in the title API and delegate to FunimationIE.

        Raises ExtractorError when the API response contains no video id.
        """
        locale, show, episode = self._match_valid_url(url).group('lang', 'show', 'episode')

        video_id = traverse_obj(self._download_json(
            f'https://title-api.prd.funimationsvc.com/v1/shows/{show}/episodes/{episode}',
            f'{show}_{episode}', query={
                'deviceType': 'web',
                'region': self._REGION,
                'locale': locale or 'en',
            }), ('videoList', ..., 'id'), get_all=False)

        # Without this guard a missing id would yield the bogus URL ".../player/None"
        if video_id is None:
            raise ExtractorError('Unable to find a video ID for this episode')

        return self.url_result(f'https://www.funimation.com/player/{video_id}', FunimationIE.ie_key(), video_id)
|
||||
|
||||
|
||||
class FunimationIE(FunimationBaseIE):
    """Extract formats, subtitles and metadata from a Funimation player URL."""

    _VALID_URL = r'https?://(?:www\.)?funimation\.com/player/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.funimation.com/player/210051',
        'info_dict': {
            'id': '210050',
            'display_id': 'broadcast-dub-preview',
            'ext': 'mp4',
            'title': 'Broadcast Dub Preview',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
            'episode': 'Broadcast Dub Preview',
            'episode_id': '210050',
            'season': 'Extras',
            'season_id': '166038',
            'season_number': 99,
            'series': 'Attack on Titan: Junior High',
            'description': '',
            'duration': 155,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'note': 'player_id should be extracted with the relevent compat-opt',
        'url': 'https://www.funimation.com/player/210051',
        'info_dict': {
            'id': '210051',
            'display_id': 'broadcast-dub-preview',
            'ext': 'mp4',
            'title': 'Broadcast Dub Preview',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
            'episode': 'Broadcast Dub Preview',
            'episode_id': '210050',
            'season': 'Extras',
            'season_id': '166038',
            'season_number': 99,
            'series': 'Attack on Titan: Junior High',
            'description': '',
            'duration': 155,
        },
        'params': {
            'skip_download': 'm3u8',
            'compat_opts': ['seperate-video-versions'],
        },
    }]

    @staticmethod
    def _get_experiences(episode):
        """Yield ``(language, Version, experience)`` for every entry under ``episode['languages']``."""
        for lang, lang_data in episode.get('languages', {}).items():
            for video_data in lang_data.values():
                for version, f in video_data.items():
                    yield lang, version.title(), f

    def _get_episode(self, webpage, experience_id=None, episode_id=None, fatal=True):
        """ Extract the episode, season and show objects given either episode/experience id

        When ``episode_id`` is given, episodes are matched on ``episodePk``;
        otherwise on an experience whose ``experienceId`` equals ``experience_id``.
        Returns ``(episode, season, show)``. If nothing matches, raises
        ExtractorError when ``fatal`` is true, else warns and returns three
        empty dicts.
        """
        # ``webpage`` may be falsy when the caller downloaded it non-fatally;
        # search an empty string in that case instead of a non-string value
        show_json = self._search_regex(
            r'show\s*=\s*({.+?})\s*;', webpage or '', 'show data', fatal=fatal)
        # Only parse when the regex matched, and fall back to a dict: the
        # previous ``or []`` fallback crashed on the ``.get`` call below
        show = (self._parse_json(
            show_json, experience_id, transform_source=js_to_json, fatal=fatal)
            if show_json else None) or {}

        for season in show.get('seasons', []):
            for episode in season.get('episodes', []):
                if episode_id is not None:
                    if str(episode.get('episodePk')) == episode_id:
                        return episode, season, show
                    continue
                for _, _, f in self._get_experiences(episode):
                    if f.get('experienceId') == experience_id:
                        return episode, season, show

        if fatal:
            raise ExtractorError('Unable to find episode information')
        self.report_warning('Unable to find episode information')
        return {}, {}, {}

    def _real_extract(self, url):
        """Collect formats and subtitles for every experience of the episode behind ``url``."""
        initial_experience_id = self._match_id(url)
        webpage = self._download_webpage(
            url, initial_experience_id, note=f'Downloading player webpage for {initial_experience_id}')
        episode, season, show = self._get_episode(webpage, experience_id=int(initial_experience_id))
        episode_id = str(episode['episodePk'])
        display_id = episode.get('slug') or episode_id

        formats, subtitles, thumbnails, duration = [], {}, [], 0
        requested_languages, requested_versions = self._configuration_arg('language'), self._configuration_arg('version')
        language_preference = qualities((requested_languages or [''])[::-1])
        source_preference = qualities((requested_versions or ['uncut', 'simulcast'])[::-1])
        # With this compat option only the experience named in the URL is extracted
        only_initial_experience = 'seperate-video-versions' in self.get_param('compat_opts', [])

        for lang, version, fmt in self._get_experiences(episode):
            experience_id = str(fmt['experienceId'])
            if (only_initial_experience and experience_id != initial_experience_id
                    or requested_languages and lang.lower() not in requested_languages
                    or requested_versions and version.lower() not in requested_versions):
                continue
            # Skip experiences without a poster rather than emitting a thumbnail with no URL
            if fmt.get('poster'):
                thumbnails.append({'url': fmt['poster']})
            # ``or 0`` also covers a key that is present but null
            duration = max(duration, fmt.get('duration') or 0)
            format_name = f'{version} {lang} ({experience_id})'
            # The already-parsed episode only describes the initial experience;
            # for the others pass the id so the subtitle step fetches their page
            self.extract_subtitles(
                subtitles, experience_id, display_id=display_id, format_name=format_name,
                episode=episode if experience_id == initial_experience_id else episode_id)

            headers = {}
            if self._TOKEN:
                headers['Authorization'] = f'Token {self._TOKEN}'
            page = self._download_json(
                f'https://www.funimation.com/api/showexperience/{experience_id}/',
                display_id, headers=headers, expected_status=403, query={
                    'pinst_id': ''.join(random.choices(string.digits + string.ascii_letters, k=8)),
                }, note=f'Downloading {format_name} JSON')
            sources = page.get('items') or []
            if not sources:
                error = try_get(page, lambda x: x['errors'][0], dict)
                if error:
                    self.report_warning('{} said: Error {} - {}'.format(
                        self.IE_NAME, error.get('code'), error.get('detail') or error.get('title')))
                else:
                    self.report_warning('No sources found for format')

            current_formats = []
            for source in sources:
                source_url = source.get('src')
                source_type = source.get('videoType') or determine_ext(source_url)
                if source_type == 'm3u8':
                    current_formats.extend(self._extract_m3u8_formats(
                        source_url, display_id, 'mp4', m3u8_id='{}-{}'.format(experience_id, 'hls'), fatal=False,
                        note=f'Downloading {format_name} m3u8 information'))
                else:
                    current_formats.append({
                        'format_id': f'{experience_id}-{source_type}',
                        'url': source_url,
                    })
            for f in current_formats:
                # TODO: Convert language to code
                f.update({
                    'language': lang,
                    'format_note': version,
                    'source_preference': source_preference(version.lower()),
                    'language_preference': language_preference(lang.lower()),
                })
            formats.extend(current_formats)
        if not formats and (requested_languages or requested_versions):
            self.raise_no_formats(
                'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id)
        self._remove_duplicate_formats(formats)

        return {
            'id': episode_id,
            '_old_archive_ids': [make_archive_id(self, initial_experience_id)],
            'display_id': display_id,
            'duration': duration,
            'title': episode['episodeTitle'],
            'description': episode.get('episodeSummary'),
            'episode': episode.get('episodeTitle'),
            'episode_number': int_or_none(episode.get('episodeId')),
            'episode_id': episode_id,
            'season': season.get('seasonTitle'),
            'season_number': int_or_none(season.get('seasonId')),
            'season_id': str_or_none(season.get('seasonPk')),
            'series': show.get('showTitle'),
            'formats': formats,
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            '_format_sort_fields': ('lang', 'source'),
        }

    def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name):
        """Merge the text tracks of ``episode`` into ``subtitles`` and return it.

        ``episode`` is either an episode object or an episode id string; for a
        string, the player page of ``experience_id`` is downloaded (non-fatally)
        to obtain the episode object first.
        """
        if isinstance(episode, str):
            webpage = self._download_webpage(
                f'https://www.funimation.com/player/{experience_id}/', display_id,
                fatal=False, note=f'Downloading player webpage for {format_name}')
            episode, _, _ = self._get_episode(webpage, episode_id=episode, fatal=False)

        for _, version, f in self._get_experiences(episode):
            # Missing or null containers are treated as empty instead of crashing
            for source in f.get('sources') or []:
                for text_track in source.get('textTracks') or []:
                    if not text_track.get('src'):
                        continue
                    sub_type = (text_track.get('type') or '').upper()
                    # 'FULL' is the default track kind and is left out of names and keys
                    sub_type = sub_type if sub_type != 'FULL' else None
                    current_sub = {
                        'url': text_track['src'],
                        'name': join_nonempty(version, text_track.get('label'), sub_type, delim=' '),
                    }
                    lang = join_nonempty(text_track.get('language') or 'und',
                                         version if version != 'Simulcast' else None,
                                         sub_type, delim='_')
                    if current_sub not in subtitles.get(lang, []):
                        subtitles.setdefault(lang, []).append(current_sub)
        return subtitles
|
||||
|
||||
|
||||
class FunimationShowIE(FunimationBaseIE):
    """Build a playlist of episode pages from a Funimation show URL."""

    IE_NAME = 'funimation:show'
    _VALID_URL = r'(?P<url>https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?P<locale>[^/]+)?/?shows/(?P<id>[^/?#&]+))/?(?:[?#]|$)'

    _TESTS = [{
        'url': 'https://www.funimation.com/en/shows/sk8-the-infinity',
        'info_dict': {
            'id': '1315000',
            'title': 'SK8 the Infinity',
        },
        'playlist_count': 13,
        'params': {
            'skip_download': True,
        },
    }, {
        # without lang code
        'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/',
        'info_dict': {
            'id': '39643',
            'title': 'Ouran High School Host Club',
        },
        'playlist_count': 26,
        'params': {
            'skip_download': True,
        },
    }]

    def _real_initialize(self):
        """Detect the region once and cache it on FunimationBaseIE."""
        if self._REGION:
            return
        FunimationBaseIE._REGION = self._get_region()

    def _real_extract(self, url):
        """Fetch show info and its episode listing, then emit one entry per episode."""
        base_url, locale, display_id = self._match_valid_url(url).groups()

        show_url = 'https://title-api.prd.funimationsvc.com/v2/shows/{}?region={}&deviceType=web&locale={}'.format(
            display_id, self._REGION, locale or 'en')
        show_info = self._download_json(show_url, display_id)

        episodes_url = 'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id={}'.format(
            show_info.get('id'))
        items_info = self._download_json(episodes_url, display_id)

        # Take the 'item' under every key that looks like mostRecentAvod / mostRecentSvod
        vod_items = traverse_obj(items_info, ('items', ..., lambda k, _: re.match(r'(?i)mostRecent[AS]vod', k), 'item'))

        playlist_id = str_or_none(show_info['id'])
        playlist_title = show_info['name']

        def episode_order(item):
            return item.get('episodeOrder', -1)

        entries = orderedSet(
            self.url_result(
                '{}/{}'.format(base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(),
                vod_item.get('episodeId'), vod_item.get('episodeName'))
            for vod_item in sorted(vod_items, key=episode_order))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': entries,
        }
|
||||
@@ -293,6 +293,19 @@ class GenericIE(InfoExtractor):
|
||||
'timestamp': 1378272859.0,
|
||||
},
|
||||
},
|
||||
# Live DASH MPD
|
||||
{
|
||||
'url': 'https://livesim2.dashif.org/livesim2/ato_10/testpic_2s/Manifest.mpd',
|
||||
'info_dict': {
|
||||
'id': 'Manifest',
|
||||
'ext': 'mp4',
|
||||
'title': r're:Manifest \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'livestream',
|
||||
},
|
||||
},
|
||||
# m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
|
||||
{
|
||||
'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
|
||||
@@ -2436,10 +2449,9 @@ class GenericIE(InfoExtractor):
|
||||
subtitles = {}
|
||||
if format_id.endswith('mpegurl') or ext == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
|
||||
elif format_id.endswith(('mpd', 'dash+xml')) or ext == 'mpd':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
|
||||
elif format_id == 'f4m' or ext == 'f4m':
|
||||
formats = self._extract_f4m_formats(url, video_id, headers=headers)
|
||||
# Don't check for DASH/mpd here, do it later w/ first_bytes. Same number of requests either way
|
||||
else:
|
||||
formats = [{
|
||||
'format_id': format_id,
|
||||
@@ -2521,6 +2533,7 @@ class GenericIE(InfoExtractor):
|
||||
doc,
|
||||
mpd_base_url=full_response.url.rpartition('/')[0],
|
||||
mpd_url=url)
|
||||
info_dict['live_status'] = 'is_live' if doc.get('type') == 'dynamic' else None
|
||||
self._extra_manifest_info(info_dict, url)
|
||||
self.report_detected('DASH manifest')
|
||||
return info_dict
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class GigyaBaseIE(InfoExtractor):
    """Base extractor providing a login helper for the Gigya accounts endpoint."""

    def _gigya_login(self, auth_data):
        """POST *auth_data* (url-encoded) to the Gigya login endpoint.

        Returns the decoded JSON response. Raises an expected ExtractorError
        when the response carries ``errorDetails`` or ``errorMessage``.
        """
        response = self._download_json(
            'https://accounts.eu1.gigya.com/accounts.login', None,
            note='Logging in', errnote='Unable to log in',
            data=urlencode_postdata(auth_data))

        # errorDetails takes precedence over errorMessage when both are present
        failure = response.get('errorDetails') or response.get('errorMessage')
        if not failure:
            return response
        raise ExtractorError(
            f'Unable to login: {failure}', expected=True)
|
||||
@@ -1,32 +1,48 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import subs_list_to_dict, traverse_obj
|
||||
|
||||
|
||||
class GloboIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
|
||||
_VALID_URL = r'(?:globo:|https?://[^/?#]+?\.globo\.com/(?:[^/?#]+/))(?P<id>\d{7,})'
|
||||
_NETRC_MACHINE = 'globo'
|
||||
_VIDEO_VIEW = '''
|
||||
query getVideoView($videoId: ID!) {
|
||||
video(id: $videoId) {
|
||||
duration
|
||||
description
|
||||
relatedEpisodeNumber
|
||||
relatedSeasonNumber
|
||||
headline
|
||||
title {
|
||||
originProgramId
|
||||
headline
|
||||
}
|
||||
}
|
||||
}
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
|
||||
'url': 'https://globoplay.globo.com/v/3607726/',
|
||||
'info_dict': {
|
||||
'id': '3607726',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
|
||||
'duration': 103.204,
|
||||
'uploader': 'G1',
|
||||
'uploader_id': '2015',
|
||||
'uploader': 'G1 ao vivo',
|
||||
'uploader_id': '4209',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -38,39 +54,46 @@ class GloboIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
|
||||
'duration': 137.973,
|
||||
'uploader': 'Rede Globo',
|
||||
'uploader_id': '196',
|
||||
'uploader': 'Bom Dia Brasil',
|
||||
'uploader_id': '810',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'globo:3607726',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://globoplay.globo.com/v/10248083/',
|
||||
},
|
||||
{
|
||||
'url': 'globo:8013907', # needs subscription to globoplay
|
||||
'info_dict': {
|
||||
'id': '10248083',
|
||||
'id': '8013907',
|
||||
'ext': 'mp4',
|
||||
'title': 'Melhores momentos: Equador 1 x 1 Brasil pelas Eliminatórias da Copa do Mundo 2022',
|
||||
'duration': 530.964,
|
||||
'uploader': 'SporTV',
|
||||
'uploader_id': '698',
|
||||
'title': 'Capítulo de 14/08/1989',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Tieta',
|
||||
'uploader_id': '11895',
|
||||
'duration': 2858.389,
|
||||
'subtitles': 'count:1',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'globo:12824146',
|
||||
'info_dict': {
|
||||
'id': '12824146',
|
||||
'ext': 'mp4',
|
||||
'title': 'Acordo de damas',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Rensga Hits!',
|
||||
'uploader_id': '20481',
|
||||
'duration': 1953.994,
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -80,98 +103,71 @@ class GloboIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
self._request_webpage(
|
||||
HEADRequest('https://globo-ab.globo.com/v2/selected-alternatives?experiments=player-isolated-experiment-02&skipImpressions=true'),
|
||||
video_id, 'Getting cookies')
|
||||
|
||||
video = self._download_json(
|
||||
f'http://api.globovideos.com/videos/{video_id}/playlist',
|
||||
video_id)['videos'][0]
|
||||
if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True:
|
||||
self.report_drm(video_id)
|
||||
|
||||
title = video['title']
|
||||
info = self._download_json(
|
||||
'https://cloud-jarvis.globo.com/graphql', video_id,
|
||||
query={
|
||||
'operationName': 'getVideoView',
|
||||
'variables': json.dumps({'videoId': video_id}),
|
||||
'query': self._VIDEO_VIEW,
|
||||
}, headers={
|
||||
'content-type': 'application/json',
|
||||
'x-platform-id': 'web',
|
||||
'x-device-id': 'desktop',
|
||||
'x-client-version': '2024.12-5',
|
||||
})['data']['video']
|
||||
|
||||
formats = []
|
||||
security = self._download_json(
|
||||
'https://playback.video.globo.com/v2/video-session', video_id, f'Downloading security hash for {video_id}',
|
||||
headers={'content-type': 'application/json'}, data=json.dumps({
|
||||
'player_type': 'desktop',
|
||||
video = self._download_json(
|
||||
'https://playback.video.globo.com/v4/video-session', video_id,
|
||||
f'Downloading resource info for {video_id}',
|
||||
headers={'Content-Type': 'application/json'},
|
||||
data=json.dumps(filter_dict({
|
||||
'player_type': 'mirakulo_8k_hdr',
|
||||
'video_id': video_id,
|
||||
'quality': 'max',
|
||||
'content_protection': 'widevine',
|
||||
'vsid': '581b986b-4c40-71f0-5a58-803e579d5fa2',
|
||||
'tz': '-3.0:00',
|
||||
}).encode())
|
||||
'vsid': f'{uuid.uuid4()}',
|
||||
'consumption': 'streaming',
|
||||
'capabilities': {'low_latency': True},
|
||||
'tz': '-03:00',
|
||||
'Authorization': try_get(self._get_cookies('https://globo.com'),
|
||||
lambda x: f'Bearer {x["GLBID"].value}'),
|
||||
'version': 1,
|
||||
})).encode())
|
||||
|
||||
self._request_webpage(HEADRequest(security['sources'][0]['url_template']), video_id, 'Getting locksession cookie')
|
||||
if traverse_obj(video, ('resource', 'drm_protection_enabled', {bool})):
|
||||
self.report_drm(video_id)
|
||||
|
||||
security_hash = security['sources'][0]['token']
|
||||
if not security_hash:
|
||||
message = security.get('message')
|
||||
if message:
|
||||
raise ExtractorError(
|
||||
f'{self.IE_NAME} returned error: {message}', expected=True)
|
||||
main_source = video['sources'][0]
|
||||
|
||||
hash_code = security_hash[:2]
|
||||
padding = '%010d' % random.randint(1, 10000000000)
|
||||
if hash_code in ('04', '14'):
|
||||
received_time = security_hash[3:13]
|
||||
received_md5 = security_hash[24:]
|
||||
hash_prefix = security_hash[:23]
|
||||
elif hash_code in ('02', '12', '03', '13'):
|
||||
received_time = security_hash[2:12]
|
||||
received_md5 = security_hash[22:]
|
||||
padding += '1'
|
||||
hash_prefix = '05' + security_hash[:22]
|
||||
# 4k streams are exclusively outputted in dash, so we need to filter these out
|
||||
if determine_ext(main_source['url']) == 'mpd':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(main_source['url'], video_id, mpd_id='dash')
|
||||
else:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
main_source['url'], video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
padded_sign_time = str(int(received_time) + 86400) + padding
|
||||
md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
|
||||
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
|
||||
signed_hash = hash_prefix + padded_sign_time + signed_md5
|
||||
source = security['sources'][0]['url_parts']
|
||||
resource_url = source['scheme'] + '://' + source['domain'] + source['path']
|
||||
signed_url = '{}?h={}&k=html5&a={}'.format(resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')
|
||||
|
||||
fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
|
||||
for resource in video['resources']:
|
||||
if resource.get('type') == 'subtitle':
|
||||
subtitles.setdefault(resource.get('language') or 'por', []).append({
|
||||
'url': resource.get('url'),
|
||||
})
|
||||
subs = try_get(security, lambda x: x['source']['subtitles'], expected_type=dict) or {}
|
||||
for sub_lang, sub_url in subs.items():
|
||||
if sub_url:
|
||||
subtitles.setdefault(sub_lang or 'por', []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
subs = try_get(security, lambda x: x['source']['subtitles_webvtt'], expected_type=dict) or {}
|
||||
for sub_lang, sub_url in subs.items():
|
||||
if sub_url:
|
||||
subtitles.setdefault(sub_lang or 'por', []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
duration = float_or_none(video.get('duration'), 1000)
|
||||
uploader = video.get('channel')
|
||||
uploader_id = str_or_none(video.get('channel_id'))
|
||||
self._merge_subtitles(traverse_obj(main_source, ('text', ..., ('caption', 'subtitle'), {
|
||||
'url': ('srt', 'url', {url_or_none}),
|
||||
}, all, {subs_list_to_dict(lang='pt-BR')})), target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
**traverse_obj(info, {
|
||||
'title': ('headline', {str}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'uploader': ('title', 'headline', {str}),
|
||||
'uploader_id': ('title', 'originProgramId', {str_or_none}),
|
||||
'episode_number': ('relatedEpisodeNumber', {int_or_none}),
|
||||
'season_number': ('relatedSeasonNumber', {int_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class GloboArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://(?!globoplay).+?\.globo\.com/(?:[^/?#]+/)*(?P<id>[^/?#.]+)(?:\.html)?'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id=["\'](\d{7,})["\']',
|
||||
|
||||
@@ -1,40 +1,48 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GoodGameIE(InfoExtractor):
|
||||
IE_NAME = 'goodgame:stream'
|
||||
_VALID_URL = r'https?://goodgame\.ru/channel/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://goodgame\.ru/(?!channel/)(?P<id>[\w.*-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://goodgame.ru/channel/Pomi/#autoplay',
|
||||
'url': 'https://goodgame.ru/TGW#autoplay',
|
||||
'info_dict': {
|
||||
'id': 'pomi',
|
||||
'id': '7998',
|
||||
'ext': 'mp4',
|
||||
'title': r're:Reynor vs Special \(1/2,bo3\) Wardi Spring EU \- playoff \(финальный день\) \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'channel_id': '1644',
|
||||
'channel': 'Pomi',
|
||||
'channel_url': 'https://goodgame.ru/channel/Pomi/',
|
||||
'description': 'md5:4a87b775ee7b2b57bdccebe285bbe171',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'channel_id': '7998',
|
||||
'title': r're:шоуматч Happy \(NE\) vs Fortitude \(UD\), потом ладдер и дс \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'channel_url': 'https://goodgame.ru/TGW',
|
||||
'thumbnail': 'https://hls.goodgame.ru/previews/7998_240.jpg',
|
||||
'uploader': 'TGW',
|
||||
'channel': 'JosephStalin',
|
||||
'live_status': 'is_live',
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
'channel_follower_count': int,
|
||||
'uploader_id': '2899',
|
||||
'concurrent_view_count': int,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'May not be online',
|
||||
}, {
|
||||
'url': 'https://goodgame.ru/Mr.Gray',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://goodgame.ru/HeDoPa3yMeHue*',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_name = self._match_id(url)
|
||||
response = self._download_json(f'https://api2.goodgame.ru/v2/streams/{channel_name}', channel_name)
|
||||
player_id = response['channel']['gg_player_src']
|
||||
response = self._download_json(f'https://goodgame.ru/api/4/users/{channel_name}/stream', channel_name)
|
||||
player_id = response['streamkey']
|
||||
|
||||
formats, subtitles = [], {}
|
||||
if response.get('status') == 'Live':
|
||||
if response.get('status'):
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
f'https://hls.goodgame.ru/manifest/{player_id}_master.m3u8',
|
||||
channel_name, 'mp4', live=True)
|
||||
@@ -45,13 +53,17 @@ class GoodGameIE(InfoExtractor):
|
||||
'id': player_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': traverse_obj(response, ('channel', 'title')),
|
||||
'channel': channel_name,
|
||||
'channel_id': str_or_none(traverse_obj(response, ('channel', 'id'))),
|
||||
'channel_url': response.get('url'),
|
||||
'description': clean_html(traverse_obj(response, ('channel', 'description'))),
|
||||
'thumbnail': traverse_obj(response, ('channel', 'thumb')),
|
||||
'is_live': bool(formats),
|
||||
'view_count': int_or_none(response.get('viewers')),
|
||||
'age_limit': 18 if traverse_obj(response, ('channel', 'adult')) else None,
|
||||
**traverse_obj(response, {
|
||||
'title': ('title', {str}),
|
||||
'channel': ('channelkey', {str}),
|
||||
'channel_id': ('id', {str_or_none}),
|
||||
'channel_url': ('link', {url_or_none}),
|
||||
'uploader': ('streamer', 'username', {str}),
|
||||
'uploader_id': ('streamer', 'id', {str_or_none}),
|
||||
'thumbnail': ('preview', {url_or_none}, {self._proto_relative_url}),
|
||||
'concurrent_view_count': ('viewers', {int_or_none}),
|
||||
'channel_follower_count': ('followers', {int_or_none}),
|
||||
'age_limit': ('adult', {bool}, {lambda x: 18 if x else None}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
)
|
||||
@@ -76,6 +75,7 @@ class GoPlayIE(InfoExtractor):
|
||||
if not self._id_token:
|
||||
raise self.raise_login_required(method='password')
|
||||
|
||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv
|
||||
def _find_json(self, s):
|
||||
return self._search_json(
|
||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
||||
@@ -86,9 +86,10 @@ class GoPlayIE(InfoExtractor):
|
||||
|
||||
nextjs_data = traverse_obj(
|
||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
||||
(..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
|
||||
(..., {json.loads}, ..., {self._find_json}, ...))
|
||||
meta = traverse_obj(nextjs_data, (
|
||||
..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))
|
||||
..., ..., 'children', ..., ..., 'children',
|
||||
lambda _, v: v['video']['path'] == urllib.parse.urlparse(url).path, 'video', any))
|
||||
|
||||
video_id = meta['uuid']
|
||||
info_dict = traverse_obj(meta, {
|
||||
|
||||
@@ -2,12 +2,12 @@ import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bug_reports_message,
|
||||
decode_base_n,
|
||||
encode_base_n,
|
||||
filter_dict,
|
||||
@@ -15,12 +15,12 @@ from ..utils import (
|
||||
format_field,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
lowercase_escape,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
_ENCODING_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
|
||||
@@ -28,63 +28,30 @@ _ENCODING_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678
|
||||
|
||||
def _pk_to_id(media_id):
    """Encode a media id (pk) as a shortcode using ``_ENCODING_CHARS``.

    Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id

    *media_id* may be an int or a string; only the part before the first
    ``_`` is used.
    """
    # Coerce to str first so integer pks do not fail on .split()
    pk = int(str(media_id).split('_')[0])
    return encode_base_n(pk, table=_ENCODING_CHARS)
|
||||
|
||||
|
||||
def _id_to_pk(shortcode):
    """Convert a shortcode to a numeric value.

    Inputs longer than 28 characters have their last 28 characters dropped
    before decoding with ``_ENCODING_CHARS``.
    """
    if len(shortcode) > 28:
        # Long ids are shortcode + 28 extra characters; decode the shortcode part only
        shortcode = shortcode[:-28]
    return decode_base_n(shortcode, table=_ENCODING_CHARS)
|
||||
|
||||
|
||||
class InstagramBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'instagram'
|
||||
_IS_LOGGED_IN = False
|
||||
|
||||
_API_BASE_URL = 'https://i.instagram.com/api/v1'
|
||||
_LOGIN_URL = 'https://www.instagram.com/accounts/login'
|
||||
_API_HEADERS = {
|
||||
'X-IG-App-ID': '936619743392459',
|
||||
'X-ASBD-ID': '198387',
|
||||
'X-IG-WWW-Claim': '0',
|
||||
'Origin': 'https://www.instagram.com',
|
||||
'Accept': '*/*',
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self._IS_LOGGED_IN:
|
||||
return
|
||||
|
||||
login_webpage = self._download_webpage(
|
||||
self._LOGIN_URL, None, note='Downloading login webpage', errnote='Failed to download login webpage')
|
||||
|
||||
shared_data = self._parse_json(self._search_regex(
|
||||
r'window\._sharedData\s*=\s*({.+?});', login_webpage, 'shared data', default='{}'), None)
|
||||
|
||||
login = self._download_json(
|
||||
f'{self._LOGIN_URL}/ajax/', None, note='Logging in', headers={
|
||||
**self._API_HEADERS,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-CSRFToken': shared_data['config']['csrf_token'],
|
||||
'X-Instagram-AJAX': shared_data['rollout_hash'],
|
||||
'Referer': 'https://www.instagram.com/',
|
||||
}, data=urlencode_postdata({
|
||||
'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
|
||||
'username': username,
|
||||
'queryParams': '{}',
|
||||
'optIntoOneTap': 'false',
|
||||
'stopDeletionNonce': '',
|
||||
'trustedDeviceRecords': '{}',
|
||||
}))
|
||||
|
||||
if not login.get('authenticated'):
|
||||
if login.get('message'):
|
||||
raise ExtractorError(f'Unable to login: {login["message"]}')
|
||||
elif login.get('user'):
|
||||
raise ExtractorError('Unable to login: Sorry, your password was incorrect. Please double-check your password.', expected=True)
|
||||
elif login.get('user') is False:
|
||||
raise ExtractorError('Unable to login: The username you entered doesn\'t belong to an account. Please check your username and try again.', expected=True)
|
||||
raise ExtractorError('Unable to login')
|
||||
InstagramBaseIE._IS_LOGGED_IN = True
|
||||
@property
def _api_headers(self):
    """Headers for API requests; the app id comes from the ``app_id`` extractor argument."""
    # First configured app_id wins; the literal below is the default
    app_ids = self._configuration_arg('app_id', ['936619743392459'], ie_key=InstagramIE)
    headers = {'X-IG-App-ID': app_ids[0]}
    headers.update({
        'X-ASBD-ID': '198387',
        'X-IG-WWW-Claim': '0',
        'Origin': 'https://www.instagram.com',
        'Accept': '*/*',
    })
    return headers
|
||||
|
||||
def _get_count(self, media, kind, *keys):
|
||||
return traverse_obj(
|
||||
@@ -209,7 +176,7 @@ class InstagramBaseIE(InfoExtractor):
|
||||
def _get_comments(self, video_id):
|
||||
comments_info = self._download_json(
|
||||
f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/comments/?can_support_threading=true&permalink_enabled=false', video_id,
|
||||
fatal=False, errnote='Comments extraction failed', note='Downloading comments info', headers=self._API_HEADERS) or {}
|
||||
fatal=False, errnote='Comments extraction failed', note='Downloading comments info', headers=self._api_headers) or {}
|
||||
|
||||
comment_data = traverse_obj(comments_info, ('edge_media_to_parent_comment', 'edges'), 'comments')
|
||||
for comment_dict in comment_data or []:
|
||||
@@ -254,7 +221,7 @@ class InstagramIOSIE(InfoExtractor):
|
||||
|
||||
|
||||
class InstagramIE(InstagramBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/(?!share/)[^/?#]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
@@ -402,14 +369,14 @@ class InstagramIE(InstagramBaseIE):
|
||||
info = traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/info/', video_id,
|
||||
fatal=False, errnote='Video info extraction failed',
|
||||
note='Downloading video info', headers=self._API_HEADERS), ('items', 0))
|
||||
note='Downloading video info', headers=self._api_headers), ('items', 0))
|
||||
if info:
|
||||
media.update(info)
|
||||
return self._extract_product(media)
|
||||
|
||||
api_check = self._download_json(
|
||||
f'{self._API_BASE_URL}/web/get_ruling_for_content/?content_type=MEDIA&target_id={_id_to_pk(video_id)}',
|
||||
video_id, headers=self._API_HEADERS, fatal=False, note='Setting up session', errnote=False) or {}
|
||||
video_id, headers=self._api_headers, fatal=False, note='Setting up session', errnote=False) or {}
|
||||
csrf_token = self._get_cookies('https://www.instagram.com').get('csrftoken')
|
||||
|
||||
if not csrf_token:
|
||||
@@ -429,7 +396,7 @@ class InstagramIE(InstagramBaseIE):
|
||||
general_info = self._download_json(
|
||||
'https://www.instagram.com/graphql/query/', video_id, fatal=False, errnote=False,
|
||||
headers={
|
||||
**self._API_HEADERS,
|
||||
**self._api_headers,
|
||||
'X-CSRFToken': csrf_token or '',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Referer': url,
|
||||
@@ -437,7 +404,6 @@ class InstagramIE(InstagramBaseIE):
|
||||
'doc_id': '8845758582119845',
|
||||
'variables': json.dumps(variables, separators=(',', ':')),
|
||||
})
|
||||
media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
|
||||
|
||||
if not general_info:
|
||||
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
||||
@@ -466,6 +432,26 @@ class InstagramIE(InstagramBaseIE):
|
||||
media.update(traverse_obj(
|
||||
additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {})
|
||||
|
||||
else:
|
||||
xdt_shortcode_media = traverse_obj(general_info, ('data', 'xdt_shortcode_media', {dict})) or {}
|
||||
if not xdt_shortcode_media:
|
||||
error = join_nonempty('title', 'description', delim=': ', from_dict=api_check)
|
||||
if 'Restricted Video' in error:
|
||||
self.raise_login_required(error)
|
||||
elif error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
elif len(video_id) > 28:
|
||||
# It's a private post (video_id == shortcode + 28 extra characters)
|
||||
# Only raise after getting empty response; sometimes "long"-shortcode posts are public
|
||||
self.raise_login_required(
|
||||
'This content is only available for registered users who follow this account')
|
||||
raise ExtractorError(
|
||||
'Instagram sent an empty media response. Check if this post is accessible in your '
|
||||
f'browser without being logged-in. If it is not, then u{self._login_hint()[1:]}. '
|
||||
'Otherwise, if the post is accessible in browser without being logged-in'
|
||||
f'{bug_reports_message(before=",")}', expected=True)
|
||||
media.update(xdt_shortcode_media)
|
||||
|
||||
username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
|
||||
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)
|
||||
|
||||
@@ -485,8 +471,7 @@ class InstagramIE(InstagramBaseIE):
|
||||
return self.playlist_result(
|
||||
self._extract_nodes(nodes, True), video_id,
|
||||
format_field(username, None, 'Post by %s'), description)
|
||||
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
raise ExtractorError('There is no video in this post', expected=True)
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
@@ -689,7 +674,7 @@ class InstagramTagIE(InstagramPlaylistBaseIE):
|
||||
|
||||
|
||||
class InstagramStoryIE(InstagramBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/]+)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/?#]+)(?:/(?P<id>\d+))?'
|
||||
IE_NAME = 'instagram:story'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -699,25 +684,38 @@ class InstagramStoryIE(InstagramBaseIE):
|
||||
'title': 'Rare',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/stories/fruits_zipper/3570766765028588805/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/stories/fruits_zipper',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
username, story_id = self._match_valid_url(url).groups()
|
||||
story_info = self._download_webpage(url, story_id)
|
||||
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
|
||||
username, story_id = self._match_valid_url(url).group('user', 'id')
|
||||
if username == 'highlights' and not story_id: # story id is only mandatory for highlights
|
||||
raise ExtractorError('Input URL is missing a highlight ID', expected=True)
|
||||
display_id = story_id or username
|
||||
story_info = self._download_webpage(url, display_id)
|
||||
user_info = self._search_json(r'"user":', story_info, 'user info', display_id, fatal=False)
|
||||
if not user_info:
|
||||
self.raise_login_required('This content is unreachable')
|
||||
|
||||
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
|
||||
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
|
||||
if not story_info_url: # user id is only mandatory for non-highlights
|
||||
raise ExtractorError('Unable to extract user id')
|
||||
if username == 'highlights':
|
||||
story_info_url = f'highlight:{story_id}'
|
||||
else:
|
||||
if not user_id: # user id is only mandatory for non-highlights
|
||||
raise ExtractorError('Unable to extract user id')
|
||||
story_info_url = user_id
|
||||
|
||||
videos = traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
|
||||
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
|
||||
display_id, errnote=False, fatal=False, headers=self._api_headers), 'reels')
|
||||
if not videos:
|
||||
self.raise_login_required('You need to log in to access this content')
|
||||
user_info = traverse_obj(videos, (user_id, 'user', {dict})) or {}
|
||||
|
||||
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
|
||||
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
|
||||
@@ -727,6 +725,7 @@ class InstagramStoryIE(InstagramBaseIE):
|
||||
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
|
||||
info_data = []
|
||||
for highlight in highlights:
|
||||
highlight.setdefault('user', {}).update(user_info)
|
||||
highlight_data = self._extract_product(highlight)
|
||||
if highlight_data.get('formats'):
|
||||
info_data.append({
|
||||
@@ -734,4 +733,7 @@ class InstagramStoryIE(InstagramBaseIE):
|
||||
'uploader_id': user_id,
|
||||
**filter_dict(highlight_data),
|
||||
})
|
||||
if username != 'highlights' and story_id and not self._yes_playlist(username, story_id):
|
||||
return traverse_obj(info_data, (lambda _, v: v['id'] == _pk_to_id(story_id), any))
|
||||
|
||||
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
|
||||
|
||||
@@ -2,10 +2,12 @@ import hashlib
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -27,7 +29,7 @@ class JamendoIE(InfoExtractor):
|
||||
'ext': 'flac',
|
||||
# 'title': 'Maya Filipič - Stories from Emona I',
|
||||
'title': 'Stories from Emona I',
|
||||
'artist': 'Maya Filipič',
|
||||
'artists': ['Maya Filipič'],
|
||||
'album': 'Between two worlds',
|
||||
'track': 'Stories from Emona I',
|
||||
'duration': 210,
|
||||
@@ -93,9 +95,15 @@ class JamendoIE(InfoExtractor):
|
||||
if not cover_url or cover_url in urls:
|
||||
continue
|
||||
urls.append(cover_url)
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(cover_url), track_id, 'Checking thumbnail extension',
|
||||
errnote=False, fatal=False)
|
||||
if not urlh:
|
||||
continue
|
||||
size = int_or_none(cover_id.lstrip('size'))
|
||||
thumbnails.append({
|
||||
'id': cover_id,
|
||||
'ext': urlhandle_detect_ext(urlh, default='jpg'),
|
||||
'url': cover_url,
|
||||
'width': size,
|
||||
'height': size,
|
||||
|
||||
@@ -39,7 +39,7 @@ class LaracastsBaseIE(InfoExtractor):
|
||||
'description': ('body', {clean_html}),
|
||||
'thumbnail': ('largeThumbnail', {url_or_none}),
|
||||
'duration': ('length', {int_or_none}),
|
||||
'date': ('dateSegments', 'published', {unified_strdate}),
|
||||
'upload_date': ('dateSegments', 'published', {unified_strdate}),
|
||||
}))
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ class LaracastsIE(LaracastsBaseIE):
|
||||
'title': 'Hello, Laravel',
|
||||
'ext': 'mp4',
|
||||
'duration': 519,
|
||||
'date': '20240312',
|
||||
'upload_date': '20240312',
|
||||
'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png',
|
||||
'description': 'md5:ddd658bb241975871d236555657e1dd1',
|
||||
'season_number': 1,
|
||||
|
||||
@@ -26,6 +26,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
|
||||
_OPT_CLAIM_ID = f'[^$@:/?#&]+(?:[:#]{_CLAIM_ID_REGEX})?'
|
||||
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
|
||||
_UNSUPPORTED_STREAM_TYPES = ['binary']
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _call_api_proxy(self, method, display_id, params, resource):
|
||||
@@ -310,7 +311,13 @@ class LBRYIE(LBRYBaseIE):
|
||||
if stream_type in self._SUPPORTED_STREAM_TYPES:
|
||||
claim_id, is_live = result['claim_id'], False
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
||||
'get', claim_id, {
|
||||
'uri': uri,
|
||||
**traverse_obj(parse_qs(url), {
|
||||
'signature': ('signature', 0),
|
||||
'signature_ts': ('signature_ts', 0),
|
||||
}),
|
||||
}, 'streaming url')['streaming_url']
|
||||
|
||||
# GET request to v3 API returns original video/audio file if available
|
||||
direct_url = re.sub(r'/api/v\d+/', '/api/v3/', streaming_url)
|
||||
@@ -330,12 +337,15 @@ class LBRYIE(LBRYBaseIE):
|
||||
'vcodec': 'none' if stream_type == 'audio' else None,
|
||||
})
|
||||
|
||||
final_url = None
|
||||
# HEAD request returns redirect response to m3u8 URL if available
|
||||
final_url = self._request_webpage(
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest(streaming_url), display_id, headers=headers,
|
||||
note='Downloading streaming redirect url info').url
|
||||
note='Downloading streaming redirect url info', fatal=False)
|
||||
if urlh:
|
||||
final_url = urlh.url
|
||||
|
||||
elif result.get('value_type') == 'stream':
|
||||
elif result.get('value_type') == 'stream' and stream_type not in self._UNSUPPORTED_STREAM_TYPES:
|
||||
claim_id, is_live = result['signing_channel']['claim_id'], True
|
||||
live_data = self._download_json(
|
||||
'https://api.odysee.live/livestream/is_live', claim_id,
|
||||
|
||||
87
yt_dlp/extractor/loco.py
Normal file
87
yt_dlp/extractor/loco.py
Normal file
@@ -0,0 +1,87 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, url_or_none
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class LocoIE(InfoExtractor):
    """Extractor for loco.com ``/streamers/<id>`` and ``/stream/<id>`` pages.

    Stream information is read from the Next.js data embedded in the page;
    ``/streamers/`` URLs are reported as live.
    """

    _VALID_URL = r'https?://(?:www\.)?loco\.com/(?P<type>streamers|stream)/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://loco.com/streamers/teuzinfps',
        'info_dict': {
            'id': 'teuzinfps',
            'ext': 'mp4',
            'title': r're:MS BOLADAO, RESENHA & GAMEPLAY ALTO NIVEL',
            'description': 'bom e novo',
            'uploader_id': 'RLUVE3S9JU',
            'channel': 'teuzinfps',
            'channel_follower_count': int,
            'comment_count': int,
            'view_count': int,
            'concurrent_view_count': int,
            'like_count': int,
            'thumbnail': 'https://static.ivory.getloconow.com/default_thumb/743701a9-98ca-41ae-9a8b-70bd5da070ad.jpg',
            'tags': ['MMORPG', 'Gameplay'],
            'series': 'Tibia',
            'timestamp': int,
            'modified_timestamp': int,
            'live_status': 'is_live',
            'upload_date': str,
            'modified_date': str,
        },
        'params': {'skip_download': 'Livestream'},
    }, {
        'url': 'https://loco.com/stream/c64916eb-10fb-46a9-9a19-8c4b7ed064e7',
        'md5': '45ebc8a47ee1c2240178757caf8881b5',
        'info_dict': {
            'id': 'c64916eb-10fb-46a9-9a19-8c4b7ed064e7',
            'ext': 'mp4',
            'title': 'PAULINHO LOKO NA LOCO!',
            'description': 'live on na loco',
            'uploader_id': '2MDO7Z1DPM',
            'channel': 'paulinholokobr',
            'channel_follower_count': int,
            'comment_count': int,
            'view_count': int,
            'concurrent_view_count': int,
            'like_count': int,
            'duration': 14491,
            'thumbnail': 'https://static.ivory.getloconow.com/default_thumb/59b5970b-23c1-4518-9e96-17ce341299fe.jpg',
            'tags': ['Gameplay'],
            'series': 'GTA 5',
            'timestamp': 1740612872,
            'modified_timestamp': 1740613037,
            'upload_date': '20250226',
            'modified_date': '20250226',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the stream page at *url*.

        Raises (via ``require``) when neither ``liveStreamData`` nor ``stream``
        is present as a dict in the page props.
        """
        mobj = self._match_valid_url(url)
        video_type = mobj.group('type')
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(webpage, video_id)

        # The stream object lives under one of two keys; take the first dict found
        stream_path = (
            'props', 'pageProps', ('liveStreamData', 'stream'),
            {dict}, any, {require('stream info')})
        stream = traverse_obj(nextjs_data, stream_path)

        formats = self._extract_m3u8_formats(stream['conf']['hls'], video_id)

        # started_at/updated_at are divided by 1000 before being used as timestamps
        scaled_timestamp = int_or_none(scale=1000)
        metadata = traverse_obj(stream, {
            'title': ('title', {str}),
            'series': ('game_name', {str}),
            'uploader_id': ('user_uid', {str}),
            'channel': ('alias', {str}),
            'description': ('description', {str}),
            'concurrent_view_count': ('viewersCurrent', {int_or_none}),
            'view_count': ('total_views', {int_or_none}),
            'thumbnail': ('thumbnail_url_small', {url_or_none}),
            'like_count': ('likes', {int_or_none}),
            'tags': ('tags', ..., {str}),
            'timestamp': ('started_at', {scaled_timestamp}),
            'modified_timestamp': ('updated_at', {scaled_timestamp}),
            'comment_count': ('comments_count', {int_or_none}),
            'channel_follower_count': ('followers_count', {int_or_none}),
            'duration': ('duration', {int_or_none}),
        })

        return {
            'formats': formats,
            'id': video_id,
            'is_live': video_type == 'streamers',
            **metadata,
        }
|
||||
@@ -1,35 +1,36 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_age_limit, parse_duration, traverse_obj
|
||||
from ..utils import parse_age_limit, parse_duration, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MagellanTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?magellantv\.com/(?:watch|video)/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.magellantv.com/watch/my-dads-on-death-row?type=v',
|
||||
'url': 'https://www.magellantv.com/watch/incas-the-new-story?type=v',
|
||||
'info_dict': {
|
||||
'id': 'my-dads-on-death-row',
|
||||
'id': 'incas-the-new-story',
|
||||
'ext': 'mp4',
|
||||
'title': 'My Dad\'s On Death Row',
|
||||
'description': 'md5:33ba23b9f0651fc4537ed19b1d5b0d7a',
|
||||
'duration': 3780.0,
|
||||
'title': 'Incas: The New Story',
|
||||
'description': 'md5:936c7f6d711c02dfb9db22a067b586fe',
|
||||
'age_limit': 14,
|
||||
'tags': ['Justice', 'Reality', 'United States', 'True Crime'],
|
||||
'duration': 3060.0,
|
||||
'tags': ['Ancient History', 'Archaeology', 'Anthropology'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.magellantv.com/video/james-bulger-the-new-revelations',
|
||||
'url': 'https://www.magellantv.com/video/tortured-to-death-murdering-the-nanny',
|
||||
'info_dict': {
|
||||
'id': 'james-bulger-the-new-revelations',
|
||||
'id': 'tortured-to-death-murdering-the-nanny',
|
||||
'ext': 'mp4',
|
||||
'title': 'James Bulger: The New Revelations',
|
||||
'description': 'md5:7b97922038bad1d0fe8d0470d8a189f2',
|
||||
'title': 'Tortured to Death: Murdering the Nanny',
|
||||
'description': 'md5:d87033594fa218af2b1a8b49f52511e5',
|
||||
'age_limit': 14,
|
||||
'duration': 2640.0,
|
||||
'age_limit': 0,
|
||||
'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'],
|
||||
'tags': ['True Crime', 'Murder'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.magellantv.com/watch/celebration-nation',
|
||||
'url': 'https://www.magellantv.com/watch/celebration-nation?type=s',
|
||||
'info_dict': {
|
||||
'id': 'celebration-nation',
|
||||
'ext': 'mp4',
|
||||
@@ -43,10 +44,19 @@ class MagellanTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'pageProps', 'reactContext',
|
||||
(('video', 'detail'), ('series', 'currentEpisode')), {dict}), get_all=False)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id)
|
||||
context = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['reactContext']
|
||||
data = traverse_obj(context, ((('video', 'detail'), ('series', 'currentEpisode')), {dict}, any))
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for m3u8_url in set(traverse_obj(data, ((('manifests', ..., 'hls'), 'jwp_video_url'), {url_or_none}))):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
if not formats and (error := traverse_obj(context, ('errorDetailPage', 'errorMessage', {str}))):
|
||||
if 'available in your country' in error:
|
||||
self.raise_geo_restricted(msg=error)
|
||||
self.raise_no_formats(f'{self.IE_NAME} said: {error}', expected=True)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -102,11 +102,10 @@ class MedalTVIE(InfoExtractor):
|
||||
item_id = item_id or '%dp' % height
|
||||
if item_id not in item_url:
|
||||
return
|
||||
width = int(round(aspect_ratio * height))
|
||||
container.append({
|
||||
'url': item_url,
|
||||
id_key: item_id,
|
||||
'width': width,
|
||||
'width': round(aspect_ratio * height),
|
||||
'height': height,
|
||||
})
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ class MicrosoftEmbedIE(InfoExtractor):
|
||||
'timestamp': 1631658316,
|
||||
'upload_date': '20210914',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: syntax error: line 1, column 0'],
|
||||
}]
|
||||
_API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/'
|
||||
|
||||
@@ -36,11 +37,11 @@ class MicrosoftEmbedIE(InfoExtractor):
|
||||
formats = []
|
||||
for source_type, source in metadata['streams'].items():
|
||||
if source_type == 'smooth_Streaming':
|
||||
formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss'))
|
||||
formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss', fatal=False))
|
||||
elif source_type == 'apple_HTTP_Live_Streaming':
|
||||
formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4'))
|
||||
formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4', fatal=False))
|
||||
elif source_type == 'mPEG_DASH':
|
||||
formats.extend(self._extract_mpd_formats(source['url'], video_id))
|
||||
formats.extend(self._extract_mpd_formats(source['url'], video_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': source_type,
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from .telecinco import TelecincoBaseIE
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
@@ -79,10 +81,20 @@ class MiTeleIE(TelecincoBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
pre_player = self._parse_json(self._search_regex(
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
|
||||
webpage, 'Pre Player'), display_id)['prePlayer']
|
||||
|
||||
try: # yt-dlp's default user-agents are too old and blocked by akamai
|
||||
webpage = self._download_webpage(url, display_id, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
|
||||
raise
|
||||
# Retry with impersonation if hardcoded UA is insufficient to bypass akamai
|
||||
webpage = self._download_webpage(url, display_id, impersonate=True)
|
||||
|
||||
pre_player = self._search_json(
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
||||
webpage, 'Pre Player', display_id)['prePlayer']
|
||||
title = pre_player['title']
|
||||
video_info = self._parse_content(pre_player['video'], url)
|
||||
content = pre_player.get('content') or {}
|
||||
|
||||
@@ -1,167 +1,215 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MSNIE(InfoExtractor):
    """Extractor for msn.com video, webcontent and article pages.

    Metadata is fetched as JSON from the ``assets.msn.com`` content Detail
    endpoint using the locale and page id parsed from the URL.
    """

    _VALID_URL = r'https?://(?:(?:www|preview)\.)?msn\.com/(?P<locale>[a-z]{2}-[a-z]{2})/(?:[^/?#]+/)+(?P<display_id>[^/?#]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'https://www.msn.com/en-gb/video/news/president-macron-interrupts-trump-over-ukraine-funding/vi-AA1zMcD7',
        'info_dict': {
            'id': 'AA1zMcD7',
            'ext': 'mp4',
            'display_id': 'president-macron-interrupts-trump-over-ukraine-funding',
            'title': 'President Macron interrupts Trump over Ukraine funding',
            'description': 'md5:5fd3857ac25849e7a56cb25fbe1a2a8b',
            'uploader': 'k! News UK',
            'uploader_id': 'BB1hz5Rj',
            'duration': 59,
            'thumbnail': 'https://img-s-msn-com.akamaized.net/tenant/amp/entityid/AA1zMagX.img',
            'tags': 'count:14',
            'timestamp': 1740510914,
            'upload_date': '20250225',
            'release_timestamp': 1740513600,
            'release_date': '20250225',
            'modified_timestamp': 1741413241,
            'modified_date': '20250308',
        },
    }, {
        'url': 'https://www.msn.com/en-gb/video/watch/films-success-saved-adam-pearsons-acting-career/vi-AA1znZGE?ocid=hpmsn',
        'info_dict': {
            'id': 'AA1znZGE',
            'ext': 'mp4',
            'display_id': 'films-success-saved-adam-pearsons-acting-career',
            'title': "Films' success saved Adam Pearson's acting career",
            'description': 'md5:98c05f7bd9ab4f9c423400f62f2d3da5',
            'uploader': 'Sky News',
            'uploader_id': 'AA2eki',
            'duration': 52,
            'thumbnail': 'https://img-s-msn-com.akamaized.net/tenant/amp/entityid/AA1zo7nU.img',
            'timestamp': 1739993965,
            'upload_date': '20250219',
            'release_timestamp': 1739977753,
            'release_date': '20250219',
            'modified_timestamp': 1742076259,
            'modified_date': '20250315',
        },
    }, {
        'url': 'https://www.msn.com/en-us/entertainment/news/rock-frontman-replacements-you-might-not-know-happened/vi-AA1yLVcD',
        'info_dict': {
            'id': 'AA1yLVcD',
            'ext': 'mp4',
            'display_id': 'rock-frontman-replacements-you-might-not-know-happened',
            'title': 'Rock Frontman Replacements You Might Not Know Happened',
            'description': 'md5:451a125496ff0c9f6816055bb1808da9',
            'uploader': 'Grunge (Video)',
            'uploader_id': 'BB1oveoV',
            'duration': 596,
            'thumbnail': 'https://img-s-msn-com.akamaized.net/tenant/amp/entityid/AA1yM4OJ.img',
            'timestamp': 1739223456,
            'upload_date': '20250210',
            'release_timestamp': 1739219731,
            'release_date': '20250210',
            'modified_timestamp': 1741427272,
            'modified_date': '20250308',
        },
    }, {
        # Dailymotion Embed
        'url': 'https://www.msn.com/de-de/nachrichten/other/the-first-descendant-gameplay-trailer-zu-serena-der-neuen-gefl%C3%BCgelten-nachfahrin/vi-AA1B1d06',
        'info_dict': {
            'id': 'x9g6oli',
            'ext': 'mp4',
            'title': 'The First Descendant: Gameplay-Trailer zu Serena, der neuen geflügelten Nachfahrin',
            'description': '',
            'uploader': 'MeinMMO',
            'uploader_id': 'x2mvqi4',
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'duration': 60,
            'thumbnail': 'https://s1.dmcdn.net/v/Y3fO61drj56vPB9SS/x1080',
            'tags': ['MeinMMO', 'The First Descendant'],
            'timestamp': 1742124877,
            'upload_date': '20250316',
        },
    }, {
        # Youtube Embed
        'url': 'https://www.msn.com/en-gb/video/webcontent/web-content/vi-AA1ybFaJ',
        'info_dict': {
            'id': 'kQSChWu95nE',
            'ext': 'mp4',
            'title': '7 Daily Habits to Nurture Your Personal Growth',
            'description': 'md5:6f233c68341b74dee30c8c121924e827',
            'uploader': 'TopThink',
            'uploader_id': '@TopThink',
            'uploader_url': 'https://www.youtube.com/@TopThink',
            'channel': 'TopThink',
            'channel_id': 'UCMlGmHokrQRp-RaNO7aq4Uw',
            'channel_url': 'https://www.youtube.com/channel/UCMlGmHokrQRp-RaNO7aq4Uw',
            'channel_is_verified': True,
            'channel_follower_count': int,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'duration': 705,
            'thumbnail': 'https://i.ytimg.com/vi/kQSChWu95nE/maxresdefault.jpg',
            'categories': ['Howto & Style'],
            'tags': ['topthink', 'top think', 'personal growth'],
            'timestamp': 1722711620,
            'upload_date': '20240803',
            'playable_in_embed': True,
            'availability': 'public',
            'live_status': 'not_live',
        },
    }, {
        # Article with social embed
        'url': 'https://www.msn.com/en-in/news/techandscience/watch-earth-sets-and-rises-behind-moon-in-breathtaking-blue-ghost-video/ar-AA1zKoAc',
        'info_dict': {
            'id': 'AA1zKoAc',
            'title': 'Watch: Earth sets and rises behind Moon in breathtaking Blue Ghost video',
            'description': 'md5:0ad51cfa77e42e7f0c46cf98a619dbbf',
            'uploader': 'India Today',
            'uploader_id': 'AAyFWG',
            'tags': 'count:11',
            'timestamp': 1740485034,
            'upload_date': '20250225',
            'release_timestamp': 1740484875,
            'release_date': '20250225',
            'modified_timestamp': 1740488561,
            'modified_date': '20250225',
        },
        'playlist_count': 1,
    }]

    def _real_extract(self, url):
        """Extract an msn.com page.

        Depending on the ``type`` field of the Detail JSON this returns:
        a video info dict (``video``), a URL result pointing at the source
        page (``webcontent``, or ``video`` served by a third-party player),
        or a playlist of social embeds (``article``).

        Raises ExtractorError for a ``webcontent`` page without a source URL
        and for any other page type.
        """
        locale, display_id, page_id = self._match_valid_url(url).group('locale', 'display_id', 'id')

        json_data = self._download_json(
            f'https://assets.msn.com/content/view/v2/Detail/{locale}/{page_id}', page_id)

        # Fields shared by the video info dict and the article playlist
        common_metadata = traverse_obj(json_data, {
            'title': ('title', {str}),
            # First non-empty string among 'abstract' and the HTML-cleaned 'body'
            'description': (('abstract', ('body', {clean_html})), {str}, filter, any),
            'timestamp': ('createdDateTime', {parse_iso8601}),
            'release_timestamp': ('publishedDateTime', {parse_iso8601}),
            'modified_timestamp': ('updatedDateTime', {parse_iso8601}),
            'thumbnail': ('thumbnail', 'image', 'url', {url_or_none}),
            'duration': ('videoMetadata', 'playTime', {int_or_none}),
            'tags': ('keywords', ..., {str}),
            'uploader': ('provider', 'name', {str}),
            'uploader_id': ('provider', 'id', {str}),
        })

        page_type = json_data['type']
        source_url = traverse_obj(json_data, ('sourceHref', {url_or_none}))
        if page_type == 'video':
            # Videos played through a third-party player are delegated to their source URL
            if traverse_obj(json_data, ('thirdPartyVideoPlayer', 'enabled')) and source_url:
                return self.url_result(source_url)
            formats = []
            subtitles = {}
            for file in traverse_obj(json_data, ('videoMetadata', 'externalVideoFiles', lambda _, v: url_or_none(v['url']))):
                file_url = file['url']
                ext = determine_ext(file_url)
                if ext == 'm3u8':
                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                        file_url, page_id, 'mp4', m3u8_id='hls', fatal=False)
                    formats.extend(fmts)
                    self._merge_subtitles(subs, target=subtitles)
                elif ext == 'mpd':
                    fmts, subs = self._extract_mpd_formats_and_subtitles(
                        file_url, page_id, mpd_id='dash', fatal=False)
                    formats.extend(fmts)
                    self._merge_subtitles(subs, target=subtitles)
                else:
                    # Anything that is not a manifest is treated as a direct media file
                    formats.append(
                        traverse_obj(file, {
                            'url': 'url',
                            'format_id': ('format', {str}),
                            'filesize': ('fileSize', {int_or_none}),
                            'height': ('height', {int_or_none}),
                            'width': ('width', {int_or_none}),
                        }))
            for caption in traverse_obj(json_data, ('videoMetadata', 'closedCaptions', lambda _, v: url_or_none(v['href']))):
                lang = caption.get('locale') or 'en-us'
                subtitles.setdefault(lang, []).append({
                    'url': caption['href'],
                    'ext': 'ttml',
                })

            return {
                'id': page_id,
                'display_id': display_id,
                'formats': formats,
                'subtitles': subtitles,
                **common_metadata,
            }
        elif page_type == 'webcontent':
            if not source_url:
                raise ExtractorError('Could not find source URL')
            return self.url_result(source_url)
        elif page_type == 'article':
            entries = [
                self.url_result(embed_url)
                for embed_url in traverse_obj(json_data, ('socialEmbeds', ..., 'postUrl', {url_or_none}))
            ]

            return self.playlist_result(entries, page_id, **common_metadata)

        raise ExtractorError(f'Unsupported page type: {page_type}')
|
||||
|
||||
@@ -4,7 +4,9 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class N1InfoAssetIE(InfoExtractor):
|
||||
@@ -35,9 +37,9 @@ class N1InfoIIE(InfoExtractor):
|
||||
IE_NAME = 'N1Info:article'
|
||||
_VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
# Youtube embedded
|
||||
# YouTube embedded
|
||||
'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
|
||||
'md5': '01ddb6646d0fd9c4c7d990aa77fe1c5a',
|
||||
'md5': '987ce6fd72acfecc453281e066b87973',
|
||||
'info_dict': {
|
||||
'id': 'L5Hd4hQVUpk',
|
||||
'ext': 'mp4',
|
||||
@@ -45,7 +47,26 @@ class N1InfoIIE(InfoExtractor):
|
||||
'title': 'Ozmo i USO21, ep. 13: Novak Đoković – Danil Medvedev | Ključevi Poraza, Budućnost | SPORT KLUB TENIS',
|
||||
'description': 'md5:467f330af1effedd2e290f10dc31bb8e',
|
||||
'uploader': 'Sport Klub',
|
||||
'uploader_id': 'sportklub',
|
||||
'uploader_id': '@sportklub',
|
||||
'uploader_url': 'https://www.youtube.com/@sportklub',
|
||||
'channel': 'Sport Klub',
|
||||
'channel_id': 'UChpzBje9Ro6CComXe3BgNaw',
|
||||
'channel_url': 'https://www.youtube.com/channel/UChpzBje9Ro6CComXe3BgNaw',
|
||||
'channel_is_verified': True,
|
||||
'channel_follower_count': int,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
'duration': 1049,
|
||||
'thumbnail': 'https://i.ytimg.com/vi/L5Hd4hQVUpk/maxresdefault.jpg',
|
||||
'chapters': 'count:9',
|
||||
'categories': ['Sports'],
|
||||
'tags': 'count:10',
|
||||
'timestamp': 1631522787,
|
||||
'playable_in_embed': True,
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://rs.n1info.com/vesti/djilas-los-plan-za-metro-nece-resiti-nijedan-saobracajni-problem/',
|
||||
@@ -55,6 +76,7 @@ class N1InfoIIE(InfoExtractor):
|
||||
'title': 'Đilas: Predlog izgradnje metroa besmislen; SNS odbacuje navode',
|
||||
'upload_date': '20210924',
|
||||
'timestamp': 1632481347,
|
||||
'thumbnail': 'http://n1info.rs/wp-content/themes/ucnewsportal-n1/dist/assets/images/placeholder-image-video.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -67,6 +89,7 @@ class N1InfoIIE(InfoExtractor):
|
||||
'title': 'Zadnji dnevi na kopališču Ilirija: “Ilirija ni umrla, ubili so jo”',
|
||||
'timestamp': 1632567630,
|
||||
'upload_date': '20210925',
|
||||
'thumbnail': 'https://n1info.si/wp-content/uploads/2021/09/06/1630945843-tomaz3.png',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -81,6 +104,14 @@ class N1InfoIIE(InfoExtractor):
|
||||
'upload_date': '20210924',
|
||||
'timestamp': 1632448649.0,
|
||||
'uploader': 'YouLotWhatDontStop',
|
||||
'display_id': 'pu9wbx',
|
||||
'channel_id': 'serbia',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 0,
|
||||
'duration': 134,
|
||||
'thumbnail': 'https://external-preview.redd.it/5nmmawSeGx60miQM3Iq-ueC9oyCLTLjjqX-qqY8uRsc.png?format=pjpg&auto=webp&s=2f973400b04d23f871b608b178e47fc01f9b8f1d',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -93,6 +124,7 @@ class N1InfoIIE(InfoExtractor):
|
||||
'title': 'Žaklina Tatalović Ani Brnabić: Pričate laži (VIDEO)',
|
||||
'upload_date': '20211102',
|
||||
'timestamp': 1635861677,
|
||||
'thumbnail': 'https://nova.rs/wp-content/uploads/2021/11/02/1635860298-TNJG_Ana_Brnabic_i_Zaklina_Tatalovic_100_dana_Vlade_GP.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/',
|
||||
@@ -104,6 +136,16 @@ class N1InfoIIE(InfoExtractor):
|
||||
'timestamp': 1687290536,
|
||||
'thumbnail': 'https://cdn.brid.tv/live/partners/26827/snapshot/1332368_th_6492013a8356f_1687290170.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://n1info.rs/vesti/vuciceva-turneja-po-srbiji-najavljuje-kontrarevoluciju-preti-svom-narodu-vredja-novinare/',
|
||||
'info_dict': {
|
||||
'id': '2025974',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vučićeva turneja po Srbiji: Najavljuje kontrarevoluciju, preti svom narodu, vređa novinare',
|
||||
'thumbnail': 'https://cdn-uc.brid.tv/live/partners/26827/snapshot/2025974_fhd_67c4a23280a81_1740939826.jpg',
|
||||
'timestamp': 1740939936,
|
||||
'upload_date': '20250302',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
|
||||
'only_matching': True,
|
||||
@@ -115,11 +157,11 @@ class N1InfoIIE(InfoExtractor):
|
||||
|
||||
title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
|
||||
timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))
|
||||
plugin_data = self._html_search_meta('BridPlugin', webpage)
|
||||
plugin_data = re.findall(r'\$bp\("(?:Brid|TargetVideo)_\d+",\s(.+)\);', webpage)
|
||||
entries = []
|
||||
if plugin_data:
|
||||
site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id')
|
||||
for video_data in re.findall(r'\$bp\("Brid_\d+", (.+)\);', webpage):
|
||||
for video_data in plugin_data:
|
||||
video_id = self._parse_json(video_data, title)['video']
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
@@ -140,7 +182,7 @@ class N1InfoIIE(InfoExtractor):
|
||||
'url': video_data.get('data-url'),
|
||||
'id': video_data.get('id'),
|
||||
'title': title,
|
||||
'thumbnail': video_data.get('data-thumbnail'),
|
||||
'thumbnail': traverse_obj(video_data, (('data-thumbnail', 'data-default_thumbnail'), {url_or_none}, any)),
|
||||
'timestamp': timestamp,
|
||||
'ie_key': 'N1InfoAsset',
|
||||
})
|
||||
@@ -152,7 +194,7 @@ class N1InfoIIE(InfoExtractor):
|
||||
if url.startswith('https://www.youtube.com'):
|
||||
entries.append(self.url_result(url, ie='Youtube'))
|
||||
elif url.startswith('https://www.redditmedia.com'):
|
||||
entries.append(self.url_result(url, ie='RedditR'))
|
||||
entries.append(self.url_result(url, ie='Reddit'))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
||||
@@ -72,6 +72,7 @@ class NaverBaseIE(InfoExtractor):
|
||||
'abr': int_or_none(bitrate.get('audio')),
|
||||
'filesize': int_or_none(stream.get('size')),
|
||||
'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
|
||||
'extra_param_to_segment_url': urllib.parse.urlencode(query, doseq=True) if stream_type == 'HLS' else None,
|
||||
})
|
||||
|
||||
extract_formats(get_list('video'), 'H264')
|
||||
@@ -168,6 +169,26 @@ class NaverIE(NaverBaseIE):
|
||||
'duration': 277,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.naver.com/v/67838091',
|
||||
'md5': '126ea384ab033bca59672c12cca7a6be',
|
||||
'info_dict': {
|
||||
'id': '67838091',
|
||||
'ext': 'mp4',
|
||||
'title': '[라인W 날씨] 내일 아침 서울 체감 -19도…호남·충남 대설',
|
||||
'description': 'md5:fe026e25634c85845698aed4b59db5a7',
|
||||
'timestamp': 1736347853,
|
||||
'upload_date': '20250108',
|
||||
'uploader': 'KBS뉴스',
|
||||
'uploader_id': 'kbsnews',
|
||||
'uploader_url': 'https://tv.naver.com/kbsnews',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'duration': 69,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
'params': {'format': 'HLS_144P'},
|
||||
}, {
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -736,7 +736,7 @@ class NBCStationsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
nbc_data = self._search_json(
|
||||
r'<script>\s*var\s+nbc\s*=', webpage, 'NBC JSON data', video_id)
|
||||
r'(?:<script>\s*var\s+nbc\s*=|Object\.assign\(nbc,)', webpage, 'NBC JSON data', video_id)
|
||||
pdk_acct = nbc_data.get('pdkAcct') or 'Yh1nAC'
|
||||
fw_ssid = traverse_obj(nbc_data, ('video', 'fwSSID'))
|
||||
|
||||
|
||||
117
yt_dlp/extractor/nest.py
Normal file
117
yt_dlp/extractor/nest.py
Normal file
@@ -0,0 +1,117 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, float_or_none, update_url_query, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NestIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.nest\.com/(?:embedded/)?live/(?P<id>\w+)'
|
||||
_EMBED_REGEX = [rf'<iframe [^>]*\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://video.nest.com/embedded/live/4fvYdSo8AX?autoplay=0',
|
||||
'info_dict': {
|
||||
'id': '4fvYdSo8AX',
|
||||
'ext': 'mp4',
|
||||
'title': 'startswith:Outside ',
|
||||
'alt_title': 'Outside',
|
||||
'description': '<null>',
|
||||
'location': 'Los Angeles',
|
||||
'availability': 'public',
|
||||
'thumbnail': r're:https?://',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.nest.com/live/4fvYdSo8AX',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.pacificblue.biz/noyo-harbor-webcam/',
|
||||
'info_dict': {
|
||||
'id': '4fvYdSo8AX',
|
||||
'ext': 'mp4',
|
||||
'title': 'startswith:Outside ',
|
||||
'alt_title': 'Outside',
|
||||
'description': '<null>',
|
||||
'location': 'Los Angeles',
|
||||
'availability': 'public',
|
||||
'thumbnail': r're:https?://',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
item = self._download_json(
|
||||
'https://video.nest.com/api/dropcam/cameras.get_by_public_token',
|
||||
video_id, query={'token': video_id})['items'][0]
|
||||
uuid = item.get('uuid')
|
||||
stream_domain = item.get('live_stream_host')
|
||||
if not stream_domain or not uuid:
|
||||
raise ExtractorError('Unable to construct playlist URL')
|
||||
|
||||
thumb_domain = item.get('nexus_api_nest_domain_host')
|
||||
return {
|
||||
'id': video_id,
|
||||
**traverse_obj(item, {
|
||||
'description': ('description', {str}),
|
||||
'title': (('title', 'name', 'where'), {str}, filter, any),
|
||||
'alt_title': ('name', {str}),
|
||||
'location': ((('timezone', {lambda x: x.split('/')[1].replace('_', ' ')}), 'where'), {str}, filter, any),
|
||||
}),
|
||||
'thumbnail': update_url_query(
|
||||
f'https://{thumb_domain}/get_image',
|
||||
{'uuid': uuid, 'public': video_id}) if thumb_domain else None,
|
||||
'availability': self._availability(is_private=item.get('is_public') is False),
|
||||
'formats': self._extract_m3u8_formats(
|
||||
f'https://{stream_domain}/nexus_aac/{uuid}/playlist.m3u8',
|
||||
video_id, 'mp4', live=True, query={'public': video_id}),
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class NestClipIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.nest\.com/(?:embedded/)?clip/(?P<id>\w+)'
|
||||
_EMBED_REGEX = [rf'<iframe [^>]*\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://video.nest.com/clip/f34c9dd237a44eca9a0001af685e3dff',
|
||||
'info_dict': {
|
||||
'id': 'f34c9dd237a44eca9a0001af685e3dff',
|
||||
'ext': 'mp4',
|
||||
'title': 'NestClip video #f34c9dd237a44eca9a0001af685e3dff',
|
||||
'thumbnail': 'https://clips.dropcam.com/f34c9dd237a44eca9a0001af685e3dff.jpg',
|
||||
'timestamp': 1735413474.468,
|
||||
'upload_date': '20241228',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.nest.com/embedded/clip/34e0432adc3c46a98529443d8ad5aa76',
|
||||
'info_dict': {
|
||||
'id': '34e0432adc3c46a98529443d8ad5aa76',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shootout at Veterans Boulevard at Fleur De Lis Drive',
|
||||
'thumbnail': 'https://clips.dropcam.com/34e0432adc3c46a98529443d8ad5aa76.jpg',
|
||||
'upload_date': '20230817',
|
||||
'timestamp': 1692262897.191,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://video.nest.com/api/dropcam/videos.get_by_filename', video_id,
|
||||
query={'filename': f'{video_id}.mp4'})
|
||||
return {
|
||||
'id': video_id,
|
||||
**traverse_obj(data, ('items', 0, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('thumbnail_url', {url_or_none}),
|
||||
'url': ('download_url', {url_or_none}),
|
||||
'timestamp': ('start_time', {float_or_none}),
|
||||
})),
|
||||
}
|
||||
@@ -13,11 +13,13 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
remove_start,
|
||||
@@ -26,6 +28,7 @@ from ..utils import (
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
@@ -430,6 +433,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'format_id': ('id', {str}),
|
||||
'abr': ('bitRate', {float_or_none(scale=1000)}),
|
||||
'asr': ('samplingRate', {int_or_none}),
|
||||
'quality': ('qualityLevel', {int_or_none}),
|
||||
}), get_all=False),
|
||||
'acodec': 'aac',
|
||||
}
|
||||
@@ -441,7 +445,9 @@ class NiconicoIE(InfoExtractor):
|
||||
min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000
|
||||
for video_fmt in video_fmts:
|
||||
video_fmt['tbr'] -= min_abr
|
||||
video_fmt['format_id'] = f'video-{video_fmt["tbr"]:.0f}'
|
||||
video_fmt['format_id'] = url_basename(video_fmt['url']).rpartition('.')[0]
|
||||
video_fmt['quality'] = traverse_obj(videos, (
|
||||
lambda _, v: v['id'] == video_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1
|
||||
yield video_fmt
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -592,8 +598,8 @@ class NiconicoPlaylistBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _parse_owner(item):
|
||||
return {
|
||||
'uploader': traverse_obj(item, ('owner', 'name')),
|
||||
'uploader_id': traverse_obj(item, ('owner', 'id')),
|
||||
'uploader': traverse_obj(item, ('owner', ('name', ('user', 'nickname')), {str}, any)),
|
||||
'uploader_id': traverse_obj(item, ('owner', 'id', {str})),
|
||||
}
|
||||
|
||||
def _fetch_page(self, list_id, page):
|
||||
@@ -666,7 +672,7 @@ class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
|
||||
mylist.get('name'), mylist.get('description'), **self._parse_owner(mylist))
|
||||
|
||||
|
||||
class NiconicoSeriesIE(InfoExtractor):
|
||||
class NiconicoSeriesIE(NiconicoPlaylistBaseIE):
|
||||
IE_NAME = 'niconico:series'
|
||||
_VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp(?:/user/\d+)?|nico\.ms)/series/(?P<id>\d+)'
|
||||
|
||||
@@ -675,6 +681,9 @@ class NiconicoSeriesIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '110226',
|
||||
'title': 'ご立派ァ!のシリーズ',
|
||||
'description': '楽しそうな外人の吹き替えをさせたら終身名誉ホモガキの右に出る人はいませんね…',
|
||||
'uploader': 'アルファるふぁ',
|
||||
'uploader_id': '44113208',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
@@ -682,6 +691,9 @@ class NiconicoSeriesIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '12312',
|
||||
'title': 'バトルスピリッツ お勧めカード紹介(調整中)',
|
||||
'description': '',
|
||||
'uploader': '野鳥',
|
||||
'uploader_id': '2275360',
|
||||
},
|
||||
'playlist_mincount': 103,
|
||||
}, {
|
||||
@@ -689,19 +701,21 @@ class NiconicoSeriesIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, list_id, resource, query):
|
||||
return self._download_json(
|
||||
f'https://nvapi.nicovideo.jp/v2/series/{list_id}', list_id,
|
||||
f'Downloading {resource}', query=query,
|
||||
headers=self._API_HEADERS)['data']
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
series = self._call_api(list_id, 'list', {
|
||||
'pageSize': 1,
|
||||
})['detail']
|
||||
|
||||
title = self._search_regex(
|
||||
(r'<title>「(.+)(全',
|
||||
r'<div class="TwitterShareButton"\s+data-text="(.+)\s+https:'),
|
||||
webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = unescapeHTML(title)
|
||||
json_data = next(self._yield_json_ld(webpage, None, fatal=False))
|
||||
return self.playlist_from_matches(
|
||||
traverse_obj(json_data, ('itemListElement', ..., 'url')), list_id, title, ie=NiconicoIE)
|
||||
return self.playlist_result(
|
||||
self._entries(list_id), list_id,
|
||||
series.get('title'), series.get('description'), **self._parse_owner(series))
|
||||
|
||||
|
||||
class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
|
||||
@@ -1025,6 +1039,7 @@ class NiconicoLiveIE(InfoExtractor):
|
||||
thumbnails.append({
|
||||
'id': f'{name}_{width}x{height}',
|
||||
'url': img_url,
|
||||
'ext': traverse_obj(parse_qs(img_url), ('image', 0, {determine_ext(default_ext='jpg')})),
|
||||
**res,
|
||||
})
|
||||
|
||||
|
||||
@@ -1,34 +1,46 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj, value
|
||||
|
||||
|
||||
class NineNowIE(InfoExtractor):
|
||||
IE_NAME = '9now.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/?#]+/){2}(?P<id>(?P<type>clip|episode)-[^/?#]+)'
|
||||
_GEO_BYPASS = False
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
|
||||
'md5': '17cf47d63ec9323e562c9957a968b565',
|
||||
'url': 'https://www.9now.com.au/today/season-2025/clip-cm8hw9h5z00080hquqa5hszq7',
|
||||
'info_dict': {
|
||||
'id': '16801',
|
||||
'id': '6370295582112',
|
||||
'ext': 'mp4',
|
||||
'title': 'St. Kilda\'s Joey Montagna on the potential for a player\'s strike',
|
||||
'description': 'Is a boycott of the NAB Cup "on the table"?',
|
||||
'title': 'Would Karl Stefanovic be able to land a plane?',
|
||||
'description': 'The Today host\'s skills are put to the test with the latest simulation tech.',
|
||||
'uploader_id': '4460760524001',
|
||||
'upload_date': '20160713',
|
||||
'timestamp': 1468421266,
|
||||
'duration': 197.376,
|
||||
'tags': ['flights', 'technology', 'Karl Stefanovic'],
|
||||
'season': 'Season 2025',
|
||||
'season_number': 2025,
|
||||
'series': 'TODAY',
|
||||
'timestamp': 1742507988,
|
||||
'upload_date': '20250320',
|
||||
'release_timestamp': 1742507983,
|
||||
'release_date': '20250320',
|
||||
'thumbnail': r're:https?://.+/1920x0/.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS/DASH fragments and mp4 URLs are geo-restricted; only available in AU',
|
||||
},
|
||||
'skip': 'Only available in Australia',
|
||||
}, {
|
||||
# episode
|
||||
'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19',
|
||||
@@ -41,7 +53,7 @@ class NineNowIE(InfoExtractor):
|
||||
# episode of series
|
||||
'url': 'https://www.9now.com.au/lego-masters/season-3/episode-3',
|
||||
'info_dict': {
|
||||
'id': '6249614030001',
|
||||
'id': '6308830406112',
|
||||
'title': 'Episode 3',
|
||||
'ext': 'mp4',
|
||||
'season_number': 3,
|
||||
@@ -50,72 +62,87 @@ class NineNowIE(InfoExtractor):
|
||||
'uploader_id': '4460760524001',
|
||||
'timestamp': 1619002200,
|
||||
'upload_date': '20210421',
|
||||
'duration': 3574.085,
|
||||
'thumbnail': r're:https?://.+/1920x0/.+\.jpg',
|
||||
'tags': ['episode'],
|
||||
'series': 'Lego Masters',
|
||||
'season': 'Season 3',
|
||||
'episode': 'Episode 3',
|
||||
'release_timestamp': 1619002200,
|
||||
'release_date': '20210421',
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': 'HLS/DASH fragments and mp4 URLs are geo-restricted; only available in AU',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.9now.com.au/married-at-first-sight/season-12/episode-1',
|
||||
'info_dict': {
|
||||
'id': '6367798770112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 1',
|
||||
'description': r're:The cultural sensation of Married At First Sight returns with our first weddings! .{90}$',
|
||||
'uploader_id': '4460760524001',
|
||||
'duration': 5415.079,
|
||||
'thumbnail': r're:https?://.+/1920x0/.+\.png',
|
||||
'tags': ['episode'],
|
||||
'season': 'Season 12',
|
||||
'season_number': 12,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'series': 'Married at First Sight',
|
||||
'timestamp': 1737973800,
|
||||
'upload_date': '20250127',
|
||||
'release_timestamp': 1737973800,
|
||||
'release_date': '20250127',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS/DASH fragments and mp4 URLs are geo-restricted; only available in AU',
|
||||
},
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId={}'
|
||||
|
||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv and yt_dlp.extractor.goplay
|
||||
def _find_json(self, s):
|
||||
return self._search_json(
|
||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
display_id, video_type = self._match_valid_url(url).group('id', 'type')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
page_data = self._parse_json(self._search_regex(
|
||||
r'window\.__data\s*=\s*({.*?});', webpage,
|
||||
'page data', default='{}'), display_id, fatal=False)
|
||||
if not page_data:
|
||||
page_data = self._parse_json(self._parse_json(self._search_regex(
|
||||
r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
|
||||
webpage, 'page data'), display_id), display_id)
|
||||
|
||||
for kind in ('episode', 'clip'):
|
||||
current_key = page_data.get(kind, {}).get(
|
||||
f'current{kind.capitalize()}Key')
|
||||
if not current_key:
|
||||
continue
|
||||
cache = page_data.get(kind, {}).get(f'{kind}Cache', {})
|
||||
if not cache:
|
||||
continue
|
||||
common_data = {
|
||||
'episode': (cache.get(current_key) or next(iter(cache.values())))[kind],
|
||||
'season': (cache.get(current_key) or next(iter(cache.values()))).get('season', None),
|
||||
}
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Unable to find video data')
|
||||
common_data = traverse_obj(
|
||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
||||
(..., {json.loads}, ..., {self._find_json},
|
||||
lambda _, v: v['payload'][video_type]['slug'] == display_id,
|
||||
'payload', any, {require('video data')}))
|
||||
|
||||
if not self.get_param('allow_unplayable_formats') and try_get(common_data, lambda x: x['episode']['video']['drm'], bool):
|
||||
if traverse_obj(common_data, (video_type, 'video', 'drm', {bool})):
|
||||
self.report_drm(display_id)
|
||||
brightcove_id = try_get(
|
||||
common_data, lambda x: x['episode']['video']['brightcoveId'], str) or 'ref:{}'.format(common_data['episode']['video']['referenceId'])
|
||||
video_id = str_or_none(try_get(common_data, lambda x: x['episode']['video']['id'])) or brightcove_id
|
||||
|
||||
title = try_get(common_data, lambda x: x['episode']['name'], str)
|
||||
season_number = try_get(common_data, lambda x: x['season']['seasonNumber'], int)
|
||||
episode_number = try_get(common_data, lambda x: x['episode']['episodeNumber'], int)
|
||||
timestamp = unified_timestamp(try_get(common_data, lambda x: x['episode']['airDate'], str))
|
||||
release_date = unified_strdate(try_get(common_data, lambda x: x['episode']['availability'], str))
|
||||
thumbnails_data = try_get(common_data, lambda x: x['episode']['image']['sizes'], dict) or {}
|
||||
thumbnails = [{
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail_url,
|
||||
'width': int_or_none(thumbnail_id[1:]),
|
||||
} for thumbnail_id, thumbnail_url in thumbnails_data.items()]
|
||||
brightcove_id = traverse_obj(common_data, (
|
||||
video_type, 'video', (
|
||||
('brightcoveId', {str}),
|
||||
('referenceId', {str}, {lambda x: f'ref:{x}' if x else None}),
|
||||
), any, {require('brightcove ID')}))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': self._GEO_COUNTRIES}),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': try_get(common_data, lambda x: x['episode']['description'], str),
|
||||
'duration': float_or_none(try_get(common_data, lambda x: x['episode']['video']['duration'], float), 1000),
|
||||
'thumbnails': thumbnails,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'timestamp': timestamp,
|
||||
'release_date': release_date,
|
||||
'ie_key': BrightcoveNewIE.ie_key(),
|
||||
'url': self.BRIGHTCOVE_URL_TEMPLATE.format(brightcove_id),
|
||||
**traverse_obj(common_data, {
|
||||
'id': (video_type, 'video', 'id', {int}, ({str_or_none}, {value(brightcove_id)}), any),
|
||||
'title': (video_type, 'name', {str}),
|
||||
'description': (video_type, 'description', {str}),
|
||||
'duration': (video_type, 'video', 'duration', {float_or_none(scale=1000)}),
|
||||
'tags': (video_type, 'tags', ..., 'name', {str}, all, filter),
|
||||
'series': ('tvSeries', 'name', {str}),
|
||||
'season_number': ('season', 'seasonNumber', {int_or_none}),
|
||||
'episode_number': ('episode', 'episodeNumber', {int_or_none}),
|
||||
'timestamp': ('episode', 'airDate', {parse_iso8601}),
|
||||
'release_timestamp': (video_type, 'availability', {parse_iso8601}),
|
||||
'thumbnails': (video_type, 'image', 'sizes', {dict.items}, lambda _, v: url_or_none(v[1]), {
|
||||
'id': 0,
|
||||
'url': 1,
|
||||
'width': (1, {parse_resolution}, 'width'),
|
||||
}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
try_get,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
@@ -27,6 +28,12 @@ class NRKBaseIE(InfoExtractor):
|
||||
)/'''
|
||||
|
||||
def _extract_nrk_formats(self, asset_url, video_id):
|
||||
asset_url = update_url_query(asset_url, {
|
||||
# Remove 'adap' to return all streams (known values are: small, large, small_h265, large_h265)
|
||||
'adap': [],
|
||||
# Disable subtitles since they are fetched separately
|
||||
's': 0,
|
||||
})
|
||||
if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
|
||||
return self._extract_akamai_formats(asset_url, video_id)
|
||||
asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
|
||||
@@ -58,7 +65,10 @@ class NRKBaseIE(InfoExtractor):
|
||||
return self._download_json(
|
||||
urljoin('https://psapi.nrk.no/', path),
|
||||
video_id, note or f'Downloading {item} JSON',
|
||||
fatal=fatal, query=query)
|
||||
fatal=fatal, query=query, headers={
|
||||
# Needed for working stream URLs, see https://github.com/yt-dlp/yt-dlp/issues/12192
|
||||
'Accept': 'application/vnd.nrk.psapi+json; version=9; player=tv-player; device=player-core',
|
||||
})
|
||||
|
||||
|
||||
class NRKIE(NRKBaseIE):
|
||||
@@ -77,13 +87,17 @@ class NRKIE(NRKBaseIE):
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
'md5': 'f46be075326e23ad0e524edfcb06aeb6',
|
||||
'md5': '2b88a652ad2e275591e61cf550887eec',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dompap og andre fugler i Piip-Show',
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
'duration': 262,
|
||||
'upload_date': '20140325',
|
||||
'thumbnail': r're:^https?://gfx\.nrk\.no/.*$',
|
||||
'timestamp': 1395751833,
|
||||
'alt_title': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
},
|
||||
}, {
|
||||
# audio
|
||||
@@ -95,6 +109,10 @@ class NRKIE(NRKBaseIE):
|
||||
'title': 'Slik høres internett ut når du er blind',
|
||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||
'duration': 20,
|
||||
'timestamp': 1398429565,
|
||||
'alt_title': 'Cathrine Lie Wathne er blind, og bruker hurtigtaster for å navigere seg rundt på ulike nettsider.',
|
||||
'thumbnail': 'https://gfx.nrk.no/urxQMSXF-WnbfjBH5ke2igLGyN27EdJVWZ6FOsEAclhA',
|
||||
'upload_date': '20140425',
|
||||
},
|
||||
}, {
|
||||
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
||||
@@ -152,7 +170,7 @@ class NRKIE(NRKBaseIE):
|
||||
return self._call_api(f'playback/{item}/{video_id}', video_id, item, query=query)
|
||||
raise
|
||||
|
||||
# known values for preferredCdn: akamai, iponly, minicdn and telenor
|
||||
# known values for preferredCdn: akamai, globalconnect and telenor
|
||||
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
|
||||
|
||||
video_id = try_get(manifest, lambda x: x['id'], str) or video_id
|
||||
@@ -307,6 +325,13 @@ class NRKTVIE(InfoExtractor):
|
||||
'ext': 'vtt',
|
||||
}],
|
||||
},
|
||||
'upload_date': '20170627',
|
||||
'timestamp': 1498591822,
|
||||
'thumbnail': 'https://gfx.nrk.no/myRSc4vuFlahB60P3n6swwRTQUZI1LqJZl9B7icZFgzA',
|
||||
'alt_title': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
@@ -321,6 +346,13 @@ class NRKTVIE(InfoExtractor):
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23. mai 2014',
|
||||
'age_limit': 0,
|
||||
'timestamp': 1584593700,
|
||||
'thumbnail': 'https://gfx.nrk.no/u7uCe79SEfPVGRAGVp2_uAZnNc4mfz_kjXg6Bgek8lMQ',
|
||||
'season_id': '126936',
|
||||
'upload_date': '20200319',
|
||||
'season': 'Season 2014',
|
||||
'season_number': 2014,
|
||||
'episode_number': 3,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
|
||||
@@ -343,7 +343,7 @@ class NYTimesCookingIE(NYTimesBaseIE):
|
||||
if media_ids:
|
||||
media_ids.append(lead_video_id)
|
||||
return self.playlist_result(
|
||||
[self._extract_video(media_id) for media_id in media_ids], page_id, title, description)
|
||||
map(self._extract_video, media_ids), page_id, title, description)
|
||||
|
||||
return {
|
||||
**self._extract_video(lead_video_id),
|
||||
|
||||
@@ -67,7 +67,7 @@ class OpenRecBaseIE(InfoExtractor):
|
||||
|
||||
class OpenRecIE(OpenRecBaseIE):
|
||||
IE_NAME = 'openrec'
|
||||
_VALID_URL = r'https?://(?:www\.)?openrec\.tv/live/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?openrec\.tv/live/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.openrec.tv/live/2p8v31qe4zy',
|
||||
'only_matching': True,
|
||||
@@ -85,7 +85,7 @@ class OpenRecIE(OpenRecBaseIE):
|
||||
|
||||
class OpenRecCaptureIE(OpenRecBaseIE):
|
||||
IE_NAME = 'openrec:capture'
|
||||
_VALID_URL = r'https?://(?:www\.)?openrec\.tv/capture/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?openrec\.tv/capture/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.openrec.tv/capture/l9nk2x4gn14',
|
||||
'only_matching': True,
|
||||
@@ -129,7 +129,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
|
||||
|
||||
class OpenRecMovieIE(OpenRecBaseIE):
|
||||
IE_NAME = 'openrec:movie'
|
||||
_VALID_URL = r'https?://(?:www\.)?openrec\.tv/movie/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?openrec\.tv/movie/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.openrec.tv/movie/nqz5xl5km8v',
|
||||
'info_dict': {
|
||||
@@ -141,6 +141,9 @@ class OpenRecMovieIE(OpenRecBaseIE):
|
||||
'uploader_id': 'taiki_to_kazuhiro',
|
||||
'timestamp': 1638856800,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.openrec.tv/movie/2p8vvex548y?playlist_id=98brq96vvsgn2nd',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -63,6 +63,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
'info_dict': {
|
||||
'id': '743933',
|
||||
'ext': 'mp3',
|
||||
'alt_title': 'cd166.mp3',
|
||||
'title': 'Episode 166: David Smalley of Dogma Debate',
|
||||
'description': 'md5:34d207dd29aa90e24f1b3f58841b81c7',
|
||||
'uploader': 'Cognitive Dissonance Podcast',
|
||||
@@ -280,7 +281,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
video_id = self._match_id(url)
|
||||
post = self._call_api(
|
||||
f'posts/{video_id}', video_id, query={
|
||||
'fields[media]': 'download_url,mimetype,size_bytes',
|
||||
'fields[media]': 'download_url,mimetype,size_bytes,file_name',
|
||||
'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title,current_user_can_view',
|
||||
'fields[user]': 'full_name,url',
|
||||
'fields[post_tag]': 'value',
|
||||
@@ -317,6 +318,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
'ext': ext,
|
||||
'filesize': size_bytes,
|
||||
'url': download_url,
|
||||
'alt_title': traverse_obj(media_attributes, ('file_name', {str})),
|
||||
})
|
||||
|
||||
elif include_type == 'user':
|
||||
@@ -457,7 +459,7 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?patreon\.com/(?:
|
||||
(?:m|api/campaigns)/(?P<campaign_id>\d+)|
|
||||
(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
|
||||
(?:c/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
|
||||
)(?:/posts)?/?(?:$|[?#])'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.patreon.com/dissonancepod/',
|
||||
@@ -509,6 +511,26 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
'thumbnail': r're:^https?://.*$',
|
||||
},
|
||||
'playlist_mincount': 201,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/c/OgSog',
|
||||
'info_dict': {
|
||||
'id': '8504388',
|
||||
'title': 'OGSoG',
|
||||
'description': r're:(?s)Hello and welcome to our Patreon page. We are Mari, Lasercorn, .+',
|
||||
'channel': 'OGSoG',
|
||||
'channel_id': '8504388',
|
||||
'channel_url': 'https://www.patreon.com/OgSog',
|
||||
'uploader_url': 'https://www.patreon.com/OgSog',
|
||||
'uploader_id': '72323575',
|
||||
'uploader': 'David Moss',
|
||||
'thumbnail': r're:https?://.+/.+',
|
||||
'channel_follower_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'playlist_mincount': 331,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/c/OgSog/posts',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/dissonancepod/posts',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -47,7 +47,7 @@ class PBSIE(InfoExtractor):
|
||||
(r'video\.kpbs\.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/
|
||||
(r'video\.kqed\.org', 'KQED (KQED)'), # http://www.kqed.org
|
||||
(r'vids\.kvie\.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org
|
||||
(r'video\.pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
|
||||
(r'(?:video\.|www\.)pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
|
||||
(r'video\.valleypbs\.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/
|
||||
(r'video\.cptv\.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org
|
||||
(r'watch\.knpb\.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/
|
||||
@@ -61,7 +61,7 @@ class PBSIE(InfoExtractor):
|
||||
(r'video\.wyomingpbs\.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org
|
||||
(r'video\.cpt12\.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/
|
||||
(r'video\.kbyueleven\.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/
|
||||
(r'video\.thirteen\.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
|
||||
(r'(?:video\.|www\.)thirteen\.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
|
||||
(r'video\.wgbh\.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org
|
||||
(r'video\.wgby\.org', 'WGBY (WGBY)'), # http://www.wgby.org
|
||||
(r'watch\.njtvonline\.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/
|
||||
@@ -185,12 +185,13 @@ class PBSIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
# Direct video URL
|
||||
(?:{})/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
|
||||
# Article with embedded player (or direct video)
|
||||
(?:www\.)?pbs\.org/(?:[^/]+/){{1,5}}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
|
||||
# Player
|
||||
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)
|
||||
# Player
|
||||
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/?#]+) |
|
||||
# Direct video URL, or article with embedded player
|
||||
(?:{})/(?:
|
||||
(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/#]|$) |
|
||||
(?:[^/?#]+/){{1,5}}(?P<presumptive_id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])
|
||||
)
|
||||
)
|
||||
'''.format('|'.join(next(zip(*_STATIONS))))
|
||||
|
||||
@@ -207,16 +208,40 @@ class PBSIE(InfoExtractor):
|
||||
'description': 'md5:31b664af3c65fd07fa460d306b837d00',
|
||||
'duration': 3190,
|
||||
},
|
||||
'skip': 'dead URL',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.thirteen.org/programs/the-woodwrights-shop/carving-away-with-mary-may-tioglz/',
|
||||
'info_dict': {
|
||||
'id': '3004803331',
|
||||
'ext': 'mp4',
|
||||
'title': "The Woodwright's Shop - Carving Away with Mary May",
|
||||
'description': 'md5:7cbaaaa8b9bcc78bd8f0e31911644e28',
|
||||
'duration': 1606,
|
||||
'display_id': 'carving-away-with-mary-may-tioglz',
|
||||
'chapters': [],
|
||||
'thumbnail': 'https://image.pbs.org/video-assets/NcnTxNl-asset-mezzanine-16x9-K0Keoyv.jpg',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
|
||||
'md5': '6f722cb3c3982186d34b0f13374499c7',
|
||||
'md5': '372b12b670070de39438b946474df92f',
|
||||
'info_dict': {
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - Losing Iraq',
|
||||
'description': 'md5:5979a4d069b157f622d02bff62fbe654',
|
||||
'duration': 5050,
|
||||
'chapters': [
|
||||
{'start_time': 0.0, 'end_time': 1234.0, 'title': 'After Saddam, Chaos'},
|
||||
{'start_time': 1233.0, 'end_time': 1719.0, 'title': 'The Insurgency Takes Root'},
|
||||
{'start_time': 1718.0, 'end_time': 2461.0, 'title': 'A Light Footprint'},
|
||||
{'start_time': 2460.0, 'end_time': 3589.0, 'title': 'The Surge '},
|
||||
{'start_time': 3588.0, 'end_time': 4355.0, 'title': 'The Withdrawal '},
|
||||
{'start_time': 4354.0, 'end_time': 5051.0, 'title': 'ISIS on the March '},
|
||||
],
|
||||
'display_id': 'losing-iraq',
|
||||
'thumbnail': 'https://image.pbs.org/video-assets/pbs/frontline/138098/images/mezzanine_401.jpg',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -403,6 +428,19 @@ class PBSIE(InfoExtractor):
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.pbssocal.org/shows/newshour/clip/capehart-johnson-1715984001',
|
||||
'info_dict': {
|
||||
'id': '3091549094',
|
||||
'ext': 'mp4',
|
||||
'title': 'PBS NewsHour - Capehart and Johnson on the unusual Biden-Trump debate plans',
|
||||
'description': 'Capehart and Johnson on how the Biden-Trump debates could shape the campaign season',
|
||||
'display_id': 'capehart-johnson-1715984001',
|
||||
'duration': 593,
|
||||
'thumbnail': 'https://image.pbs.org/video-assets/mF3oSVn-asset-mezzanine-16x9-QeXjXPy.jpg',
|
||||
'chapters': [],
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
||||
'only_matching': True,
|
||||
@@ -463,10 +501,12 @@ class PBSIE(InfoExtractor):
|
||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||
r'class="coveplayerid">([^<]+)<', # coveplayer
|
||||
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
|
||||
r'\sclass="passportcoveplayer"[^>]*\sdata-media="(\d+)', # https://www.thirteen.org/programs/the-woodwrights-shop/who-wrote-the-book-of-sloyd-fggvvq/
|
||||
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
||||
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
||||
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
|
||||
r'\bhttps?://player\.pbs\.org/[\w-]+player/(\d+)', # last pattern to avoid false positives
|
||||
]
|
||||
|
||||
media_id = self._search_regex(
|
||||
|
||||
@@ -23,9 +23,9 @@ class PinterestBaseIE(InfoExtractor):
|
||||
def _call_api(self, resource, video_id, options):
    """Query a Pinterest ``<resource>Resource`` endpoint and return its ``resource_response``.

    @param resource   Resource name interpolated into the endpoint path
                      (``/resource/{resource}Resource/get/``).
    @param video_id   ID passed to ``_download_json`` for progress/error messages.
    @param options    JSON-serialisable object sent as ``{'options': options}``
                      in the ``data`` query parameter.
    @returns          The ``resource_response`` member of the decoded JSON reply.
    """
    return self._download_json(
        f'https://www.pinterest.com/resource/{resource}Resource/get/',
        video_id, f'Download {resource} JSON metadata',
        query={'data': json.dumps({'options': options})},
        # Header added together with the query change in this revision
        headers={'X-Pinterest-PWS-Handler': 'www/[username].js'})['resource_response']
|
||||
|
||||
def _extract_video(self, data, extract_formats=True):
|
||||
video_id = data['id']
|
||||
|
||||
99
yt_dlp/extractor/piramidetv.py
Normal file
99
yt_dlp/extractor/piramidetv.py
Normal file
@@ -0,0 +1,99 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, smuggle_url, unsmuggle_url, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PiramideTVIE(InfoExtractor):
    """Extractor for piramide.tv video pages; can also walk the "next video" chain as a playlist."""

    _VALID_URL = r'https?://piramide\.tv/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://piramide.tv/video/wWtBAORdJUTh',
        'info_dict': {
            'id': 'wWtBAORdJUTh',
            'ext': 'mp4',
            'title': 'md5:79f9c8183ea6a35c836923142cf0abcc',
            'description': '',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/W86PgQDn/thumbnails/B9gpIxkH.jpg',
            'channel': 'León Picarón',
            'channel_id': 'leonpicaron',
            'timestamp': 1696460362,
            'upload_date': '20231004',
        },
    }, {
        'url': 'https://piramide.tv/video/wcYn6li79NgN',
        'info_dict': {
            'id': 'wcYn6li79NgN',
            'ext': 'mp4',
            'title': 'ACEPTO TENER UN BEBE CON MI NOVIA\u2026? | Parte 1',
            'description': '',
            'channel': 'ARTA GAME',
            'channel_id': 'arta_game',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/cnEdGp5X/thumbnails/rHAaWfP7.jpg',
            'timestamp': 1703434976,
            'upload_date': '20231224',
        },
    }]

    def _extract_video(self, video_id):
        """Return ``(next_video_id, info_dict)`` for a single video.

        Both the metadata request and the HLS manifest request are non-fatal,
        so either part of the result may be empty.
        """
        metadata = self._download_json(
            f'https://hermes.piramide.tv/video/data/{video_id}', video_id, fatal=False)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'https://cdn.piramide.tv/video/{video_id}/manifest.m3u8', video_id, fatal=False)

        following_id = traverse_obj(metadata, ('video', 'next_video', 'id', {str}))
        # Fields from the API override the defaults below (notably 'id')
        api_fields = traverse_obj(metadata, ('video', {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'thumbnail': ('media', 'thumbnail', {url_or_none}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
            'timestamp': ('date', {parse_iso8601}),
        }))
        return following_id, {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **api_fields,
        }

    def _entries(self, video_id):
        """Yield info dicts by following ``next_video`` links until none is left or one repeats."""
        seen = set()
        pending = video_id
        while True:
            seen.add(pending)
            pending, info = self._extract_video(pending)
            yield info
            # Stop on a missing successor or when the chain loops back on itself
            if pending in seen or not pending:
                return

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        if not self._yes_playlist(video_id, video_id, smuggled_data):
            _, info = self._extract_video(video_id)
            return info
        return self.playlist_result(self._entries(video_id), video_id)
|
||||
|
||||
|
||||
class PiramideTVChannelIE(InfoExtractor):
    """Extractor for piramide.tv channel pages; yields one URL result per listed video."""

    _VALID_URL = r'https?://piramide\.tv/channel/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://piramide.tv/channel/thekalo',
        'playlist_mincount': 10,
        'info_dict': {
            'id': 'thekalo',
        },
    }]

    def _entries(self, channel_name):
        """Yield URL results for every listed video that has a truthy ``id``."""
        listing = self._download_json(
            f'https://hermes.piramide.tv/channel/list/{channel_name}/date/100000', channel_name)
        for item in traverse_obj(listing, ('videos', lambda _, v: v['id'])):
            # Smuggle 'force_noplaylist' so the video extractor receives it with the URL
            video_url = smuggle_url(
                f'https://piramide.tv/video/{item["id"]}', {'force_noplaylist': True})
            known_fields = traverse_obj(item, {
                'id': ('id', {str}),
                'title': ('title', {str}),
                'description': ('description', {str}),
            })
            yield self.url_result(video_url, **known_fields)

    def _real_extract(self, url):
        channel_name = self._match_id(url)
        return self.playlist_result(self._entries(channel_name), channel_name)
|
||||
@@ -1,4 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
@@ -110,8 +111,8 @@ class PixivSketchUserIE(PixivSketchBaseIE):
|
||||
if not traverse_obj(data, 'is_broadcasting'):
|
||||
try:
|
||||
self._call_api(user_id, 'users/current.json', url, 'Investigating reason for request failure')
|
||||
except ExtractorError as ex:
|
||||
if ex.cause and ex.cause.code == 401:
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
self.raise_login_required(f'Please log in, or use direct link like https://sketch.pixiv.net/@{user_id}/1234567890', method='cookies')
|
||||
raise ExtractorError('This user is offline', expected=True)
|
||||
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -142,39 +145,73 @@ class PlaySuisseIE(InfoExtractor):
|
||||
id
|
||||
url
|
||||
}'''
|
||||
_LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com'
|
||||
_LOGIN_PATH = 'B2C_1A__SignInV2'
|
||||
_CLIENT_ID = '1e33f1bf-8bf3-45e4-bbd9-c9ad934b5fca'
|
||||
_LOGIN_BASE = 'https://account.srgssr.ch'
|
||||
_ID_TOKEN = None
|
||||
|
||||
def _perform_login(self, username, password):
    """Log in at ``self._LOGIN_BASE`` and store the resulting ID token in ``self._ID_TOKEN``.

    The flow is an authorization-code exchange protected by an S256 code
    challenge: request a session, submit the username, submit the password,
    obtain the authorization code from the redirect URL, then trade it
    (together with the code verifier) for the token.

    @param username   Account identifier sent as ``identifier``.
    @param password   Account password.
    @raises ExtractorError  ``Invalid username`` / ``Invalid password`` (expected)
                            when the respective step fails, or ``Login failed``
                            when no ID token is returned.
    """
    # Random verifier and its base64url(SHA-256) challenge with padding stripped
    code_verifier = uuid.uuid4().hex + uuid.uuid4().hex + uuid.uuid4().hex
    code_challenge = base64.urlsafe_b64encode(
        hashlib.sha256(code_verifier.encode()).digest()).decode().rstrip('=')

    # The session's request ID is read from the query string of the final URL
    request_id = parse_qs(self._request_webpage(
        f'{self._LOGIN_BASE}/authz-srv/authz', None, 'Requesting session ID', query={
            'client_id': self._CLIENT_ID,
            'redirect_uri': 'https://www.playsuisse.ch/auth',
            'scope': 'email profile openid offline_access',
            'response_type': 'code',
            'code_challenge': code_challenge,
            'code_challenge_method': 'S256',
            'view_type': 'login',
        }).url)['requestId'][0]

    try:
        exchange_id = self._download_json(
            f'{self._LOGIN_BASE}/verification-srv/v2/authenticate/initiate/password', None,
            'Submitting username', headers={'content-type': 'application/json'}, data=json.dumps({
                'usage_type': 'INITIAL_AUTHENTICATION',
                'request_id': request_id,
                'medium_id': 'PASSWORD',
                'type': 'password',
                'identifier': username,
            }).encode())['data']['exchange_id']['exchange_id']
    except ExtractorError:
        raise ExtractorError('Invalid username', expected=True)

    try:
        login_data = self._download_json(
            f'{self._LOGIN_BASE}/verification-srv/v2/authenticate/authenticate/password', None,
            'Submitting password', headers={'content-type': 'application/json'}, data=json.dumps({
                'requestId': request_id,
                'exchange_id': exchange_id,
                'type': 'password',
                'password': password,
            }).encode())['data']
    except ExtractorError:
        raise ExtractorError('Invalid password', expected=True)

    # The authorization code is likewise taken from the final URL's query string
    authorization_code = parse_qs(self._request_webpage(
        f'{self._LOGIN_BASE}/login-srv/verification/login', None, 'Logging in',
        data=urlencode_postdata({
            'requestId': request_id,
            'exchange_id': login_data['exchange_id']['exchange_id'],
            'verificationType': 'password',
            'sub': login_data['sub'],
            'status_id': login_data['status_id'],
            'rememberMe': True,
            'lat': '',
            'lon': '',
        })).url)['code'][0]

    # Empty body (data=b'') with the parameters carried in the query string
    self._ID_TOKEN = self._download_json(
        f'{self._LOGIN_BASE}/proxy/token', None, 'Downloading token', data=b'', query={
            'client_id': self._CLIENT_ID,
            'redirect_uri': 'https://www.playsuisse.ch/auth',
            'code': authorization_code,
            'code_verifier': code_verifier,
            'grant_type': 'authorization_code',
        })['id_token']

    if not self._ID_TOKEN:
        raise ExtractorError('Login failed')
|
||||
|
||||
|
||||
130
yt_dlp/extractor/plvideo.py
Normal file
130
yt_dlp/extractor/plvideo.py
Normal file
@@ -0,0 +1,130 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PlVideoIE(InfoExtractor):
    """Extractor for plvideo.ru ``watch?v=`` and ``shorts/`` pages."""

    IE_DESC = 'Платформа'
    _VALID_URL = r'https?://(?:www\.)?plvideo\.ru/(?:watch\?(?:[^#]+&)?v=|shorts/)(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://plvideo.ru/watch?v=Y5JzUzkcQTMK',
        'md5': 'fe8e18aca892b3b31f3bf492169f8a26',
        'info_dict': {
            'id': 'Y5JzUzkcQTMK',
            'ext': 'mp4',
            'thumbnail': 'https://img.plvideo.ru/images/fp-2024-images/v/cover/37/dd/37dd00a4c96c77436ab737e85947abd7/original663a4a3bb713e5.33151959.jpg',
            'title': 'Presidente de Cuba llega a Moscú en una visita de trabajo',
            'channel': 'RT en Español',
            'channel_id': 'ZH4EKqunVDvo',
            'media_type': 'video',
            'comment_count': int,
            'tags': ['rusia', 'cuba', 'russia', 'miguel díaz-canel'],
            'description': 'md5:a1a395d900d77a86542a91ee0826c115',
            'release_timestamp': 1715096124,
            'channel_is_verified': True,
            'like_count': int,
            'timestamp': 1715095911,
            'duration': 44320,
            'view_count': int,
            'dislike_count': int,
            'upload_date': '20240507',
            'modified_date': '20240701',
            'channel_follower_count': int,
            'modified_timestamp': 1719824073,
        },
    }, {
        'url': 'https://plvideo.ru/shorts/S3Uo9c-VLwFX',
        'md5': '7d8fa2279406c69d2fd2a6fc548a9805',
        'info_dict': {
            'id': 'S3Uo9c-VLwFX',
            'ext': 'mp4',
            'channel': 'Romaatom',
            'tags': 'count:22',
            'dislike_count': int,
            'upload_date': '20241130',
            'description': 'md5:452e6de219bf2f32bb95806c51c3b364',
            'duration': 58433,
            'modified_date': '20241130',
            'thumbnail': 'https://img.plvideo.ru/images/fp-2024-11-cover/S3Uo9c-VLwFX/f9318999-a941-482b-b700-2102a7049366.jpg',
            'media_type': 'shorts',
            'like_count': int,
            'modified_timestamp': 1732961458,
            'channel_is_verified': True,
            'channel_id': 'erJyyTIbmUd1',
            'timestamp': 1732961355,
            'comment_count': int,
            'title': 'Белоусов отменил приказы о кадровом резерве на гражданской службе',
            'channel_follower_count': int,
            'view_count': int,
            'release_timestamp': 1732961458,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        video_data = self._download_json(
            f'https://api.g1.plvideo.ru/v1/videos/{video_id}?Aud=18', video_id)

        # One format per quality profile that carries a valid HLS URL
        formats = []
        hls_profiles = traverse_obj(video_data, ('item', 'profiles', {dict.items}, lambda _, v: url_or_none(v[1]['hls'])))
        for quality, profile in hls_profiles:
            profile_fields = traverse_obj(profile, {
                'url': 'hls',
                'fps': ('fps', {float_or_none}),
                'aspect_ratio': ('aspectRatio', {float_or_none}),
            })
            formats.append({
                'format_id': quality,
                'ext': 'mp4',
                'protocol': 'm3u8_native',
                **profile_fields,
                **parse_resolution(quality),
            })

        # A livestream URL marks the item as live and adds its HLS formats
        livestream_url = traverse_obj(video_data, ('item', 'livestream', 'url', {url_or_none}))
        is_live = bool(livestream_url)
        if is_live:
            formats.extend(self._extract_m3u8_formats(livestream_url, video_id, 'mp4', live=True))

        # Language codes ending in '-auto' go to automatic captions under the bare code
        subtitles, automatic_captions = {}, {}
        subtitle_items = traverse_obj(video_data, ('item', 'subtitles', {dict.items}, lambda _, v: url_or_none(v[1])))
        for lang, sub_url in subtitle_items:
            if lang.endswith('-auto'):
                bucket, lang_key = automatic_captions, lang[:-len('-auto')]
            else:
                bucket, lang_key = subtitles, lang
            bucket.setdefault(lang_key, []).append({
                'url': sub_url,
            })

        # NOTE(review): the expected test durations (44320, 58433) look like
        # milliseconds rather than seconds — confirm the unit of 'videoDuration'
        item_fields = traverse_obj(video_data, ('item', {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'thumbnail': ('cover', 'paths', 'original', 'src', {url_or_none}),
            'duration': ('uploadFile', 'videoDuration', {int_or_none}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
            'channel_follower_count': ('channel', 'stats', 'subscribers', {int_or_none}),
            'channel_is_verified': ('channel', 'verified', {bool}),
            'tags': ('tags', ..., {str}),
            'timestamp': ('createdAt', {parse_iso8601}),
            'release_timestamp': ('publishedAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'view_count': ('stats', 'viewTotalCount', {int_or_none}),
            'like_count': ('stats', 'likeCount', {int_or_none}),
            'dislike_count': ('stats', 'dislikeCount', {int_or_none}),
            'comment_count': ('stats', 'commentCount', {int_or_none}),
            'media_type': ('type', {str}),
        }))

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'automatic_captions': automatic_captions,
            'is_live': is_live,
            **item_fields,
        }
|
||||
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
truncate_string,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
@@ -26,6 +27,7 @@ class RedditIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'display_id': '6rrwyj',
|
||||
'title': 'That small heart attack.',
|
||||
'alt_title': 'That small heart attack.',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'thumbnails': 'count:4',
|
||||
'timestamp': 1501941939,
|
||||
@@ -49,7 +51,8 @@ class RedditIE(InfoExtractor):
|
||||
'id': 'gyh95hiqc0b11',
|
||||
'ext': 'mp4',
|
||||
'display_id': '90bu6w',
|
||||
'title': 'Heat index was 110 degrees so we offered him a cold drink. He went for a full body soak instead',
|
||||
'title': 'Heat index was 110 degrees so we offered him a cold drink. He went fo...',
|
||||
'alt_title': 'Heat index was 110 degrees so we offered him a cold drink. He went for a full body soak instead',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'thumbnails': 'count:7',
|
||||
'timestamp': 1532051078,
|
||||
@@ -69,7 +72,8 @@ class RedditIE(InfoExtractor):
|
||||
'id': 'zasobba6wp071',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'nip71r',
|
||||
'title': 'I plan to make more stickers and prints! Check them out on my Etsy! Or get them through my Patreon. Links below.',
|
||||
'title': 'I plan to make more stickers and prints! Check them out on my Etsy! O...',
|
||||
'alt_title': 'I plan to make more stickers and prints! Check them out on my Etsy! Or get them through my Patreon. Links below.',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'thumbnails': 'count:5',
|
||||
'timestamp': 1621709093,
|
||||
@@ -91,7 +95,17 @@ class RedditIE(InfoExtractor):
|
||||
'playlist_count': 2,
|
||||
'info_dict': {
|
||||
'id': 'wzqkxp',
|
||||
'title': 'md5:72d3d19402aa11eff5bd32fc96369b37',
|
||||
'title': '[Finale] Kamen Rider Revice Episode 50 "Family to the End, Until the ...',
|
||||
'alt_title': '[Finale] Kamen Rider Revice Episode 50 "Family to the End, Until the Day We Meet Again" Discussion',
|
||||
'description': 'md5:5b7deb328062b164b15704c5fd67c335',
|
||||
'uploader': 'TheTwelveYearOld',
|
||||
'channel_id': 'KamenRider',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 0,
|
||||
'timestamp': 1661676059.0,
|
||||
'upload_date': '20220828',
|
||||
},
|
||||
}, {
|
||||
# crossposted reddit-hosted media
|
||||
@@ -102,6 +116,7 @@ class RedditIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'display_id': 'zjjw82',
|
||||
'title': 'Cringe',
|
||||
'alt_title': 'Cringe',
|
||||
'uploader': 'Otaku-senpai69420',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'upload_date': '20221212',
|
||||
@@ -122,6 +137,7 @@ class RedditIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'display_id': '124pp33',
|
||||
'title': 'Harmless prank of some old friends',
|
||||
'alt_title': 'Harmless prank of some old friends',
|
||||
'uploader': 'Dudezila',
|
||||
'channel_id': 'ContagiousLaughter',
|
||||
'duration': 17,
|
||||
@@ -142,6 +158,7 @@ class RedditIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'display_id': '12fujy3',
|
||||
'title': 'Based Hasan?',
|
||||
'alt_title': 'Based Hasan?',
|
||||
'uploader': 'KingNigelXLII',
|
||||
'channel_id': 'GenZedong',
|
||||
'duration': 16,
|
||||
@@ -161,6 +178,7 @@ class RedditIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'display_id': '1cl9h0u',
|
||||
'title': 'The insurance claim will be interesting',
|
||||
'alt_title': 'The insurance claim will be interesting',
|
||||
'uploader': 'darrenpauli',
|
||||
'channel_id': 'Unexpected',
|
||||
'duration': 53,
|
||||
@@ -183,6 +201,7 @@ class RedditIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'display_id': '1cxwzso',
|
||||
'title': 'Tottenham [1] - 0 Newcastle United - James Maddison 31\'',
|
||||
'alt_title': 'Tottenham [1] - 0 Newcastle United - James Maddison 31\'',
|
||||
'uploader': 'Woodstovia',
|
||||
'channel_id': 'soccer',
|
||||
'duration': 30,
|
||||
@@ -198,6 +217,26 @@ class RedditIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
'writesubtitles': True,
|
||||
},
|
||||
}, {
|
||||
# "gated" subreddit post
|
||||
'url': 'https://old.reddit.com/r/ketamine/comments/degtjo/when_the_k_hits/',
|
||||
'info_dict': {
|
||||
'id': 'gqsbxts133r31',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'degtjo',
|
||||
'title': 'When the K hits',
|
||||
'alt_title': 'When the K hits',
|
||||
'uploader': '[deleted]',
|
||||
'channel_id': 'ketamine',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'duration': 34,
|
||||
'thumbnail': r're:https?://.+/.+\.(?:jpg|png)',
|
||||
'timestamp': 1570438713.0,
|
||||
'upload_date': '20191007',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
|
||||
'only_matching': True,
|
||||
@@ -245,6 +284,15 @@ class RedditIE(InfoExtractor):
|
||||
elif not traverse_obj(login, ('json', 'data', 'cookie', {str})):
|
||||
raise ExtractorError('Unable to login, no cookie was returned')
|
||||
|
||||
def _real_initialize(self):
    """Pre-set the reddit.com cookies that opt in to age-restricted and "gated" subreddits."""
    # Opt in to age-restricted subreddits
    self._set_cookie('reddit.com', 'over18', '1')

    # Opt in to "gated" subreddits, keeping whatever the existing
    # URL-quoted JSON '_options' cookie already holds
    current_options = traverse_obj(self._get_cookies('https://www.reddit.com/'), (
        '_options', 'value', {urllib.parse.unquote}, {json.loads}, {dict}))
    options = {**(current_options or {}), 'pref_gated_sr_optin': True}
    self._set_cookie('reddit.com', '_options', urllib.parse.quote(json.dumps(options)))
|
||||
|
||||
def _get_subtitles(self, video_id):
|
||||
# Fallback if there were no subtitles provided by DASH or HLS manifests
|
||||
caption_url = f'https://v.redd.it/{video_id}/wh_ben_en.vtt'
|
||||
@@ -276,14 +324,6 @@ class RedditIE(InfoExtractor):
|
||||
data = data[0]['data']['children'][0]['data']
|
||||
video_url = data['url']
|
||||
|
||||
over_18 = data.get('over_18')
|
||||
if over_18 is True:
|
||||
age_limit = 18
|
||||
elif over_18 is False:
|
||||
age_limit = 0
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
thumbnails = []
|
||||
|
||||
def add_thumbnail(src):
|
||||
@@ -309,15 +349,19 @@ class RedditIE(InfoExtractor):
|
||||
add_thumbnail(resolution)
|
||||
|
||||
info = {
|
||||
'title': data.get('title'),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': float_or_none(data.get('created_utc')),
|
||||
'uploader': data.get('author'),
|
||||
'channel_id': data.get('subreddit'),
|
||||
'like_count': int_or_none(data.get('ups')),
|
||||
'dislike_count': int_or_none(data.get('downs')),
|
||||
'comment_count': int_or_none(data.get('num_comments')),
|
||||
'age_limit': age_limit,
|
||||
'age_limit': {True: 18, False: 0}.get(data.get('over_18')),
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {truncate_string(left=72)}),
|
||||
'alt_title': ('title', {str}),
|
||||
'description': ('selftext', {str}, filter),
|
||||
'timestamp': ('created_utc', {float_or_none}),
|
||||
'uploader': ('author', {str}),
|
||||
'channel_id': ('subreddit', {str}),
|
||||
'like_count': ('ups', {int_or_none}),
|
||||
'dislike_count': ('downs', {int_or_none}),
|
||||
'comment_count': ('num_comments', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
parsed_url = urllib.parse.urlparse(video_url)
|
||||
@@ -343,7 +387,7 @@ class RedditIE(InfoExtractor):
|
||||
**info,
|
||||
})
|
||||
if entries:
|
||||
return self.playlist_result(entries, video_id, info.get('title'))
|
||||
return self.playlist_result(entries, video_id, **info)
|
||||
raise ExtractorError('No media found', expected=True)
|
||||
|
||||
# Check if media is hosted on reddit:
|
||||
|
||||
@@ -114,7 +114,7 @@ class RedGifsBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
|
||||
class RedGifsIE(RedGifsBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/(?:watch|ifr)/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
|
||||
'info_dict': {
|
||||
@@ -147,6 +147,22 @@ class RedGifsIE(RedGifsBaseInfoExtractor):
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.redgifs.com/ifr/squeakyhelplesswisent',
|
||||
'info_dict': {
|
||||
'id': 'squeakyhelplesswisent',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hotwife Legs Thick',
|
||||
'timestamp': 1636287915,
|
||||
'upload_date': '20211107',
|
||||
'uploader': 'ignored52',
|
||||
'duration': 16,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -3,12 +3,20 @@ import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from .common import InfoExtractor, Request
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RTPIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:(?:estudoemcasa|palco|zigzag)/)?p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:[^/#?]+/)?p(?P<program_id>\d+)/(?P<id>e\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
|
||||
'md5': 'e736ce0c665e459ddb818546220b4ef8',
|
||||
@@ -16,99 +24,173 @@ class RTPIE(InfoExtractor):
|
||||
'id': 'e174042',
|
||||
'ext': 'mp3',
|
||||
'title': 'Paixões Cruzadas',
|
||||
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
||||
'description': 'md5:af979e58ba0ab73f78435fc943fdb070',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'series': 'Paixões Cruzadas',
|
||||
'duration': 2950.0,
|
||||
'modified_timestamp': 1553693464,
|
||||
'modified_date': '20190327',
|
||||
'timestamp': 1417219200,
|
||||
'upload_date': '20141129',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rtp.pt/play/zigzag/p13166/e757904/25-curiosidades-25-de-abril',
|
||||
'md5': '9a81ed53f2b2197cfa7ed455b12f8ade',
|
||||
'md5': '5b4859940e3adef61247a77dfb76046a',
|
||||
'info_dict': {
|
||||
'id': 'e757904',
|
||||
'ext': 'mp4',
|
||||
'title': '25 Curiosidades, 25 de Abril',
|
||||
'description': 'Estudar ou não estudar - Em cada um dos episódios descobrimos uma curiosidade acerca de como era viver em Portugal antes da revolução do 25 de abr',
|
||||
'title': 'Estudar ou não estudar',
|
||||
'description': 'md5:3bfd7eb8bebfd5711a08df69c9c14c35',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1711958401,
|
||||
'duration': 146.0,
|
||||
'upload_date': '20240401',
|
||||
'modified_timestamp': 1712242991,
|
||||
'series': '25 Curiosidades, 25 de Abril',
|
||||
'episode_number': 2,
|
||||
'episode': 'Estudar ou não estudar',
|
||||
'modified_date': '20240404',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/portugues-1-ano',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rtp.pt/play/palco/p13785/l7nnon',
|
||||
'only_matching': True,
|
||||
# Episode not accessible through API
|
||||
'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/e500050/portugues-1-ano',
|
||||
'md5': '57660c0b46db9f22118c52cbd65975e4',
|
||||
'info_dict': {
|
||||
'id': 'e500050',
|
||||
'ext': 'mp4',
|
||||
'title': 'Português - 1.º ano',
|
||||
'duration': 1669.0,
|
||||
'description': 'md5:be68925c81269f8c6886589f25fe83ea',
|
||||
'upload_date': '20201020',
|
||||
'timestamp': 1603180799,
|
||||
'thumbnail': 'https://cdn-images.rtp.pt/EPG/imagens/39482_59449_64850.png?v=3&w=860',
|
||||
},
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'rtpplay/2.0.66 (pt.rtp.rtpplay; build:2066; iOS 15.8.3) Alamofire/5.9.1'
|
||||
_AUTH_TOKEN = None
|
||||
|
||||
def _fetch_auth_token(self):
    """Return the guest auth token, requesting it only while none is stored.

    The request is non-fatal, so the result may be ``None``; in that case
    the next call tries again.
    """
    if not self._AUTH_TOKEN:
        token_request = Request(
            'https://rtpplayapi.rtp.pt/play/api/2/token-manager',
            headers={
                'Accept': '*/*',
                'rtp-play-auth': 'RTPPLAY_MOBILE_IOS',
                'rtp-play-auth-hash': 'fac9c328b2f27e26e03d7f8942d66c05b3e59371e16c2a079f5c83cc801bd3ee',
                'rtp-play-auth-timestamp': '2145973229682',
                'User-Agent': self._USER_AGENT,
            }, extensions={'keep_header_casing': True})
        response = self._download_json(
            token_request, None,
            note='Fetching guest auth token', errnote='Could not fetch guest auth token',
            fatal=False)
        self._AUTH_TOKEN = traverse_obj(response, ('token', 'token', {str}))
    return self._AUTH_TOKEN
|
||||
|
||||
@staticmethod
|
||||
def _cleanup_media_url(url):
|
||||
if urllib.parse.urlparse(url).netloc == 'streaming-ondemand.rtp.pt':
|
||||
return None
|
||||
return url.replace('/drm-fps/', '/hls/').replace('/drm-dash/', '/dash/')
|
||||
|
||||
def _extract_formats(self, media_urls, episode_id):
    """Turn a collection of media URLs into formats and subtitles.

    Each entry is validated with ``url_or_none`` and passed through
    ``_cleanup_media_url``; the surviving URLs are de-duplicated via a
    set. ``.m3u8`` URLs are expanded as HLS, ``.mpd`` URLs as DASH (both
    non-fatally), and anything else becomes a single ``http`` format.

    Returns a ``(formats, subtitles)`` tuple.
    """
    formats, subtitles = [], {}
    unique_urls = set(traverse_obj(media_urls, (..., {url_or_none}, {self._cleanup_media_url})))

    for media_url in unique_urls:
        ext = determine_ext(media_url)
        if ext not in ('m3u8', 'mpd'):
            # Neither manifest type: expose the URL as a direct format
            formats.append({
                'url': media_url,
                'format_id': 'http',
            })
            continue

        if ext == 'm3u8':
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                media_url, episode_id, m3u8_id='hls', fatal=False)
        else:
            fmts, subs = self._extract_mpd_formats_and_subtitles(
                media_url, episode_id, mpd_id='dash', fatal=False)
        formats.extend(fmts)
        self._merge_subtitles(subs, target=subtitles)

    return formats, subtitles
|
||||
|
||||
def _extract_from_api(self, program_id, episode_id):
    """Build the episode info dict from the RTP Play API.

    ``program_id`` is interpolated into the API URL unchanged;
    ``episode_id`` is used without its first character in the URL and
    unchanged as the resulting ``id``.

    Returns None when no auth token is available or when the response
    carries no ``result`` object, so the caller can try another
    extraction path. Otherwise returns an info dict with formats,
    subtitles, thumbnail and episode metadata.
    """
    auth_token = self._fetch_auth_token()
    if not auth_token:
        return None

    api_response = self._download_json(
        f'https://www.rtp.pt/play/api/1/get-episode/{program_id}/{episode_id[1:]}', episode_id,
        query={'include_assets': 'true', 'include_webparams': 'true'},
        headers={
            'Accept': '*/*',
            'Authorization': f'Bearer {auth_token}',
            'User-Agent': self._USER_AGENT,
        }, fatal=False)
    # 'result' and {dict} have to form ONE path. Passed as separate
    # arguments they are alternative paths, and the {dict} alternative
    # matches the whole response (e.g. an error payload) whenever
    # 'result' is missing, which would suppress the fallback below.
    episode_data = traverse_obj(api_response, ('result', {dict}))
    if not episode_data:
        return None

    # Only the first asset is considered
    asset_urls = traverse_obj(episode_data, ('assets', 0, 'asset_url', {dict}))
    # Branches over hls/dash 'stream_url' and multibitrate 'url_hls'/'url_dash'
    media_urls = traverse_obj(asset_urls, (
        ((('hls', 'dash'), 'stream_url'), ('multibitrate', ('url_hls', 'url_dash'))),))
    formats, subtitles = self._extract_formats(media_urls, episode_id)

    for sub_data in traverse_obj(asset_urls, ('subtitles', 'vtt_list', lambda _, v: url_or_none(v['file']))):
        # Entries without a language code are filed under 'pt'
        subtitles.setdefault(sub_data.get('code') or 'pt', []).append({
            'url': sub_data['file'],
            'name': sub_data.get('language'),
        })

    return {
        'id': episode_id,
        'formats': formats,
        'subtitles': subtitles,
        'thumbnail': traverse_obj(episode_data, ('assets', 0, 'asset_thumbnail', {url_or_none})),
        **traverse_obj(episode_data, ('episode', {
            'title': (('episode_title', 'program_title'), {str}, filter, any),
            'alt_title': ('episode_subtitle', {str}, filter),
            'description': (('episode_description', 'episode_summary'), {str}, filter, any),
            'timestamp': ('episode_air_date', {parse_iso8601(delimiter=' ')}),
            'modified_timestamp': ('episode_lastchanged', {parse_iso8601(delimiter=' ')}),
            'duration': ('episode_duration_complete', {parse_duration}),
            'episode': ('episode_title', {str}, filter),
            'episode_number': ('episode_number', {int_or_none}),
            'season': ('program_season', {str}, filter),
            'series': ('program_title', {str}, filter),
        })),
    }
|
||||
|
||||
_RX_OBFUSCATION = re.compile(r'''(?xs)
|
||||
atob\s*\(\s*decodeURIComponent\s*\(\s*
|
||||
(\[[0-9A-Za-z%,'"]*\])
|
||||
\s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\)
|
||||
''')
|
||||
|
||||
def __unobfuscate(self, data, *, video_id):
    """Decode obfuscated string fragments and convert ``data`` to JSON text.

    When ``data`` starts with ``{``, every match of ``_RX_OBFUSCATION``
    is replaced by the JSON-quoted decoded string: the captured array is
    parsed, its items joined, URL-unquoted, base64-decoded and read as
    ISO-8859-1. The (possibly rewritten) text is then passed through
    ``js_to_json`` and returned.
    """
    def _decode_match(match):
        fragments = self._parse_json(match.group(1), video_id)
        encoded = urllib.parse.unquote(''.join(fragments))
        return json.dumps(base64.b64decode(encoded).decode('iso-8859-1'))

    if data.startswith('{'):
        data = self._RX_OBFUSCATION.sub(_decode_match, data)
    return js_to_json(data)
|
||||
def __unobfuscate(self, data):
|
||||
return self._RX_OBFUSCATION.sub(
|
||||
lambda m: json.dumps(
|
||||
base64.b64decode(urllib.parse.unquote(
|
||||
''.join(json.loads(m.group(1))),
|
||||
)).decode('iso-8859-1')),
|
||||
data)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_meta(
|
||||
'twitter:title', webpage, display_name='title', fatal=True)
|
||||
|
||||
f, config = self._search_regex(
|
||||
r'''(?sx)
|
||||
(?:var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*)?
|
||||
var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
|
||||
''', webpage,
|
||||
'player config', group=('f', 'config'))
|
||||
|
||||
config = self._parse_json(
|
||||
config, video_id,
|
||||
lambda data: self.__unobfuscate(data, video_id=video_id))
|
||||
f = config['file'] if not f else self._parse_json(
|
||||
f, video_id,
|
||||
lambda data: self.__unobfuscate(data, video_id=video_id))
|
||||
def _extract_from_html(self, url, episode_id):
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
|
||||
formats = []
|
||||
if isinstance(f, dict):
|
||||
f_hls = f.get('hls')
|
||||
if f_hls is not None:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'))
|
||||
|
||||
f_dash = f.get('dash')
|
||||
if f_dash is not None:
|
||||
formats.extend(self._extract_mpd_formats(f_dash, video_id, mpd_id='dash'))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': 'f',
|
||||
'url': f,
|
||||
'vcodec': 'none' if config.get('mediaType') == 'audio' else None,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
|
||||
vtt = config.get('vtt')
|
||||
if vtt is not None:
|
||||
for lcode, lname, url in vtt:
|
||||
subtitles.setdefault(lcode, []).append({
|
||||
'name': lname,
|
||||
'url': url,
|
||||
})
|
||||
media_urls = traverse_obj(re.findall(r'(?:var\s+f\s*=|RTPPlayer\({[^}]+file:)\s*({[^}]+}|"[^"]+")', webpage), (
|
||||
-1, (({self.__unobfuscate}, {js_to_json}, {json.loads}, {dict.values}, ...), {json.loads})))
|
||||
formats, subtitles = self._extract_formats(media_urls, episode_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'id': episode_id,
|
||||
'formats': formats,
|
||||
'description': self._html_search_meta(['description', 'twitter:description'], webpage),
|
||||
'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
|
||||
'subtitles': subtitles,
|
||||
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, default=None),
|
||||
'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None),
|
||||
**self._search_json_ld(webpage, episode_id, default={}),
|
||||
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Extract an episode, preferring the API over the HTML page.

    The ``program_id`` and ``id`` groups are read from the URL match.
    The HTML path is only attempted when the API path returns a falsy
    result.
    """
    url_match = self._match_valid_url(url)
    program_id, episode_id = url_match.group('program_id', 'id')

    api_info = self._extract_from_api(program_id, episode_id)
    if api_info:
        return api_info
    return self._extract_from_html(url, episode_id)
|
||||
|
||||
@@ -176,6 +176,8 @@ class RTVSLOShowIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '173250997',
|
||||
'title': 'Ekipa Bled',
|
||||
'description': 'md5:c88471e27a1268c448747a5325319ab7',
|
||||
'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/173250997/logo_wide1.jpg',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}]
|
||||
@@ -187,4 +189,7 @@ class RTVSLOShowIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage),
|
||||
playlist_id, self._html_extract_title(webpage),
|
||||
getter=urljoin('https://365.rtvslo.si'), ie=RTVSLOIE)
|
||||
getter=urljoin('https://365.rtvslo.si'), ie=RTVSLOIE,
|
||||
description=self._og_search_description(webpage),
|
||||
thumbnail=self._og_search_thumbnail(webpage),
|
||||
)
|
||||
|
||||
@@ -4,43 +4,12 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_qs,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
make_archive_id,
|
||||
remove_end,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
_COMMITTEES = {
|
||||
'ag': ('76440', 'http://ag-f.akamaihd.net'),
|
||||
'aging': ('76442', 'http://aging-f.akamaihd.net'),
|
||||
'approps': ('76441', 'http://approps-f.akamaihd.net'),
|
||||
'arch': ('', 'http://ussenate-f.akamaihd.net'),
|
||||
'armed': ('76445', 'http://armed-f.akamaihd.net'),
|
||||
'banking': ('76446', 'http://banking-f.akamaihd.net'),
|
||||
'budget': ('76447', 'http://budget-f.akamaihd.net'),
|
||||
'cecc': ('76486', 'http://srs-f.akamaihd.net'),
|
||||
'commerce': ('80177', 'http://commerce1-f.akamaihd.net'),
|
||||
'csce': ('75229', 'http://srs-f.akamaihd.net'),
|
||||
'dpc': ('76590', 'http://dpc-f.akamaihd.net'),
|
||||
'energy': ('76448', 'http://energy-f.akamaihd.net'),
|
||||
'epw': ('76478', 'http://epw-f.akamaihd.net'),
|
||||
'ethics': ('76449', 'http://ethics-f.akamaihd.net'),
|
||||
'finance': ('76450', 'http://finance-f.akamaihd.net'),
|
||||
'foreign': ('76451', 'http://foreign-f.akamaihd.net'),
|
||||
'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'),
|
||||
'help': ('76452', 'http://help-f.akamaihd.net'),
|
||||
'indian': ('76455', 'http://indian-f.akamaihd.net'),
|
||||
'intel': ('76456', 'http://intel-f.akamaihd.net'),
|
||||
'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'),
|
||||
'jccic': ('85180', 'http://jccic-f.akamaihd.net'),
|
||||
'jec': ('76458', 'http://jec-f.akamaihd.net'),
|
||||
'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'),
|
||||
'rpc': ('76591', 'http://rpc-f.akamaihd.net'),
|
||||
'rules': ('76460', 'http://rules-f.akamaihd.net'),
|
||||
'saa': ('76489', 'http://srs-f.akamaihd.net'),
|
||||
'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'),
|
||||
'srs': ('75229', 'http://srs-f.akamaihd.net'),
|
||||
'uscc': ('76487', 'http://srs-f.akamaihd.net'),
|
||||
'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'),
|
||||
}
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SenateISVPIE(InfoExtractor):
|
||||
@@ -53,31 +22,46 @@ class SenateISVPIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'judiciary031715',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player',
|
||||
'title': 'ISVP',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'_old_archive_ids': ['senategov judiciary031715'],
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
||||
'info_dict': {
|
||||
'id': 'commerce011514',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player',
|
||||
'_old_archive_ids': ['senategov commerce011514'],
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video is not available.',
|
||||
}, {
|
||||
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
||||
# checksum differs each time
|
||||
'info_dict': {
|
||||
'id': 'intel090613',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player',
|
||||
'title': 'ISVP',
|
||||
'_old_archive_ids': ['senategov intel090613'],
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'https://www.senate.gov/isvp/?auto_play=false&comm=help&filename=help090920&poster=https://www.help.senate.gov/assets/images/video-poster.png&stt=950',
|
||||
'info_dict': {
|
||||
'id': 'help090920',
|
||||
'ext': 'mp4',
|
||||
'title': 'ISVP',
|
||||
'thumbnail': 'https://www.help.senate.gov/assets/images/video-poster.png',
|
||||
'_old_archive_ids': ['senategov help090920'],
|
||||
},
|
||||
}, {
|
||||
# From http://www.c-span.org/video/?96791-1
|
||||
@@ -85,60 +69,81 @@ class SenateISVPIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_COMMITTEES = {
|
||||
'ag': ('76440', 'https://ag-f.akamaihd.net', '2036803', 'agriculture'),
|
||||
'aging': ('76442', 'https://aging-f.akamaihd.net', '2036801', 'aging'),
|
||||
'approps': ('76441', 'https://approps-f.akamaihd.net', '2036802', 'appropriations'),
|
||||
'arch': ('', 'https://ussenate-f.akamaihd.net', '', 'arch'),
|
||||
'armed': ('76445', 'https://armed-f.akamaihd.net', '2036800', 'armedservices'),
|
||||
'banking': ('76446', 'https://banking-f.akamaihd.net', '2036799', 'banking'),
|
||||
'budget': ('76447', 'https://budget-f.akamaihd.net', '2036798', 'budget'),
|
||||
'cecc': ('76486', 'https://srs-f.akamaihd.net', '2036782', 'srs_cecc'),
|
||||
'commerce': ('80177', 'https://commerce1-f.akamaihd.net', '2036779', 'commerce'),
|
||||
'csce': ('75229', 'https://srs-f.akamaihd.net', '2036777', 'srs_srs'),
|
||||
'dpc': ('76590', 'https://dpc-f.akamaihd.net', '', 'dpc'),
|
||||
'energy': ('76448', 'https://energy-f.akamaihd.net', '2036797', 'energy'),
|
||||
'epw': ('76478', 'https://epw-f.akamaihd.net', '2036783', 'environment'),
|
||||
'ethics': ('76449', 'https://ethics-f.akamaihd.net', '2036796', 'ethics'),
|
||||
'finance': ('76450', 'https://finance-f.akamaihd.net', '2036795', 'finance_finance'),
|
||||
'foreign': ('76451', 'https://foreign-f.akamaihd.net', '2036794', 'foreignrelations'),
|
||||
'govtaff': ('76453', 'https://govtaff-f.akamaihd.net', '2036792', 'hsgac'),
|
||||
'help': ('76452', 'https://help-f.akamaihd.net', '2036793', 'help'),
|
||||
'indian': ('76455', 'https://indian-f.akamaihd.net', '2036791', 'indianaffairs'),
|
||||
'intel': ('76456', 'https://intel-f.akamaihd.net', '2036790', 'intelligence'),
|
||||
'intlnarc': ('76457', 'https://intlnarc-f.akamaihd.net', '', 'internationalnarcoticscaucus'),
|
||||
'jccic': ('85180', 'https://jccic-f.akamaihd.net', '2036778', 'jccic'),
|
||||
'jec': ('76458', 'https://jec-f.akamaihd.net', '2036789', 'jointeconomic'),
|
||||
'judiciary': ('76459', 'https://judiciary-f.akamaihd.net', '2036788', 'judiciary'),
|
||||
'rpc': ('76591', 'https://rpc-f.akamaihd.net', '', 'rpc'),
|
||||
'rules': ('76460', 'https://rules-f.akamaihd.net', '2036787', 'rules'),
|
||||
'saa': ('76489', 'https://srs-f.akamaihd.net', '2036780', 'srs_saa'),
|
||||
'smbiz': ('76461', 'https://smbiz-f.akamaihd.net', '2036786', 'smallbusiness'),
|
||||
'srs': ('75229', 'https://srs-f.akamaihd.net', '2031966', 'srs_srs'),
|
||||
'uscc': ('76487', 'https://srs-f.akamaihd.net', '2036781', 'srs_uscc'),
|
||||
'vetaff': ('76462', 'https://vetaff-f.akamaihd.net', '2036785', 'veteransaffairs'),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
qs = urllib.parse.parse_qs(self._match_valid_url(url).group('qs'))
|
||||
if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
|
||||
if not qs.get('filename') or not qs.get('comm'):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
video_id = re.sub(r'.mp4$', '', qs['filename'][0])
|
||||
filename = qs['filename'][0]
|
||||
video_id = remove_end(filename, '.mp4')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
committee = qs['comm'][0]
|
||||
|
||||
if smuggled_data.get('force_title'):
|
||||
title = smuggled_data['force_title']
|
||||
else:
|
||||
title = self._html_extract_title(webpage)
|
||||
poster = qs.get('poster')
|
||||
thumbnail = poster[0] if poster else None
|
||||
|
||||
video_type = qs['type'][0]
|
||||
committee = video_type if video_type == 'arch' else qs['comm'][0]
|
||||
|
||||
stream_num, domain = _COMMITTEES[committee]
|
||||
stream_num, stream_domain, stream_id, msl3 = self._COMMITTEES[committee]
|
||||
|
||||
urls_alternatives = [f'https://www-senate-gov-media-srs.akamaized.net/hls/live/{stream_id}/{committee}/{filename}/master.m3u8',
|
||||
f'https://www-senate-gov-msl3archive.akamaized.net/{msl3}/{filename}_1/master.m3u8',
|
||||
f'{stream_domain}/i/{filename}_1@{stream_num}/master.m3u8',
|
||||
f'{stream_domain}/i/{filename}.mp4/master.m3u8']
|
||||
formats = []
|
||||
if video_type == 'arch':
|
||||
filename = video_id if '.' in video_id else video_id + '.mp4'
|
||||
m3u8_url = urllib.parse.urljoin(domain, 'i/' + filename + '/master.m3u8')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8')
|
||||
else:
|
||||
hdcore_sign = 'hdcore=3.1.0'
|
||||
url_params = (domain, video_id, stream_num)
|
||||
f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params
|
||||
m3u8_url = '{}/i/{}_1@{}/master.m3u8'.format(*url_params)
|
||||
for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
|
||||
# URLs without the extra param induce an 404 error
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
formats.append(entry)
|
||||
for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
|
||||
mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url'])
|
||||
if mobj:
|
||||
entry['format_id'] += mobj.group('tag')
|
||||
formats.append(entry)
|
||||
subtitles = {}
|
||||
for video_url in urls_alternatives:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', fatal=False)
|
||||
if formats:
|
||||
break
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._html_extract_title(webpage),
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': traverse_obj(qs, ('poster', 0, {url_or_none})),
|
||||
'_old_archive_ids': [make_archive_id(SenateGovIE, video_id)],
|
||||
}
|
||||
|
||||
|
||||
class SenateGovIE(InfoExtractor):
|
||||
_IE_NAME = 'senate.gov'
|
||||
_VALID_URL = r'https?:\/\/(?:www\.)?(help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov'
|
||||
_SUBDOMAIN_RE = '|'.join(map(re.escape, (
|
||||
'agriculture', 'aging', 'appropriations', 'armed-services', 'banking',
|
||||
'budget', 'commerce', 'energy', 'epw', 'finance', 'foreign', 'help',
|
||||
'intelligence', 'inaugural', 'judiciary', 'rules', 'sbc', 'veterans',
|
||||
)))
|
||||
_VALID_URL = rf'https?://(?:www\.)?(?:{_SUBDOMAIN_RE})\.senate\.gov'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
|
||||
'info_dict': {
|
||||
@@ -147,6 +152,9 @@ class SenateGovIE(InfoExtractor):
|
||||
'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health',
|
||||
'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://www.help.senate.gov/assets/images/sharelogo.jpg',
|
||||
'_old_archive_ids': ['senategov help090920'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
@@ -156,8 +164,12 @@ class SenateGovIE(InfoExtractor):
|
||||
'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
|
||||
'title': 'Review of the FY2019 Budget Request for the U.S. Army',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://www.appropriations.senate.gov/themes/appropriations/images/video-poster-flash-fit.png',
|
||||
'_old_archive_ids': ['senategov appropsA051518'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
|
||||
'info_dict': {
|
||||
@@ -166,32 +178,65 @@ class SenateGovIE(InfoExtractor):
|
||||
'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization',
|
||||
'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://www.banking.senate.gov/themes/banking/images/sharelogo.jpg',
|
||||
'age_limit': 0,
|
||||
'_old_archive_ids': ['senategov banking041521'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.agriculture.senate.gov/hearings/hemp-production-and-the-2018-farm-bill',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aging.senate.gov/hearings/the-older-americans-act-the-local-impact-of-the-law-and-the-upcoming-reauthorization',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.budget.senate.gov/hearings/improving-care-lowering-costs-achieving-health-care-efficiency',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.commerce.senate.gov/2024/12/communications-networks-safety-and-security',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.energy.senate.gov/hearings/2024/2/full-committee-hearing-to-examine',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.epw.senate.gov/public/index.cfm/hearings?ID=F63083EA-2C13-498C-B548-341BED68C209',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.foreign.senate.gov/hearings/american-diplomacy-and-global-leadership-review-of-the-fy25-state-department-budget-request',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.intelligence.senate.gov/hearings/foreign-threats-elections-2024-%E2%80%93-roles-and-responsibilities-us-tech-providers',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.inaugural.senate.gov/52nd-inaugural-ceremonies/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rules.senate.gov/hearings/02/07/2023/business-meeting',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbc.senate.gov/public/index.cfm/hearings?ID=5B13AA6B-8279-45AF-B54B-94156DC7A2AB',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.veterans.senate.gov/2024/5/frontier-health-care-ensuring-veterans-access-no-matter-where-they-live',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._generic_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
parse_info = parse_qs(self._search_regex(
|
||||
r'<iframe class="[^>"]*streaminghearing[^>"]*"\s[^>]*\bsrc="([^">]*)', webpage, 'hearing URL'))
|
||||
|
||||
stream_num, stream_domain = _COMMITTEES[parse_info['comm'][-1]]
|
||||
filename = parse_info['filename'][-1]
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
f'{stream_domain}/i/{filename}_1@{stream_num}/master.m3u8',
|
||||
display_id, ext='mp4')
|
||||
url_info = next(SenateISVPIE.extract_from_webpage(self._downloader, url, webpage), None)
|
||||
if not url_info:
|
||||
raise UnsupportedError(url)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(*self._og_regexes('title'), r'(?s)<title>([^<]*?)</title>'), webpage, 'video title')
|
||||
(*self._og_regexes('title'), r'(?s)<title>([^<]*?)</title>'), webpage, 'video title', fatal=False)
|
||||
|
||||
return {
|
||||
'id': re.sub(r'.mp4$', '', filename),
|
||||
**url_info,
|
||||
'_type': 'url_transparent',
|
||||
'display_id': display_id,
|
||||
'title': re.sub(r'\s+', ' ', title.split('|')[0]).strip(),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -2,16 +2,18 @@ import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SkyItPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'player.sky.it'
|
||||
_VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
|
||||
class SkyItBaseIE(InfoExtractor):
|
||||
_GEO_BYPASS = False
|
||||
_DOMAIN = 'sky'
|
||||
_PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
|
||||
@@ -33,7 +35,6 @@ class SkyItPlayerIE(InfoExtractor):
|
||||
SkyItPlayerIE.ie_key(), video_id)
|
||||
|
||||
def _parse_video(self, video, video_id):
|
||||
title = video['title']
|
||||
is_live = video.get('type') == 'live'
|
||||
hls_url = video.get(('streaming' if is_live else 'hls') + '_url')
|
||||
if not hls_url and video.get('geoblock' if is_live else 'geob'):
|
||||
@@ -43,7 +44,7 @@ class SkyItPlayerIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': video.get('title'),
|
||||
'formats': formats,
|
||||
'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
|
||||
'description': video.get('short_desc') or None,
|
||||
@@ -52,6 +53,11 @@ class SkyItPlayerIE(InfoExtractor):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
class SkyItPlayerIE(SkyItBaseIE):
|
||||
IE_NAME = 'player.sky.it'
|
||||
_VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
domain = urllib.parse.parse_qs(urllib.parse.urlparse(
|
||||
@@ -67,7 +73,7 @@ class SkyItPlayerIE(InfoExtractor):
|
||||
return self._parse_video(video, video_id)
|
||||
|
||||
|
||||
class SkyItVideoIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
|
||||
class SkyItVideoIE(SkyItBaseIE):
|
||||
IE_NAME = 'video.sky.it'
|
||||
_VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
@@ -96,7 +102,7 @@ class SkyItVideoIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
|
||||
return self._player_url_result(video_id)
|
||||
|
||||
|
||||
class SkyItVideoLiveIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
|
||||
class SkyItVideoLiveIE(SkyItBaseIE):
|
||||
IE_NAME = 'video.sky.it:live'
|
||||
_VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
@@ -124,7 +130,7 @@ class SkyItVideoLiveIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
|
||||
return self._parse_video(livestream, asset_id)
|
||||
|
||||
|
||||
class SkyItIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
|
||||
class SkyItIE(SkyItBaseIE):
|
||||
IE_NAME = 'sky.it'
|
||||
_VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
@@ -223,3 +229,80 @@ class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
_DOMAIN = 'mtv8'
|
||||
|
||||
|
||||
# Extractor for the TV8 live stream page (tv8.it/streaming).
class TV8ItLiveIE(SkyItBaseIE):
    IE_NAME = 'tv8.it:live'
    IE_DESC = 'TV8 Live'
    _VALID_URL = r'https?://(?:www\.)?tv8\.it/streaming'
    _TESTS = [{
        'url': 'https://tv8.it/streaming',
        'info_dict': {
            'id': 'tv8',
            'ext': 'mp4',
            'title': str,
            'description': str,
            'is_live': True,
            'live_status': 'is_live',
        },
    }]

    def _real_extract(self, url):
        """Return the live stream info, overlaid with tv8.it metadata.

        The stream itself comes from the Sky livestream endpoint (queried
        with id '7'); title and description are taken from the tv8.it
        streaming endpoint when that optional request yields them.
        """
        video_id = 'tv8'
        stream_data = self._download_json(
            'https://apid.sky.it/vdp/v1/getLivestream', video_id,
            'Downloading manifest JSON', query={'id': '7'})
        # Metadata is optional: this request is allowed to fail
        page_metadata = self._download_json('https://tv8.it/api/getStreaming', video_id, fatal=False)

        base_info = self._parse_video(stream_data, video_id)
        overrides = traverse_obj(page_metadata, ('info', {
            'title': ('title', 'text', {str}),
            'description': ('description', 'html', {clean_html}),
        }))
        # Later keys win, so tv8.it metadata replaces the parsed values
        return {**base_info, **overrides}
|
||||
|
||||
|
||||
# Extractor for tv8.it section pages that list several videos.
class TV8ItPlaylistIE(InfoExtractor):
    IE_NAME = 'tv8.it:playlist'
    IE_DESC = 'TV8 Playlist'
    _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?!video)[^/#?]+/(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'https://tv8.it/intrattenimento/tv8-gialappas-night',
        'playlist_mincount': 32,
        'info_dict': {
            'id': 'tv8-gialappas-night',
            'title': 'Tv8 Gialappa\'s Night',
            'description': 'md5:c876039d487d9cf40229b768872718ed',
            'thumbnail': r're:https://static\.sky\.it/.+\.(png|jpe?g|webp)',
        },
    }, {
        'url': 'https://tv8.it/sport/uefa-europa-league',
        'playlist_mincount': 11,
        'info_dict': {
            'id': 'uefa-europa-league',
            'title': 'UEFA Europa League',
            'description': 'md5:9ab1832b7a8b1705b1f590e13a36bc6a',
            'thumbnail': r're:https://static\.sky\.it/.+\.(png|jpe?g|webp)',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist built from the cards in the page's Next.js data.

        Every card under ``lastContent.cards`` with a truthy ``href``
        becomes a URL result handled by ``TV8ItIE``; playlist-level
        metadata is read from ``card.desktop``.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        nextjs_data = self._search_nextjs_data(webpage, playlist_id)
        data = nextjs_data['props']['pageProps']['data']

        entries = []
        for card in traverse_obj(data, ('lastContent', 'cards', lambda _, v: v['href'])):
            # hrefs are resolved against the site root
            entry_url = urljoin('https://tv8.it', card['href'])
            card_meta = traverse_obj(card, {
                'description': ('extraData', 'videoDesc', {str}),
                'id': ('extraData', 'asset_id', {str}),
                'thumbnail': ('image', 'src', {url_or_none}),
                'title': ('title', 'typography', 'text', {str}),
            })
            entries.append(self.url_result(entry_url, ie=TV8ItIE, **card_meta))

        playlist_meta = traverse_obj(data, ('card', 'desktop', {
            'description': ('description', 'html', {clean_html}),
            'thumbnail': ('image', 'src', {url_or_none}),
            'title': ('title', 'text', {str}),
        }))
        return self.playlist_result(entries, playlist_id, **playlist_meta)
|
||||
|
||||
87
yt_dlp/extractor/softwhiteunderbelly.py
Normal file
87
yt_dlp/extractor/softwhiteunderbelly.py
Normal file
@@ -0,0 +1,87 @@
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VHXEmbedIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
update_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class SoftWhiteUnderbellyIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.softwhiteunderbelly.com/login'
|
||||
_NETRC_MACHINE = 'softwhiteunderbelly'
|
||||
_VALID_URL = r'https?://(?:www\.)?softwhiteunderbelly\.com/videos/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.softwhiteunderbelly.com/videos/kenneth-final1',
|
||||
'note': 'A single Soft White Underbelly Episode',
|
||||
'md5': '8e79f29ec1f1bda6da2e0b998fcbebb8',
|
||||
'info_dict': {
|
||||
'id': '3201266',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'kenneth-final1',
|
||||
'title': 'Appalachian Man interview-Kenneth',
|
||||
'description': 'Soft White Underbelly interview and portrait of Kenneth, an Appalachian man in Clay County, Kentucky.',
|
||||
'thumbnail': 'https://vhx.imgix.net/softwhiteunderbelly/assets/249f6db0-2b39-49a4-979b-f8dad4681825.jpg',
|
||||
'uploader_url': 'https://vimeo.com/user80538407',
|
||||
'uploader': 'OTT Videos',
|
||||
'uploader_id': 'user80538407',
|
||||
'duration': 512,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
'url': 'https://www.softwhiteunderbelly.com/videos/tj-2-final-2160p',
|
||||
'note': 'A single Soft White Underbelly Episode',
|
||||
'md5': '286bd8851b4824c62afb369e6f307036',
|
||||
'info_dict': {
|
||||
'id': '3506029',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'tj-2-final-2160p',
|
||||
'title': 'Fentanyl Addict interview-TJ (follow up)',
|
||||
'description': 'Soft White Underbelly follow up interview and portrait of TJ, a fentanyl addict on Skid Row.',
|
||||
'thumbnail': 'https://vhx.imgix.net/softwhiteunderbelly/assets/c883d531-5da0-4faf-a2e2-8eba97e5adfc.jpg',
|
||||
'duration': 817,
|
||||
'uploader': 'OTT Videos',
|
||||
'uploader_url': 'https://vimeo.com/user80538407',
|
||||
'uploader_id': 'user80538407',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}]
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
signin_page = self._download_webpage(self._LOGIN_URL, None, 'Fetching authenticity token')
|
||||
self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Logging in',
|
||||
data=urlencode_postdata({
|
||||
'email': username,
|
||||
'password': password,
|
||||
'authenticity_token': self._html_search_regex(
|
||||
r'name=["\']authenticity_token["\']\s+value=["\']([^"\']+)', signin_page, 'authenticity_token'),
|
||||
'utf8': True,
|
||||
}),
|
||||
)
|
||||
|
||||
def _real_extract(self, url):
    """Turn a video page into a ``url_transparent`` result for its VHX embed.

    If the page carries the ``watch-unauthorized`` marker, extraction stops:
    an ExtractorError is raised when a ``_session`` cookie is already present
    for the site, otherwise ``raise_login_required()`` is called.

    Returns a dict that hands the embed URL (with the site smuggled in as
    referrer) to VHXEmbedIE, plus the title, description and thumbnail
    scraped from this page.
    """
    site_url = 'https://www.softwhiteunderbelly.com'
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)

    unauthorized = '<div id="watch-unauthorized"' in webpage
    if unauthorized:
        session_cookie = self._get_cookies(site_url).get('_session')
        if session_cookie:
            # A session cookie is present, yet the page is still locked
            raise ExtractorError('This account is not subscribed to this content', expected=True)
        self.raise_login_required()

    embed_pattern = r'embed_url:\s*["\'](?P<url>https?://embed\.vhx\.tv/videos/(?P<id>\d+)[^"\']*)'
    embed_url, embed_id = self._html_search_regex(
        embed_pattern, webpage, 'embed url', group=('url', 'id'))

    info = {'_type': 'url_transparent'}
    info['ie_key'] = VHXEmbedIE.ie_key()
    info['url'] = VHXEmbedIE._smuggle_referrer(embed_url, site_url)
    info['id'] = embed_id
    info['display_id'] = display_id
    # Title is the cleaned text of the "video-title" element inside #watch-info
    info['title'] = traverse_obj(
        webpage, ({find_element(id='watch-info')}, {find_element(cls='video-title')}, {clean_html}))
    info['description'] = self._html_search_meta('description', webpage, default=None)
    # Drop the query string from the og:image URL; an empty result becomes None
    info['thumbnail'] = update_url(self._og_search_thumbnail(webpage) or '', query=None) or None
    return info
|
||||
@@ -7,7 +7,6 @@ from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -53,7 +52,8 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
_API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
|
||||
_HEADERS = {}
|
||||
|
||||
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
|
||||
_IMAGE_REPL_RE = r'-[0-9a-z]+\.(?P<ext>jpg|png)'
|
||||
_TAGS_RE = re.compile(r'"([^"]+)"|([^ ]+)')
|
||||
|
||||
_ARTWORK_MAP = {
|
||||
'mini': 16,
|
||||
@@ -211,6 +211,7 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
|
||||
format_urls = set()
|
||||
formats = []
|
||||
has_drm = False
|
||||
query = {'client_id': self._CLIENT_ID}
|
||||
if secret_token:
|
||||
query['secret_token'] = secret_token
|
||||
@@ -246,55 +247,24 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
'url': format_url,
|
||||
'quality': 10,
|
||||
'format_note': 'Original',
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
def invalid_url(url):
|
||||
return not url or url in format_urls
|
||||
|
||||
def add_format(f, protocol, is_preview=False):
|
||||
mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
|
||||
if mobj:
|
||||
for k, v in mobj.groupdict().items():
|
||||
if not f.get(k):
|
||||
f[k] = v
|
||||
format_id_list = []
|
||||
if protocol:
|
||||
format_id_list.append(protocol)
|
||||
ext = f.get('ext')
|
||||
if ext == 'aac':
|
||||
f.update({
|
||||
'abr': 256,
|
||||
'quality': 5,
|
||||
'format_note': 'Premium',
|
||||
})
|
||||
for k in ('ext', 'abr'):
|
||||
v = str_or_none(f.get(k))
|
||||
if v:
|
||||
format_id_list.append(v)
|
||||
preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
|
||||
if preview:
|
||||
format_id_list.append('preview')
|
||||
abr = f.get('abr')
|
||||
if abr:
|
||||
f['abr'] = int(abr)
|
||||
if protocol in ('hls', 'hls-aes'):
|
||||
protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
|
||||
else:
|
||||
protocol = 'http'
|
||||
f.update({
|
||||
'format_id': '_'.join(format_id_list),
|
||||
'protocol': protocol,
|
||||
'preference': -10 if preview else None,
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
# New API
|
||||
for t in traverse_obj(info, ('media', 'transcodings', lambda _, v: url_or_none(v['url']))):
|
||||
for t in traverse_obj(info, ('media', 'transcodings', lambda _, v: url_or_none(v['url']) and v['preset'])):
|
||||
if extract_flat:
|
||||
break
|
||||
format_url = t['url']
|
||||
preset = t['preset']
|
||||
preset_base = preset.partition('_')[0]
|
||||
|
||||
protocol = traverse_obj(t, ('format', 'protocol', {str}))
|
||||
protocol = traverse_obj(t, ('format', 'protocol', {str})) or 'http'
|
||||
if protocol.startswith(('ctr-', 'cbc-')):
|
||||
has_drm = True
|
||||
continue
|
||||
if protocol == 'progressive':
|
||||
protocol = 'http'
|
||||
if protocol != 'hls' and '/hls' in format_url:
|
||||
@@ -302,47 +272,74 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
if protocol == 'encrypted-hls' or '/encrypted-hls' in format_url:
|
||||
protocol = 'hls-aes'
|
||||
|
||||
ext = None
|
||||
if preset := traverse_obj(t, ('preset', {str_or_none})):
|
||||
ext = preset.split('_')[0]
|
||||
if ext not in KNOWN_EXTENSIONS:
|
||||
ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str})))
|
||||
|
||||
identifier = join_nonempty(protocol, ext, delim='_')
|
||||
if not self._is_requested(identifier):
|
||||
self.write_debug(f'"{identifier}" is not a requested format, skipping')
|
||||
short_identifier = f'{protocol}_{preset_base}'
|
||||
if preset_base == 'abr':
|
||||
self.write_debug(f'Skipping broken "{short_identifier}" format')
|
||||
continue
|
||||
if not self._is_requested(short_identifier):
|
||||
self.write_debug(f'"{short_identifier}" is not a requested format, skipping')
|
||||
continue
|
||||
|
||||
# XXX: if not extract_flat, 429 error must be caught where _extract_info_dict is called
|
||||
stream_url = traverse_obj(self._call_api(
|
||||
format_url, track_id, f'Downloading {identifier} format info JSON',
|
||||
format_url, track_id, f'Downloading {short_identifier} format info JSON',
|
||||
query=query, headers=self._HEADERS), ('url', {url_or_none}))
|
||||
|
||||
if invalid_url(stream_url):
|
||||
continue
|
||||
format_urls.add(stream_url)
|
||||
add_format({
|
||||
|
||||
mime_type = traverse_obj(t, ('format', 'mime_type', {str}))
|
||||
codec = self._search_regex(r'codecs="([^"]+)"', mime_type, 'codec', default=None)
|
||||
ext = {
|
||||
'mp4a': 'm4a',
|
||||
'opus': 'opus',
|
||||
}.get(codec[:4] if codec else None) or mimetype2ext(mime_type, default=None)
|
||||
if not ext or ext == 'm3u8':
|
||||
ext = preset_base
|
||||
|
||||
is_premium = t.get('quality') == 'hq'
|
||||
abr = int_or_none(
|
||||
self._search_regex(r'(\d+)k$', preset, 'abr', default=None)
|
||||
or self._search_regex(r'\.(\d+)\.(?:opus|mp3)[/?]', stream_url, 'abr', default=None)
|
||||
or (256 if (is_premium and 'aac' in preset) else None))
|
||||
|
||||
is_preview = (t.get('snipped')
|
||||
or '/preview/' in format_url
|
||||
or re.search(r'/(?:preview|playlist)/0/30/', stream_url))
|
||||
|
||||
formats.append({
|
||||
'format_id': join_nonempty(protocol, preset, is_preview and 'preview', delim='_'),
|
||||
'url': stream_url,
|
||||
'ext': ext,
|
||||
}, protocol, t.get('snipped') or '/preview/' in format_url)
|
||||
'acodec': codec,
|
||||
'vcodec': 'none',
|
||||
'abr': abr,
|
||||
'protocol': 'm3u8_native' if protocol in ('hls', 'hls-aes') else 'http',
|
||||
'container': 'm4a_dash' if ext == 'm4a' else None,
|
||||
'quality': 5 if is_premium else 0 if (abr and abr >= 160) else -1,
|
||||
'format_note': 'Premium' if is_premium else None,
|
||||
'preference': -10 if is_preview else None,
|
||||
})
|
||||
|
||||
for f in formats:
|
||||
f['vcodec'] = 'none'
|
||||
|
||||
if not formats and info.get('policy') == 'BLOCK':
|
||||
self.raise_geo_restricted(metadata_available=True)
|
||||
if not formats:
|
||||
if has_drm:
|
||||
self.report_drm(track_id)
|
||||
if info.get('policy') == 'BLOCK':
|
||||
self.raise_geo_restricted(metadata_available=True)
|
||||
|
||||
user = info.get('user') or {}
|
||||
|
||||
thumbnails = []
|
||||
artwork_url = info.get('artwork_url')
|
||||
thumbnail = artwork_url or user.get('avatar_url')
|
||||
if isinstance(thumbnail, str):
|
||||
if re.search(self._IMAGE_REPL_RE, thumbnail):
|
||||
if url_or_none(thumbnail):
|
||||
if mobj := re.search(self._IMAGE_REPL_RE, thumbnail):
|
||||
for image_id, size in self._ARTWORK_MAP.items():
|
||||
# Soundcloud serves JPEG regardless of URL's ext *except* for "original" thumb
|
||||
ext = mobj.group('ext') if image_id == 'original' else 'jpg'
|
||||
i = {
|
||||
'id': image_id,
|
||||
'url': re.sub(self._IMAGE_REPL_RE, f'-{image_id}.jpg', thumbnail),
|
||||
'url': re.sub(self._IMAGE_REPL_RE, f'-{image_id}.{ext}', thumbnail),
|
||||
}
|
||||
if image_id == 'tiny' and not artwork_url:
|
||||
size = 18
|
||||
@@ -367,6 +364,7 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
'uploader_url': user.get('permalink_url'),
|
||||
'timestamp': unified_timestamp(info.get('created_at')),
|
||||
'title': info.get('title'),
|
||||
'track': info.get('title'),
|
||||
'description': info.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': float_or_none(info.get('duration'), 1000),
|
||||
@@ -377,6 +375,7 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
'comment_count': extract_count('comment'),
|
||||
'repost_count': extract_count('reposts'),
|
||||
'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
|
||||
'tags': traverse_obj(info, ('tag_list', {self._TAGS_RE.findall}, ..., ..., filter)),
|
||||
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
|
||||
'formats': formats if not extract_flat else None,
|
||||
}
|
||||
@@ -399,7 +398,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
||||
(?!stations/track)
|
||||
(?P<uploader>[\w\d-]+)/
|
||||
(?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
|
||||
(?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight|comments)/?(?:$|[?#]))
|
||||
(?P<title>[\w\d-]+)
|
||||
(?:/(?P<token>(?!(?:albums|sets|recommended))[^?]+?))?
|
||||
(?:[?].*)?$)
|
||||
@@ -416,6 +415,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'id': '62986583',
|
||||
'ext': 'opus',
|
||||
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||
'track': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
|
||||
'uploader': 'E.T. ExTerrestrial Music',
|
||||
'uploader_id': '1571244',
|
||||
@@ -429,6 +429,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'repost_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
|
||||
'uploader_url': 'https://soundcloud.com/ethmusic',
|
||||
'tags': 'count:14',
|
||||
},
|
||||
},
|
||||
# geo-restricted
|
||||
@@ -438,12 +439,13 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'id': '47127627',
|
||||
'ext': 'opus',
|
||||
'title': 'Goldrushed',
|
||||
'track': 'Goldrushed',
|
||||
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||
'uploader': 'The Royal Concept',
|
||||
'uploader_id': '9615865',
|
||||
'timestamp': 1337635207,
|
||||
'upload_date': '20120521',
|
||||
'duration': 227.155,
|
||||
'duration': 227.103,
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
@@ -453,6 +455,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
|
||||
'genres': ['Alternative'],
|
||||
'artists': ['The Royal Concept'],
|
||||
'tags': [],
|
||||
},
|
||||
},
|
||||
# private link
|
||||
@@ -463,6 +466,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'id': '123998367',
|
||||
'ext': 'mp3',
|
||||
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||
'track': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||
'description': 'test chars: "\'/\\ä↭',
|
||||
'uploader': 'jaimeMF',
|
||||
'uploader_id': '69767071',
|
||||
@@ -477,6 +481,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'uploader_url': 'https://soundcloud.com/jaimemf',
|
||||
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
|
||||
'genres': ['youtubedl'],
|
||||
'tags': [],
|
||||
},
|
||||
},
|
||||
# private link (alt format)
|
||||
@@ -487,6 +492,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'id': '123998367',
|
||||
'ext': 'mp3',
|
||||
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||
'track': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||
'description': 'test chars: "\'/\\ä↭',
|
||||
'uploader': 'jaimeMF',
|
||||
'uploader_id': '69767071',
|
||||
@@ -501,16 +507,18 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'uploader_url': 'https://soundcloud.com/jaimemf',
|
||||
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
|
||||
'genres': ['youtubedl'],
|
||||
'tags': [],
|
||||
},
|
||||
},
|
||||
# downloadable song
|
||||
{
|
||||
'url': 'https://soundcloud.com/the80m/the-following',
|
||||
'md5': '9ffcddb08c87d74fb5808a3c183a1d04',
|
||||
'md5': 'ecb87d7705d5f53e6c02a63760573c75', # wav: '9ffcddb08c87d74fb5808a3c183a1d04'
|
||||
'info_dict': {
|
||||
'id': '343609555',
|
||||
'ext': 'wav',
|
||||
'ext': 'opus', # wav original available with auth
|
||||
'title': 'The Following',
|
||||
'track': 'The Following',
|
||||
'description': '',
|
||||
'uploader': '80M',
|
||||
'uploader_id': '312384765',
|
||||
@@ -526,16 +534,20 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'view_count': int,
|
||||
'genres': ['Dance & EDM'],
|
||||
'artists': ['80M'],
|
||||
'tags': ['80M', 'EDM', 'Dance', 'Music'],
|
||||
},
|
||||
'expected_warnings': ['Original download format is only available for registered users'],
|
||||
},
|
||||
# private link, downloadable format
|
||||
# tags with spaces (e.g. "Uplifting Trance", "Ori Uplift")
|
||||
{
|
||||
'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
|
||||
'md5': '64a60b16e617d41d0bef032b7f55441e',
|
||||
'md5': '2e1530d0e9986a833a67cb34fc90ece0', # wav: '64a60b16e617d41d0bef032b7f55441e'
|
||||
'info_dict': {
|
||||
'id': '340344461',
|
||||
'ext': 'wav',
|
||||
'ext': 'opus', # wav original available with auth
|
||||
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||
'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
||||
'uploader': 'Ori Uplift Music',
|
||||
'uploader_id': '12563093',
|
||||
@@ -551,7 +563,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'uploader_url': 'https://soundcloud.com/oriuplift',
|
||||
'genres': ['Trance'],
|
||||
'artists': ['Ori Uplift'],
|
||||
'tags': ['Orchestral', 'Emotional', 'Uplifting Trance', 'Trance', 'Ori Uplift', 'UpOnly'],
|
||||
},
|
||||
'expected_warnings': ['Original download format is only available for registered users'],
|
||||
},
|
||||
# no album art, use avatar pic for thumbnail
|
||||
{
|
||||
@@ -561,6 +575,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'id': '309699954',
|
||||
'ext': 'mp3',
|
||||
'title': 'Sideways (Prod. Mad Real)',
|
||||
'track': 'Sideways (Prod. Mad Real)',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'uploader': 'garyvee',
|
||||
'uploader_id': '2366352',
|
||||
@@ -575,6 +590,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'repost_count': int,
|
||||
'uploader_url': 'https://soundcloud.com/garyvee',
|
||||
'artists': ['MadReal'],
|
||||
'tags': [],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -587,6 +603,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'id': '583011102',
|
||||
'ext': 'opus',
|
||||
'title': 'Mezzo Valzer',
|
||||
'track': 'Mezzo Valzer',
|
||||
'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
|
||||
'uploader': 'Giovanni Sarani',
|
||||
'uploader_id': '3352531',
|
||||
@@ -601,8 +618,47 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'repost_count': int,
|
||||
'genres': ['Piano'],
|
||||
'uploader_url': 'https://soundcloud.com/giovannisarani',
|
||||
'tags': 'count:10',
|
||||
},
|
||||
},
|
||||
# .png "original" artwork, 160kbps m4a HLS format
|
||||
{
|
||||
'url': 'https://soundcloud.com/skorxh/audio-dealer',
|
||||
'info_dict': {
|
||||
'id': '2011421339',
|
||||
'ext': 'm4a',
|
||||
'title': 'audio dealer',
|
||||
'description': '',
|
||||
'uploader': '$KORCH',
|
||||
'uploader_id': '150292288',
|
||||
'uploader_url': 'https://soundcloud.com/skorxh',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'duration': 213.469,
|
||||
'tags': [],
|
||||
'artists': ['$KORXH'],
|
||||
'track': 'audio dealer',
|
||||
'timestamp': 1737143201,
|
||||
'upload_date': '20250117',
|
||||
'license': 'all-rights-reserved',
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-original.png',
|
||||
'thumbnails': [
|
||||
{'id': 'mini', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-mini.jpg'},
|
||||
{'id': 'tiny', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-tiny.jpg'},
|
||||
{'id': 'small', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-small.jpg'},
|
||||
{'id': 'badge', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-badge.jpg'},
|
||||
{'id': 't67x67', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t67x67.jpg'},
|
||||
{'id': 'large', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-large.jpg'},
|
||||
{'id': 't300x300', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t300x300.jpg'},
|
||||
{'id': 'crop', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-crop.jpg'},
|
||||
{'id': 't500x500', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t500x500.jpg'},
|
||||
{'id': 'original', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-original.png'},
|
||||
],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8', 'format': 'hls_aac_160k'},
|
||||
},
|
||||
{
|
||||
# AAC HQ format available (account with active subscription needed)
|
||||
'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
|
||||
@@ -662,6 +718,11 @@ class SoundcloudPlaylistBaseIE(SoundcloudBaseIE):
|
||||
'playlistId': playlist_id,
|
||||
'playlistSecretToken': token,
|
||||
}, headers=self._HEADERS)
|
||||
album_info = traverse_obj(playlist, {
|
||||
'album': ('title', {str}),
|
||||
'album_artist': ('user', 'username', {str}),
|
||||
'album_type': ('set_type', {str}, {lambda x: x or 'playlist'}),
|
||||
})
|
||||
entries = []
|
||||
for track in tracks:
|
||||
track_id = str_or_none(track.get('id'))
|
||||
@@ -673,11 +734,17 @@ class SoundcloudPlaylistBaseIE(SoundcloudBaseIE):
|
||||
if token:
|
||||
url += '?secret_token=' + token
|
||||
entries.append(self.url_result(
|
||||
url, SoundcloudIE.ie_key(), track_id))
|
||||
url, SoundcloudIE.ie_key(), track_id, url_transparent=True, **album_info))
|
||||
return self.playlist_result(
|
||||
entries, playlist_id,
|
||||
playlist.get('title'),
|
||||
playlist.get('description'))
|
||||
playlist.get('description'),
|
||||
**album_info,
|
||||
**traverse_obj(playlist, {
|
||||
'uploader': ('user', 'username', {str}),
|
||||
'uploader_id': ('user', 'id', {str_or_none}),
|
||||
}),
|
||||
)
|
||||
|
||||
|
||||
class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
|
||||
@@ -689,6 +756,11 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
|
||||
'id': '2284613',
|
||||
'title': 'The Royal Concept EP',
|
||||
'description': 'md5:71d07087c7a449e8941a70a29e34671e',
|
||||
'uploader': 'The Royal Concept',
|
||||
'uploader_id': '9615865',
|
||||
'album': 'The Royal Concept EP',
|
||||
'album_artists': ['The Royal Concept'],
|
||||
'album_type': 'ep',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
@@ -782,7 +854,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
|
||||
(?:(?:www|m)\.)?soundcloud\.com/
|
||||
(?P<user>[^/]+)
|
||||
(?:/
|
||||
(?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
|
||||
(?P<rsrc>tracks|albums|sets|reposts|likes|spotlight|comments)
|
||||
)?
|
||||
/?(?:[?#].*)?$
|
||||
'''
|
||||
@@ -836,6 +908,13 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
|
||||
'title': 'Grynpyret (Spotlight)',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
'url': 'https://soundcloud.com/one-thousand-and-one/comments',
|
||||
'info_dict': {
|
||||
'id': '992430331',
|
||||
'title': '7x11x13-testing (Comments)',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}]
|
||||
|
||||
_BASE_URL_MAP = {
|
||||
@@ -846,6 +925,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
|
||||
'reposts': 'stream/users/%s/reposts',
|
||||
'likes': 'users/%s/likes',
|
||||
'spotlight': 'users/%s/spotlight',
|
||||
'comments': 'users/%s/comments',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -966,6 +1046,11 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
|
||||
'id': '4110309',
|
||||
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||
'description': 're:.*?TILT Brass - Bowery Poetry Club',
|
||||
'uploader': 'Non-Site Records',
|
||||
'uploader_id': '33660914',
|
||||
'album_artists': ['Non-Site Records'],
|
||||
'album_type': 'playlist',
|
||||
'album': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}]
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from .bunnycdn import BunnyCdnIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import try_get, unified_timestamp
|
||||
from ..utils import make_archive_id, try_get, unified_timestamp
|
||||
|
||||
|
||||
class SovietsClosetBaseIE(InfoExtractor):
|
||||
@@ -43,7 +44,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||
'url': 'https://sovietscloset.com/video/1337',
|
||||
'md5': 'bd012b04b261725510ca5383074cdd55',
|
||||
'info_dict': {
|
||||
'id': '1337',
|
||||
'id': '2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Witcher #13',
|
||||
'thumbnail': r're:^https?://.*\.b-cdn\.net/2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67/thumbnail\.jpg$',
|
||||
@@ -55,20 +56,23 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||
'upload_date': '20170413',
|
||||
'uploader_id': 'SovietWomble',
|
||||
'uploader_url': 'https://www.twitch.tv/SovietWomble',
|
||||
'duration': 7007,
|
||||
'duration': 7008,
|
||||
'was_live': True,
|
||||
'availability': 'public',
|
||||
'series': 'The Witcher',
|
||||
'season': 'Misc',
|
||||
'episode_number': 13,
|
||||
'episode': 'Episode 13',
|
||||
'creators': ['SovietWomble'],
|
||||
'description': '',
|
||||
'_old_archive_ids': ['sovietscloset 1337'],
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://sovietscloset.com/video/1105',
|
||||
'md5': '89fa928f183893cb65a0b7be846d8a90',
|
||||
'info_dict': {
|
||||
'id': '1105',
|
||||
'id': 'c0e5e76f-3a93-40b4-bf01-12343c2eec5d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Arma 3 - Zeus Games #5',
|
||||
'uploader': 'SovietWomble',
|
||||
@@ -80,39 +84,20 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||
'upload_date': '20160420',
|
||||
'uploader_id': 'SovietWomble',
|
||||
'uploader_url': 'https://www.twitch.tv/SovietWomble',
|
||||
'duration': 8804,
|
||||
'duration': 8805,
|
||||
'was_live': True,
|
||||
'availability': 'public',
|
||||
'series': 'Arma 3',
|
||||
'season': 'Zeus Games',
|
||||
'episode_number': 5,
|
||||
'episode': 'Episode 5',
|
||||
'creators': ['SovietWomble'],
|
||||
'description': '',
|
||||
'_old_archive_ids': ['sovietscloset 1105'],
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _extract_bunnycdn_iframe(self, video_id, bunnycdn_id):
    """Fetch the BunnyCDN embed iframe and pull formats, thumbnail and duration from it.

    Every request is sent with the ``self.MEDIADELIVERY_REFERER`` headers.

    Returns a dict with ``formats``, ``thumbnail`` and ``duration``;
    ``duration`` is None when no formats were extracted.
    """
    referer_headers = self.MEDIADELIVERY_REFERER
    embed_page = self._download_webpage(
        f'https://iframe.mediadelivery.net/embed/5105/{bunnycdn_id}',
        video_id, note='Downloading BunnyCDN iframe', headers=referer_headers)

    # Both URLs are taken as the first (non-greedy) match in the iframe HTML
    playlist_url = self._search_regex(r'(https?://.*?\.m3u8)', embed_page, 'm3u8 url')
    thumbnail = self._search_regex(r'(https?://.*?thumbnail\.jpg)', embed_page, 'thumbnail url')

    formats = self._extract_m3u8_formats(playlist_url, video_id, headers=referer_headers)

    duration = None
    if formats:
        # Duration is read from the first extracted format's playlist
        duration = self._extract_m3u8_vod_duration(
            formats[0]['url'], video_id, headers=referer_headers)

    return {
        'formats': formats,
        'thumbnail': thumbnail,
        'duration': duration,
    }
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -122,13 +107,13 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||
|
||||
stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']
|
||||
|
||||
return {
|
||||
return self.url_result(
|
||||
f'https://iframe.mediadelivery.net/embed/5105/{stream["bunnyId"]}', ie=BunnyCdnIE, url_transparent=True,
|
||||
**self.video_meta(
|
||||
video_id=video_id, game_name=stream['game']['name'],
|
||||
category_name=try_get(stream, lambda x: x['subcategory']['name'], str),
|
||||
episode_number=stream.get('number'), stream_date=stream.get('date')),
|
||||
**self._extract_bunnycdn_iframe(video_id, stream['bunnyId']),
|
||||
}
|
||||
_old_archive_ids=[make_archive_id(self, video_id)])
|
||||
|
||||
|
||||
class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
|
||||
|
||||
@@ -207,7 +207,7 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
|
||||
|
||||
|
||||
class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?(?:ppv|live)/(?P<id>\w+)'
|
||||
IE_NAME = 'theatercomplextown:ppv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
|
||||
@@ -229,6 +229,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.theater-complex.town/en/live/79akNM7bJeD5Fi9EP39aDp',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_API_PATH = 'events'
|
||||
|
||||
@@ -28,24 +28,21 @@ class StripchatIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id, headers=self.geo_verification_headers())
|
||||
data = self._search_json(
|
||||
r'<script\b[^>]*>\s*window\.__PRELOADED_STATE__\s*=',
|
||||
webpage, 'data', video_id, transform_source=lowercase_escape)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<script\b[^>]*>\s*window\.__PRELOADED_STATE__\s*=(?P<value>.*?)<\/script>',
|
||||
webpage, 'data', default='{}', group='value'),
|
||||
video_id, transform_source=lowercase_escape, fatal=False)
|
||||
if not data:
|
||||
raise ExtractorError('Unable to find configuration for stream.')
|
||||
|
||||
if traverse_obj(data, ('viewCam', 'show'), expected_type=dict):
|
||||
raise ExtractorError('Model is in private show', expected=True)
|
||||
elif not traverse_obj(data, ('viewCam', 'model', 'isLive'), expected_type=bool):
|
||||
if traverse_obj(data, ('viewCam', 'show', {dict})):
|
||||
raise ExtractorError('Model is in a private show', expected=True)
|
||||
if not traverse_obj(data, ('viewCam', 'model', 'isLive', {bool})):
|
||||
raise UserNotLive(video_id=video_id)
|
||||
|
||||
model_id = traverse_obj(data, ('viewCam', 'model', 'id'), expected_type=int)
|
||||
model_id = data['viewCam']['model']['id']
|
||||
|
||||
formats = []
|
||||
for host in traverse_obj(data, ('config', 'data', (
|
||||
# HLS hosts are currently found in .configV3.static.features.hlsFallback.fallbackDomains[]
|
||||
# The rest of the path is for backwards compatibility and to guard against A/B testing
|
||||
for host in traverse_obj(data, ((('config', 'data'), ('configV3', 'static')), (
|
||||
(('features', 'featuresV2'), 'hlsFallback', 'fallbackDomains', ...), 'hlsStreamHost'))):
|
||||
formats = self._extract_m3u8_formats(
|
||||
f'https://edge-hls.{host}/hls/{model_id}/master/{model_id}_auto.m3u8',
|
||||
@@ -53,7 +50,7 @@ class StripchatIE(InfoExtractor):
|
||||
if formats:
|
||||
break
|
||||
if not formats:
|
||||
self.raise_no_formats('No active streams found', expected=True)
|
||||
self.raise_no_formats('Unable to extract stream host', video_id=video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
199
yt_dlp/extractor/subsplash.py
Normal file
199
yt_dlp/extractor/subsplash.py
Normal file
@@ -0,0 +1,199 @@
|
||||
import functools
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_call,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SubsplashBaseIE(InfoExtractor):
    """Shared helpers for Subsplash extractors: auth-header lookup and media-item parsing."""

    def _get_headers(self, url, display_id):
        """Build the ``Authorization`` header for ``url``.

        The guest token is looked up in this order, stopping at the first
        non-empty value:

        1. the ``ss-token-guest`` cookie already stored for ``url``;
        2. the same cookie after downloading the page;
        3. the ``x-api-token`` header of that page's response;
        4. ``apiToken`` in the JSON of the ``shoebox-tokens`` script tag;
        5. an escaped ``"tokens":{"guest":"..."}`` fragment in the page.

        Returns ``{'Authorization': 'Bearer <token>'}``, or None (after a
        warning) when no token could be found.
        """
        def guest_cookie():
            # try_call yields None if the cookie is missing
            return try_call(lambda: self._get_cookies(url)['ss-token-guest'].value)

        token = guest_cookie()
        if not token:
            webpage, urlh = self._download_webpage_handle(url, display_id)
            token = guest_cookie()
            if not token:
                token = urlh.get_header('x-api-token')
            if not token:
                token = self._search_json(
                    r'<script[^>]+\bid="shoebox-tokens"[^>]*>', webpage, 'shoebox tokens',
                    display_id, default={}).get('apiToken')
            if not token:
                token = self._search_regex(
                    r'\\"tokens\\":{\\"guest\\":\\"([A-Za-z0-9._-]+)\\"', webpage, 'token', default=None)

        if token:
            return {'Authorization': f'Bearer {token}'}

        self.report_warning('Unable to extract auth token')
        return None

    def _extract_video(self, data, video_id):
        """Convert one media-item object ``data`` into an info dict.

        Formats come from two places under ``_embedded.video._embedded``:
        the first entry of ``playlists`` (extracted as HLS, non-fatal) and
        the first ``video-outputs`` entry with a valid related URL (added as
        the ``direct`` format).

        Returns a dict with ``id``, ``formats`` and whatever metadata fields
        could be read from ``data``.
        """
        embedded = traverse_obj(data, ('_embedded', 'video', '_embedded', {dict}))

        formats = []
        hls_url = traverse_obj(embedded, ('playlists', 0, '_links', 'related', 'href', {url_or_none}))
        if hls_url:
            formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False))

        direct_entry = traverse_obj(
            embedded, ('video-outputs', lambda _, v: url_or_none(v['_links']['related']['href']), any))
        if direct_entry:
            direct_format = {
                'url': direct_entry['_links']['related']['href'],
                'format_id': 'direct',
                'quality': 1,
            }
            direct_format.update(traverse_obj(direct_entry, {
                'height': ('height', {int_or_none}),
                'width': ('width', {int_or_none}),
                'filesize': ('file_size', {int_or_none}),
            }))
            formats.append(direct_format)

        info = {
            'id': video_id,
            'formats': formats,
        }
        info.update(traverse_obj(data, {
            'title': ('title', {str}),
            'description': ('summary_text', {str}),
            'thumbnail': ('_embedded', 'images', 0, '_links', 'related', 'href', {url_or_none}),
            # NOTE(review): scale=1000 suggests the API reports milliseconds; confirm
            'duration': ('_embedded', 'video', 'duration', {int_or_none(scale=1000)}),
            'timestamp': ('date', {parse_iso8601}),
            'release_timestamp': ('published_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
        }))
        return info
|
||||
|
||||
|
||||
class SubsplashIE(SubsplashBaseIE):
    """Extractor for single Subsplash media items (long and short URLs)."""

    _VALID_URL = [
        r'https?://(?:www\.)?subsplash\.com/(?:u/)?[^/?#]+/[^/?#]+/(?:d/|mi/\+)(?P<id>\w+)',
        r'https?://(?:\w+\.)?subspla\.sh/(?P<id>\w+)',
    ]
    _TESTS = [{
        'url': 'https://subsplash.com/u/skywatchtv/media/d/5whnx5s-the-grand-delusion-taking-place-right-now',
        'md5': 'd468729814e533cec86f1da505dec82d',
        'info_dict': {
            'id': '5whnx5s',
            'ext': 'mp4',
            'title': 'THE GRAND DELUSION TAKING PLACE RIGHT NOW!',
            'description': 'md5:220a630865c3697b0ec9dcb3a70cbc33',
            'upload_date': '20240901',
            'duration': 1710,
            'thumbnail': r're:https?://.*\.(?:jpg|png)$',
            'modified_date': '20240901',
            'release_date': '20240901',
            'release_timestamp': 1725195600,
            'timestamp': 1725148800,
            'modified_timestamp': 1725195657,
        },
    }, {
        'url': 'https://subsplash.com/u/prophecywatchers/media/d/n4dr8b2-the-transhumanist-plan-for-humanity-billy-crone',
        'md5': '01982d58021af81c969958459bd81f13',
        'info_dict': {
            'id': 'n4dr8b2',
            'ext': 'mp4',
            'title': 'The Transhumanist Plan for Humanity | Billy Crone',
            'upload_date': '20240903',
            'duration': 1709,
            'thumbnail': r're:https?://.*\.(?:jpg|png)$',
            'timestamp': 1725321600,
            'modified_date': '20241010',
            'release_date': '20240903',
            'release_timestamp': 1725379200,
            'modified_timestamp': 1728577804,
        },
    }, {
        'url': 'https://subsplash.com/laiglesiadelcentro/vid/mi/+ecb6a6b?autoplay=true',
        'md5': '013c9b1e391dd4b34d8612439445deef',
        'info_dict': {
            'id': 'ecb6a6b',
            'ext': 'mp4',
            'thumbnail': r're:https?://.*\.(?:jpg|png)$',
            'release_timestamp': 1477095852,
            'title': 'En el Principio Era el Verbo | EVANGELIO DE JUAN | Ps. Gadiel Ríos',
            'timestamp': 1425772800,
            'upload_date': '20150308',
            'description': 'md5:f368221de93176654989ba66bb564798',
            'modified_timestamp': 1730258864,
            'modified_date': '20241030',
            'release_date': '20161022',
        },
    }, {
        'url': 'https://prophecywatchers.subspla.sh/8gps8cx',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look the media item up by its short code and convert it to an info dict."""
        video_id = self._match_id(url)
        # The short code from the URL is the lookup key; `include` asks the
        # API to embed the related objects in the same response
        query = {
            'filter[short_code]': video_id,
            'include': 'images,audio.audio-outputs,audio.video,video.video-outputs,video.playlists,document,broadcast',
        }
        response = self._download_json(
            'https://core.subsplash.com/media/v1/media-items', video_id,
            headers=self._get_headers(url, video_id), query=query)

        # Only the first returned item is used
        media_item = traverse_obj(response, ('_embedded', 'media-items', 0))
        return self._extract_video(media_item, video_id)
|
||||
|
||||
|
||||
class SubsplashPlaylistIE(SubsplashBaseIE):
    """Extractor for Subsplash media series, returned as paged playlists."""

    IE_NAME = 'subsplash:playlist'
    _VALID_URL = r'https?://(?:www\.)?subsplash\.com/[^/?#]+/(?:our-videos|media)/ms/\+(?P<id>\w+)'
    # Number of media items requested per API page
    _PAGE_SIZE = 15
    _TESTS = [{
        'url': 'https://subsplash.com/skywatchtv/our-videos/ms/+dbyjzp8',
        'info_dict': {
            'id': 'dbyjzp8',
            'title': 'Five in Ten',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://subsplash.com/prophecywatchers/media/ms/+n42mr48',
        'info_dict': {
            'id': 'n42mr48',
            'title': 'Road to Zion Series',
        },
        'playlist_mincount': 13,
    }, {
        'url': 'https://subsplash.com/prophecywatchers/media/ms/+918b9f6',
        'only_matching': True,
    }]

    def _entries(self, series_id, headers, page):
        """Yield the info dicts for one page of a series.

        `page` is the zero-based page index supplied by the paged list;
        the API request uses `page + 1`.
        """
        data = self._download_json(
            'https://core.subsplash.com/media/v1/media-items', series_id, headers=headers,
            query={
                'filter[broadcast.status|broadcast.status]': 'null|on-demand',
                'filter[media_series]': series_id,
                'filter[status]': 'published',
                'include': 'images,audio.audio-outputs,audio.video,video.video-outputs,video.playlists,document',
                'page[number]': page + 1,
                'page[size]': self._PAGE_SIZE,
                'sort': '-position',
            }, note=f'Downloading page {page + 1}')

        # Items without a truthy short code are skipped: it is needed both
        # as the entry id and to build the entry's webpage URL
        for entry in traverse_obj(data, ('_embedded', 'media-items', lambda _, v: v['short_code'])):
            entry_id = entry['short_code']
            info = self._extract_video(entry, entry_id)
            yield {
                **info,
                'webpage_url': f'https://subspla.sh/{entry_id}',
                # Attribute the entries to the single-item extractor
                'extractor_key': SubsplashIE.ie_key(),
                'extractor': SubsplashIE.IE_NAME,
            }

    def _real_extract(self, url):
        """Resolve the series by short code and return it as a paged playlist."""
        display_id = self._match_id(url)
        headers = self._get_headers(url, display_id)

        data = self._download_json(
            'https://core.subsplash.com/media/v1/media-series', display_id, headers=headers,
            query={'filter[short_code]': display_id})
        series_data = traverse_obj(data, ('_embedded', 'media-series', 0, {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'count': ('media_items_count', {int}),
        }))
        # 'id' and 'count' are mandatory: the entries cannot be requested
        # or paged without them, so they are still subscripted directly
        total_pages = math.ceil(series_data['count'] / self._PAGE_SIZE)

        return self.playlist_result(
            InAdvancePagedList(
                functools.partial(self._entries, series_data['id'], headers),
                total_pages, self._PAGE_SIZE),
            # The playlist title is optional metadata; a series without a
            # string title must not abort extraction with a KeyError
            display_id, series_data.get('title'))
|
||||
@@ -46,7 +46,7 @@ class TelecincoBaseIE(InfoExtractor):
|
||||
error_code = traverse_obj(
|
||||
self._webpage_read_content(error.cause.response, caronte['cerbero'], video_id, fatal=False),
|
||||
({json.loads}, 'code', {int}))
|
||||
if error_code == 4038:
|
||||
if error_code in (4038, 40313):
|
||||
self.raise_geo_restricted(countries=['ES'])
|
||||
raise
|
||||
|
||||
|
||||
@@ -118,8 +118,9 @@ class ThePlatformBaseIE(OnceIE):
|
||||
'categories', lambda _, v: v.get('label') in ('category', None), 'name', {str})) or None,
|
||||
'tags': traverse_obj(info, ('keywords', {lambda x: re.split(r'[;,]\s?', x) if x else None})),
|
||||
'location': extract_site_specific_field('region'),
|
||||
'series': extract_site_specific_field('show'),
|
||||
'series': extract_site_specific_field('show') or extract_site_specific_field('seriesTitle'),
|
||||
'season_number': int_or_none(extract_site_specific_field('seasonNumber')),
|
||||
'episode_number': int_or_none(extract_site_specific_field('episodeNumber')),
|
||||
'media_type': extract_site_specific_field('programmingType') or extract_site_specific_field('type'),
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ from ..utils import (
|
||||
srt_subtitles_timecode,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
truncate_string,
|
||||
try_call,
|
||||
try_get,
|
||||
url_or_none,
|
||||
@@ -249,6 +250,12 @@ class TikTokBaseIE(InfoExtractor):
|
||||
elif fatal:
|
||||
raise ExtractorError('Unable to extract webpage video data')
|
||||
|
||||
if not traverse_obj(video_data, ('video', {dict})) and traverse_obj(video_data, ('isContentClassified', {bool})):
|
||||
message = 'This post may not be comfortable for some audiences. Log in for access'
|
||||
if fatal:
|
||||
self.raise_login_required(message)
|
||||
self.report_warning(f'{message}. {self._login_hint()}', video_id=video_id)
|
||||
|
||||
return video_data, status
|
||||
|
||||
def _get_subtitles(self, aweme_detail, aweme_id, user_name):
|
||||
@@ -413,15 +420,6 @@ class TikTokBaseIE(InfoExtractor):
|
||||
for f in formats:
|
||||
self._set_cookie(urllib.parse.urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value)
|
||||
|
||||
thumbnails = []
|
||||
for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
|
||||
'origin_cover', 'dynamic_cover'):
|
||||
for cover_url in traverse_obj(video_info, (cover_id, 'url_list', ...)):
|
||||
thumbnails.append({
|
||||
'id': cover_id,
|
||||
'url': cover_url,
|
||||
})
|
||||
|
||||
stats_info = aweme_detail.get('statistics') or {}
|
||||
music_info = aweme_detail.get('music') or {}
|
||||
labels = traverse_obj(aweme_detail, ('hybrid_label', ..., 'text'), expected_type=str)
|
||||
@@ -447,7 +445,7 @@ class TikTokBaseIE(InfoExtractor):
|
||||
return {
|
||||
'id': aweme_id,
|
||||
**traverse_obj(aweme_detail, {
|
||||
'title': ('desc', {str}),
|
||||
'title': ('desc', {truncate_string(left=72)}),
|
||||
'description': ('desc', {str}),
|
||||
'timestamp': ('create_time', {int_or_none}),
|
||||
}),
|
||||
@@ -467,7 +465,17 @@ class TikTokBaseIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(
|
||||
aweme_detail, aweme_id, traverse_obj(author_info, 'uploader', 'uploader_id', 'channel_id')),
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnails': [
|
||||
{
|
||||
'id': cover_id,
|
||||
'url': cover_url,
|
||||
'preference': -1 if cover_id in ('cover', 'origin_cover') else -2,
|
||||
}
|
||||
for cover_id in (
|
||||
'cover', 'ai_dynamic_cover', 'animated_cover',
|
||||
'ai_dynamic_cover_bak', 'origin_cover', 'dynamic_cover')
|
||||
for cover_url in traverse_obj(video_info, (cover_id, 'url_list', ...))
|
||||
],
|
||||
'duration': (traverse_obj(video_info, (
|
||||
(None, 'download_addr'), 'duration', {int_or_none(scale=1000)}, any))
|
||||
or traverse_obj(music_info, ('duration', {int_or_none}))),
|
||||
@@ -588,7 +596,7 @@ class TikTokBaseIE(InfoExtractor):
|
||||
'duration': ('duration', {int_or_none}),
|
||||
})),
|
||||
**traverse_obj(aweme_detail, {
|
||||
'title': ('desc', {str}),
|
||||
'title': ('desc', {truncate_string(left=72)}),
|
||||
'description': ('desc', {str}),
|
||||
# audio-only slideshows have a video duration of 0 and an actual audio duration
|
||||
'duration': ('video', 'duration', {int_or_none}, filter),
|
||||
@@ -600,11 +608,15 @@ class TikTokBaseIE(InfoExtractor):
|
||||
'repost_count': 'shareCount',
|
||||
'comment_count': 'commentCount',
|
||||
}), expected_type=int_or_none),
|
||||
'thumbnails': traverse_obj(aweme_detail, (
|
||||
(None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {
|
||||
'url': ({url_or_none}, {self._proto_relative_url}),
|
||||
},
|
||||
)),
|
||||
'thumbnails': [
|
||||
{
|
||||
'id': cover_id,
|
||||
'url': self._proto_relative_url(cover_url),
|
||||
'preference': -2 if cover_id == 'dynamicCover' else -1,
|
||||
}
|
||||
for cover_id in ('thumbnail', 'cover', 'dynamicCover', 'originCover')
|
||||
for cover_url in traverse_obj(aweme_detail, ((None, 'video'), cover_id, {url_or_none}))
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@@ -645,7 +657,7 @@ class TikTokIE(TikTokBaseIE):
|
||||
'info_dict': {
|
||||
'id': '6742501081818877190',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:5e2a23877420bb85ce6521dbee39ba94',
|
||||
'title': 'Tag 1 Friend reverse this Video and look what happens 🤩😱 @skyandtami ...',
|
||||
'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
|
||||
'duration': 27,
|
||||
'height': 1024,
|
||||
@@ -849,7 +861,7 @@ class TikTokIE(TikTokBaseIE):
|
||||
'info_dict': {
|
||||
'id': '7253412088251534594',
|
||||
'ext': 'm4a',
|
||||
'title': 'я ред флаг простите #переписка #щитпост #тревожныйтиппривязанности #рекомендации ',
|
||||
'title': 'я ред флаг простите #переписка #щитпост #тревожныйтиппривязанности #р...',
|
||||
'description': 'я ред флаг простите #переписка #щитпост #тревожныйтиппривязанности #рекомендации ',
|
||||
'uploader': 'hara_yoimiya',
|
||||
'uploader_id': '6582536342634676230',
|
||||
@@ -890,8 +902,12 @@ class TikTokIE(TikTokBaseIE):
|
||||
|
||||
if video_data and status == 0:
|
||||
return self._parse_aweme_video_web(video_data, url, video_id)
|
||||
elif status == 10216:
|
||||
raise ExtractorError('This video is private', expected=True)
|
||||
elif status in (10216, 10222):
|
||||
# 10216: private post; 10222: private account
|
||||
self.raise_login_required(
|
||||
'You do not have permission to view this post. Log into an account that has access')
|
||||
elif status == 10204:
|
||||
raise ExtractorError('Your IP address is blocked from accessing this post', expected=True)
|
||||
raise ExtractorError(f'Video not available, status code {status}', video_id=video_id)
|
||||
|
||||
|
||||
|
||||
@@ -189,26 +189,6 @@ class TumblrIE(InfoExtractor):
|
||||
'release_date': '20140227',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
'url': 'http://sutiblr.tumblr.com/post/139638707273',
|
||||
'md5': '2dd184b3669e049ba40563a7d423f95c',
|
||||
'info_dict': {
|
||||
'id': 'ir7qBEIKqvq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vine by sutiblr',
|
||||
'alt_title': 'Vine by sutiblr',
|
||||
'uploader': 'sutiblr',
|
||||
'uploader_id': '1198993975374495744',
|
||||
'upload_date': '20160220',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1455940159,
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
'skip': 'Vine is unavailable',
|
||||
}, {
|
||||
'url': 'https://silami.tumblr.com/post/84250043974/my-bad-river-flows-in-you-impression-on-maschine',
|
||||
'md5': '3c92d7c3d867f14ccbeefa2119022277',
|
||||
@@ -366,7 +346,6 @@ class TumblrIE(InfoExtractor):
|
||||
_providers = {
|
||||
'instagram': 'Instagram',
|
||||
'vimeo': 'Vimeo',
|
||||
'vine': 'Vine',
|
||||
'youtube': 'Youtube',
|
||||
'dailymotion': 'Dailymotion',
|
||||
'tiktok': 'TikTok',
|
||||
|
||||
@@ -24,8 +24,6 @@ class TVerIE(InfoExtractor):
|
||||
'channel': 'テレビ朝日',
|
||||
'id': 'ep83nf3w4p',
|
||||
'ext': 'mp4',
|
||||
'onair_label': '5月3日(火)放送分',
|
||||
'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着! テレビ朝日 5月3日(火)放送分',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
|
||||
117
yt_dlp/extractor/tvw.py
Normal file
117
yt_dlp/extractor/tvw.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import clean_html, remove_end, unified_timestamp, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class TvwIE(InfoExtractor):
    """Extractor for tvw.org video pages, backed by the Invintus event API."""

    _VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
        'md5': '9ceb94fe2bb7fd726f74f16356825703',
        'info_dict': {
            'id': '2024011211',
            'ext': 'mp4',
            'title': 'Billy Frank Jr. Statue Maquette Unveiling Ceremony',
            'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$',
            'description': 'md5:58a8150017d985b4f377e11ee8f6f36e',
            'timestamp': 1704902400,
            'upload_date': '20240110',
            'location': 'Legislative Building',
            'display_id': 'billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211',
            'categories': ['General Interest'],
        },
    }, {
        'url': 'https://tvw.org/video/ebeys-landing-state-park-2024081007/',
        'md5': '71e87dae3deafd65d75ff3137b9a32fc',
        'info_dict': {
            'id': '2024081007',
            'ext': 'mp4',
            'title': 'Ebey\'s Landing State Park',
            'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$',
            'description': 'md5:50c5bd73bde32fa6286a008dbc853386',
            'timestamp': 1724310900,
            'upload_date': '20240822',
            'location': 'Ebey’s Landing State Park',
            'display_id': 'ebeys-landing-state-park-2024081007',
            'categories': ['Washington State Parks'],
        },
    }, {
        'url': 'https://tvw.org/video/home-warranties-workgroup-2',
        'md5': 'f678789bf94d07da89809f213cf37150',
        'info_dict': {
            'id': '1999121000',
            'ext': 'mp4',
            'title': 'Home Warranties Workgroup',
            'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$',
            'description': 'md5:861396cc523c9641d0dce690bc5c35f3',
            'timestamp': 946389600,
            'upload_date': '19991228',
            'display_id': 'home-warranties-workgroup-2',
            'categories': ['Legislative'],
        },
    }, {
        'url': 'https://tvw.org/video/washington-to-washington-a-new-space-race-2022041111/?eventID=2022041111',
        'md5': '6f5551090b351aba10c0d08a881b4f30',
        'info_dict': {
            'id': '2022041111',
            'ext': 'mp4',
            'title': 'Washington to Washington - A New Space Race',
            'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$',
            'description': 'md5:f65a24eec56107afbcebb3aa5cd26341',
            'timestamp': 1650394800,
            'upload_date': '20220419',
            'location': 'Hayner Media Center',
            'display_id': 'washington-to-washington-a-new-space-race-2022041111',
            'categories': ['Washington to Washington', 'General Interest'],
        },
    }]

    def _real_extract(self, url):
        """Read the client/event ids from the page, then query the event API."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Both ids are published as <meta> tags; the API call needs both,
        # so their absence is fatal
        client_id = self._html_search_meta('clientID', webpage, fatal=True)
        video_id = self._html_search_meta('eventID', webpage, fatal=True)

        request_body = json.dumps({
            'clientID': client_id,
            'eventID': video_id,
            'showStreams': True,
        }).encode()
        api_headers = {
            'authorization': 'embedder',
            'wsc-api-key': '7WhiEBzijpritypp8bqcU7pfU9uicDR',
        }
        video_data = self._download_json(
            'https://api.v3.invintus.com/v2/Event/getDetailed', video_id,
            headers=api_headers, data=request_body)['data']

        formats, subtitles = [], {}
        for stream_url in traverse_obj(video_data, ('streamingURIs', ..., {url_or_none})):
            # fatal=False: one failing manifest should not discard the other streams
            stream_formats, stream_subs = self._extract_m3u8_formats_and_subtitles(
                stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(stream_formats)
            self._merge_subtitles(stream_subs, target=subtitles)

        caption_url = traverse_obj(video_data, ('captionPath', {url_or_none}))
        if caption_url:
            # The separate caption file is filed under 'en' with a 'vtt' extension
            subtitles.setdefault('en', []).append({'url': caption_url, 'ext': 'vtt'})

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            # Webpage metadata is the fallback; fields present in the API
            # response override it through the unpacking below
            'title': remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
            'description': self._og_search_description(webpage, default=None),
            **traverse_obj(video_data, {
                'title': ('title', {str}),
                'description': ('description', {clean_html}),
                'categories': ('categories', ..., {str}),
                'thumbnail': ('videoThumbnail', {url_or_none}),
                'timestamp': ('startDateTime', {unified_timestamp}),
                'location': ('locationName', {str}),
                'is_live': ('eventStatus', {lambda x: x == 'live'}),
            }),
        }
|
||||
@@ -1,11 +1,12 @@
|
||||
import functools
|
||||
import json
|
||||
import random
|
||||
import math
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
||||
from ..jsinterp import js_number_to_string
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -20,6 +21,7 @@ from ..utils import (
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
truncate_string,
|
||||
try_call,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
@@ -357,6 +359,7 @@ class TwitterCardIE(InfoExtractor):
|
||||
'display_id': '560070183650213889',
|
||||
'uploader_url': 'https://twitter.com/Twitter',
|
||||
},
|
||||
'skip': 'This content is no longer available.',
|
||||
},
|
||||
{
|
||||
'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
|
||||
@@ -364,7 +367,7 @@ class TwitterCardIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '623160978427936768',
|
||||
'ext': 'mp4',
|
||||
'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
|
||||
'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASA...",
|
||||
'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
|
||||
'uploader': 'NASA',
|
||||
'uploader_id': 'NASA',
|
||||
@@ -376,12 +379,14 @@ class TwitterCardIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'tags': ['PlutoFlyby'],
|
||||
'channel_id': '11348282',
|
||||
'_old_archive_ids': ['twitter 623160978427936768'],
|
||||
},
|
||||
'params': {'format': '[protocol=https]'},
|
||||
},
|
||||
{
|
||||
'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
|
||||
'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
|
||||
'md5': 'fb08fbd69595cbd8818f0b2f2a94474d',
|
||||
'info_dict': {
|
||||
'id': 'dq4Oj5quskI',
|
||||
'ext': 'mp4',
|
||||
@@ -389,12 +394,12 @@ class TwitterCardIE(InfoExtractor):
|
||||
'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
|
||||
'upload_date': '20111013',
|
||||
'uploader': 'OMG! UBUNTU!',
|
||||
'uploader_id': 'omgubuntu',
|
||||
'uploader_id': '@omgubuntu',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
|
||||
'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
|
||||
'channel_follower_count': int,
|
||||
'chapters': 'count:8',
|
||||
'uploader_url': 'http://www.youtube.com/user/omgubuntu',
|
||||
'uploader_url': 'https://www.youtube.com/@omgubuntu',
|
||||
'duration': 138,
|
||||
'categories': ['Film & Animation'],
|
||||
'age_limit': 0,
|
||||
@@ -406,29 +411,12 @@ class TwitterCardIE(InfoExtractor):
|
||||
'tags': 'count:12',
|
||||
'channel': 'OMG! UBUNTU!',
|
||||
'playable_in_embed': True,
|
||||
'heatmap': 'count:100',
|
||||
'timestamp': 1318500227,
|
||||
'live_status': 'not_live',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
},
|
||||
{
|
||||
'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
|
||||
'info_dict': {
|
||||
'id': 'iBb2x00UVlv',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20151113',
|
||||
'uploader_id': '1189339351084113920',
|
||||
'uploader': 'ArsenalTerje',
|
||||
'title': 'Vine by ArsenalTerje',
|
||||
'timestamp': 1447451307,
|
||||
'alt_title': 'Vine by ArsenalTerje',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg',
|
||||
'view_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
},
|
||||
{
|
||||
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
|
||||
'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
|
||||
@@ -567,32 +555,14 @@ class TwitterIE(TwitterBaseIE):
|
||||
'age_limit': 0,
|
||||
'_old_archive_ids': ['twitter 700207533655363584'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
|
||||
'md5': '89a15ed345d13b86e9a5a5e051fa308a',
|
||||
'info_dict': {
|
||||
'id': 'MIOxnrUteUd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
|
||||
'uploader': 'TAKUMA',
|
||||
'uploader_id': '1004126642786242560',
|
||||
'timestamp': 1402826626,
|
||||
'upload_date': '20140615',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'alt_title': 'Vine by TAKUMA',
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
'skip': 'Tweet has been deleted',
|
||||
}, {
|
||||
'url': 'https://twitter.com/captainamerica/status/719944021058060289',
|
||||
'info_dict': {
|
||||
'id': '717462543795523584',
|
||||
'display_id': '719944021058060289',
|
||||
'ext': 'mp4',
|
||||
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
|
||||
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theat...',
|
||||
'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
|
||||
'channel_id': '701615052',
|
||||
'uploader_id': 'CaptainAmerica',
|
||||
@@ -629,7 +599,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'info_dict': {
|
||||
'id': '852077943283097602',
|
||||
'ext': 'mp4',
|
||||
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
|
||||
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعا...',
|
||||
'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
|
||||
'channel_id': '2526757026',
|
||||
'uploader': 'عالم الأخبار',
|
||||
@@ -653,7 +623,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'id': '910030238373089285',
|
||||
'display_id': '910031516746514432',
|
||||
'ext': 'mp4',
|
||||
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
|
||||
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terr...',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
|
||||
'channel_id': '2319432498',
|
||||
@@ -745,7 +715,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'id': '1349774757969989634',
|
||||
'display_id': '1349794411333394432',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
|
||||
'title': "Brooklyn Nets - WATCH: Sean Marks' full media session after our acquisition of 8-time...",
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
|
||||
'channel_id': '18552281',
|
||||
@@ -771,7 +741,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'id': '1577855447914409984',
|
||||
'display_id': '1577855540407197696',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
|
||||
'title': 'Oshtru - gm ✨️ now I can post image and video. nice update.',
|
||||
'description': 'md5:b9c3699335447391d11753ab21c70a74',
|
||||
'upload_date': '20221006',
|
||||
'channel_id': '143077138',
|
||||
@@ -793,10 +763,10 @@ class TwitterIE(TwitterBaseIE):
|
||||
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
||||
'info_dict': {
|
||||
'id': '1577719286659006464',
|
||||
'title': 'Ultima Reload - Test',
|
||||
'title': 'Ultima - Test',
|
||||
'description': 'Test https://t.co/Y3KEZD7Dad',
|
||||
'channel_id': '168922496',
|
||||
'uploader': 'Ultima Reload',
|
||||
'uploader': 'Ultima',
|
||||
'uploader_id': 'UltimaShadowX',
|
||||
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
||||
'upload_date': '20221005',
|
||||
@@ -815,7 +785,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'id': '1575559336759263233',
|
||||
'display_id': '1575560063510810624',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
|
||||
'title': 'Max Olson - Absolutely heartbreaking footage captured by our surge probe of catas...',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'md5:95aea692fda36a12081b9629b02daa92',
|
||||
'channel_id': '1094109584',
|
||||
@@ -939,18 +909,18 @@ class TwitterIE(TwitterBaseIE):
|
||||
'playlist_mincount': 2,
|
||||
'info_dict': {
|
||||
'id': '1600649710662213632',
|
||||
'title': 'md5:be05989b0722e114103ed3851a0ffae2',
|
||||
'title': "Jocelyn Laidlaw - How Kirstie Alley's tragic death inspired me to share more about my c...",
|
||||
'timestamp': 1670459604.0,
|
||||
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
||||
'comment_count': int,
|
||||
'uploader_id': 'CTVJLaidlaw',
|
||||
'uploader_id': 'JocelynVLaidlaw',
|
||||
'channel_id': '80082014',
|
||||
'repost_count': int,
|
||||
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
|
||||
'upload_date': '20221208',
|
||||
'age_limit': 0,
|
||||
'uploader': 'Jocelyn Laidlaw',
|
||||
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
||||
'uploader_url': 'https://twitter.com/JocelynVLaidlaw',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
@@ -959,17 +929,17 @@ class TwitterIE(TwitterBaseIE):
|
||||
'info_dict': {
|
||||
'id': '1600649511827013632',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
|
||||
'title': "Jocelyn Laidlaw - How Kirstie Alley's tragic death inspired me to share more about my c... #1",
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'timestamp': 1670459604.0,
|
||||
'channel_id': '80082014',
|
||||
'uploader_id': 'CTVJLaidlaw',
|
||||
'uploader_id': 'JocelynVLaidlaw',
|
||||
'uploader': 'Jocelyn Laidlaw',
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
|
||||
'duration': 102.226,
|
||||
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
||||
'uploader_url': 'https://twitter.com/JocelynVLaidlaw',
|
||||
'display_id': '1600649710662213632',
|
||||
'like_count': int,
|
||||
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
||||
@@ -1028,6 +998,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'_old_archive_ids': ['twitter 1599108751385972737'],
|
||||
},
|
||||
'params': {'noplaylist': True},
|
||||
'skip': 'Tweet is limited',
|
||||
}, {
|
||||
'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
|
||||
'info_dict': {
|
||||
@@ -1039,10 +1010,10 @@ class TwitterIE(TwitterBaseIE):
|
||||
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
||||
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
||||
'age_limit': 0,
|
||||
'uploader': 'Mün',
|
||||
'uploader': 'Boy Called Mün',
|
||||
'repost_count': int,
|
||||
'upload_date': '20221206',
|
||||
'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
||||
'title': 'Boy Called Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'tags': [],
|
||||
@@ -1080,7 +1051,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'id': '1694928337846538240',
|
||||
'ext': 'mp4',
|
||||
'display_id': '1695424220702888009',
|
||||
'title': 'md5:e8daa9527bc2b947121395494f786d9d',
|
||||
'title': 'Benny Johnson - Donald Trump driving through the urban, poor neighborhoods of Atlanta...',
|
||||
'description': 'md5:004f2d37fd58737724ec75bc7e679938',
|
||||
'channel_id': '15212187',
|
||||
'uploader': 'Benny Johnson',
|
||||
@@ -1104,7 +1075,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'id': '1694928337846538240',
|
||||
'ext': 'mp4',
|
||||
'display_id': '1695424220702888009',
|
||||
'title': 'md5:e8daa9527bc2b947121395494f786d9d',
|
||||
'title': 'Benny Johnson - Donald Trump driving through the urban, poor neighborhoods of Atlanta...',
|
||||
'description': 'md5:004f2d37fd58737724ec75bc7e679938',
|
||||
'channel_id': '15212187',
|
||||
'uploader': 'Benny Johnson',
|
||||
@@ -1139,6 +1110,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['TwitterBroadcast'],
|
||||
'skip': 'Broadcast no longer exists',
|
||||
}, {
|
||||
# Animated gif and quote tweet video
|
||||
'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
|
||||
@@ -1167,7 +1139,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'info_dict': {
|
||||
'id': '1724883339285544960',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
|
||||
'title': 'Robert F. Kennedy Jr - A beautifully crafted short film by Mikki Willis about my independent...',
|
||||
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
|
||||
'display_id': '1724884212803834154',
|
||||
'channel_id': '337808606',
|
||||
@@ -1188,7 +1160,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
}, {
|
||||
# x.com
|
||||
'url': 'https://x.com/historyinmemes/status/1790637656616943991',
|
||||
'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
|
||||
'md5': '4549eda363fecfe37439c455923cba2c',
|
||||
'info_dict': {
|
||||
'id': '1790637589910654976',
|
||||
'ext': 'mp4',
|
||||
@@ -1369,6 +1341,11 @@ class TwitterIE(TwitterBaseIE):
|
||||
},
|
||||
}
|
||||
|
||||
def _generate_syndication_token(self, twid):
|
||||
# ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
|
||||
translation = str.maketrans(dict.fromkeys('0.'))
|
||||
return js_number_to_string((int(twid) / 1e15) * math.pi, 36).translate(translation)
|
||||
|
||||
def _call_syndication_api(self, twid):
|
||||
self.report_warning(
|
||||
'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
|
||||
@@ -1376,8 +1353,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
|
||||
headers={'User-Agent': 'Googlebot'}, query={
|
||||
'id': twid,
|
||||
# TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
|
||||
'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
|
||||
'token': self._generate_syndication_token(twid),
|
||||
})
|
||||
if not status:
|
||||
raise ExtractorError('Syndication endpoint returned empty JSON response')
|
||||
@@ -1424,7 +1400,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
title = description = traverse_obj(
|
||||
status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
|
||||
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
|
||||
title = re.sub(r'\s+(https?://[^ ]+)', '', title)
|
||||
title = truncate_string(re.sub(r'\s+(https?://[^ ]+)', '', title), left=72)
|
||||
user = status.get('user') or {}
|
||||
uploader = user.get('name')
|
||||
if uploader:
|
||||
|
||||
@@ -50,6 +50,9 @@ class KnownDRMIE(UnsupportedInfoExtractor):
|
||||
r'music\.amazon\.(?:\w{2}\.)?\w+',
|
||||
r'(?:watch|front)\.njpwworld\.com',
|
||||
r'qub\.ca/vrai',
|
||||
r'(?:beta\.)?crunchyroll\.com',
|
||||
r'viki\.com',
|
||||
r'deezer\.com',
|
||||
)
|
||||
|
||||
_TESTS = [{
|
||||
@@ -153,6 +156,18 @@ class KnownDRMIE(UnsupportedInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.qub.ca/vrai/l-effet-bocuse-d-or/saison-1/l-effet-bocuse-d-or-saison-1-bande-annonce-1098225063',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.deezer.com/playlist/176747451',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -14,59 +14,69 @@ class VideocampusSachsenIE(InfoExtractor):
|
||||
'corporate.demo.vimp.com',
|
||||
'dancehalldatabase.com',
|
||||
'drehzahl.tv',
|
||||
'educhannel.hs-gesundheit.de',
|
||||
'educhannel.hs-gesundheit.de', # Hochschule für Gesundheit NRW
|
||||
'emedia.ls.haw-hamburg.de',
|
||||
'globale-evolution.net',
|
||||
'hohu.tv',
|
||||
'htvideos.hightechhigh.org',
|
||||
'k210039.vimp.mivitec.net',
|
||||
'media.cmslegal.com',
|
||||
'media.hs-furtwangen.de',
|
||||
'media.hwr-berlin.de',
|
||||
'media.fh-swf.de', # Fachhochschule Südwestfalen
|
||||
'media.hs-furtwangen.de', # Hochschule Furtwangen
|
||||
'media.hwr-berlin.de', # Hochschule für Wirtschaft und Recht Berlin
|
||||
'mediathek.dkfz.de',
|
||||
'mediathek.htw-berlin.de',
|
||||
'mediathek.htw-berlin.de', # Hochschule für Technik und Wirtschaft Berlin
|
||||
'mediathek.polizei-bw.de',
|
||||
'medien.hs-merseburg.de',
|
||||
'mportal.europa-uni.de',
|
||||
'medien.hs-merseburg.de', # Hochschule Merseburg
|
||||
'mitmedia.manukau.ac.nz', # Manukau Institute of Technology Auckland (NZ)
|
||||
'mportal.europa-uni.de', # Europa-Universität Viadrina
|
||||
'pacific.demo.vimp.com',
|
||||
'slctv.com',
|
||||
'streaming.prairiesouth.ca',
|
||||
'tube.isbonline.cn',
|
||||
'univideo.uni-kassel.de',
|
||||
'univideo.uni-kassel.de', # Universität Kassel
|
||||
'ursula2.genetics.emory.edu',
|
||||
'ursulablicklevideoarchiv.com',
|
||||
'v.agrarumweltpaedagogik.at',
|
||||
'video.eplay-tv.de',
|
||||
'video.fh-dortmund.de',
|
||||
'video.hs-offenburg.de',
|
||||
'video.hs-pforzheim.de',
|
||||
'video.hspv.nrw.de',
|
||||
'video.fh-dortmund.de', # Fachhochschule Dortmund
|
||||
'video.hs-nb.de', # Hochschule Neubrandenburg
|
||||
'video.hs-offenburg.de', # Hochschule Offenburg
|
||||
'video.hs-pforzheim.de', # Hochschule Pforzheim
|
||||
'video.hspv.nrw.de', # Hochschule für Polizei und öffentliche Verwaltung NRW
|
||||
'video.irtshdf.fr',
|
||||
'video.pareygo.de',
|
||||
'video.tu-freiberg.de',
|
||||
'videocampus.sachsen.de',
|
||||
'videoportal.uni-freiburg.de',
|
||||
'videoportal.vm.uni-freiburg.de',
|
||||
'video.tu-dortmund.de', # Technische Universität Dortmund
|
||||
'video.tu-freiberg.de', # Technische Universität Bergakademie Freiberg
|
||||
'videocampus.sachsen.de', # Video Campus Sachsen (gemeinsame Videoplattform sächsischer Universitäten, Hochschulen und der Berufsakademie Sachsen)
|
||||
'videoportal.uni-freiburg.de', # Albert-Ludwigs-Universität Freiburg
|
||||
'videoportal.vm.uni-freiburg.de', # Albert-Ludwigs-Universität Freiburg
|
||||
'videos.duoc.cl',
|
||||
'videos.uni-paderborn.de',
|
||||
'videos.uni-paderborn.de', # Universität Paderborn
|
||||
'vimp-bemus.udk-berlin.de',
|
||||
'vimp.aekwl.de',
|
||||
'vimp.hs-mittweida.de',
|
||||
'vimp.oth-regensburg.de',
|
||||
'vimp.ph-heidelberg.de',
|
||||
'vimp.landesfilmdienste.de',
|
||||
'vimp.oth-regensburg.de', # Ostbayerische Technische Hochschule Regensburg
|
||||
'vimp.ph-heidelberg.de', # Pädagogische Hochschule Heidelberg
|
||||
'vimp.sma-events.com',
|
||||
'vimp.weka-fachmedien.de',
|
||||
'vimpdesk.com',
|
||||
'webtv.univ-montp3.fr',
|
||||
'www.b-tu.de/media',
|
||||
'www.b-tu.de/media', # Brandenburgische Technische Universität Cottbus-Senftenberg
|
||||
'www.bergauf.tv',
|
||||
'www.bigcitytv.de',
|
||||
'www.cad-videos.de',
|
||||
'www.drehzahl.tv',
|
||||
'www.fh-bielefeld.de/medienportal',
|
||||
'www.hohu.tv',
|
||||
'www.hsbi.de/medienportal', # Hochschule Bielefeld
|
||||
'www.logistic.tv',
|
||||
'www.orvovideo.com',
|
||||
'www.printtube.co.uk',
|
||||
'www.rwe.tv',
|
||||
'www.salzi.tv',
|
||||
'www.signtube.co.uk',
|
||||
'www.twb-power.com',
|
||||
'www.wenglor-media.com',
|
||||
'www2.univ-sba.dz',
|
||||
)
|
||||
@@ -188,22 +198,23 @@ class VideocampusSachsenIE(InfoExtractor):
|
||||
class ViMPPlaylistIE(InfoExtractor):
|
||||
IE_NAME = 'ViMP:Playlist'
|
||||
_VALID_URL = r'''(?x)(?P<host>https?://(?:{}))/(?:
|
||||
album/view/aid/(?P<album_id>[0-9]+)|
|
||||
(?P<mode>category|channel)/(?P<name>[\w-]+)/(?P<id>[0-9]+)
|
||||
(?P<mode1>album)/view/aid/(?P<album_id>[0-9]+)|
|
||||
(?P<mode2>category|channel)/(?P<name>[\w-]+)/(?P<channel_id>[0-9]+)|
|
||||
(?P<mode3>tag)/(?P<tag_id>[0-9]+)
|
||||
)'''.format('|'.join(map(re.escape, VideocampusSachsenIE._INSTANCES)))
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vimp.oth-regensburg.de/channel/Designtheorie-1-SoSe-2020/3',
|
||||
'info_dict': {
|
||||
'id': 'channel-3',
|
||||
'title': 'Designtheorie 1 SoSe 2020 :: Channels :: ViMP OTH Regensburg',
|
||||
'title': 'Designtheorie 1 SoSe 2020 - Channels - ViMP OTH Regensburg',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'https://www.fh-bielefeld.de/medienportal/album/view/aid/208',
|
||||
'url': 'https://www.hsbi.de/medienportal/album/view/aid/208',
|
||||
'info_dict': {
|
||||
'id': 'album-208',
|
||||
'title': 'KG Praktikum ABT/MEC :: Playlists :: FH-Medienportal',
|
||||
'title': 'KG Praktikum ABT/MEC - Playlists - HSBI-Medienportal',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
@@ -213,6 +224,13 @@ class ViMPPlaylistIE(InfoExtractor):
|
||||
'title': 'Online-Seminare ONYX - BPS - Bildungseinrichtungen - VCS',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
'url': 'https://videocampus.sachsen.de/tag/26902',
|
||||
'info_dict': {
|
||||
'id': 'tag-26902',
|
||||
'title': 'advanced mobile and v2x communication - Tags - VCS',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}]
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@@ -220,34 +238,37 @@ class ViMPPlaylistIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
f'{host}/media/ajax/component/boxList/{url_part}', playlist_id,
|
||||
query={'page': page, 'page_only': 1}, data=urlencode_postdata(data))
|
||||
urls = re.findall(r'"([^"]+/video/[^"]+)"', webpage)
|
||||
urls = re.findall(r'"([^"]*/video/[^"]+)"', webpage)
|
||||
|
||||
for url in urls:
|
||||
yield self.url_result(host + url, VideocampusSachsenIE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, album_id, mode, name, playlist_id = self._match_valid_url(url).group(
|
||||
'host', 'album_id', 'mode', 'name', 'id')
|
||||
host, album_id, name, channel_id, tag_id, mode1, mode2, mode3 = self._match_valid_url(url).group(
|
||||
'host', 'album_id', 'name', 'channel_id', 'tag_id', 'mode1', 'mode2', 'mode3')
|
||||
|
||||
webpage = self._download_webpage(url, album_id or playlist_id, fatal=False) or ''
|
||||
mode = mode1 or mode2 or mode3
|
||||
playlist_id = album_id or channel_id or tag_id
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id, fatal=False) or ''
|
||||
title = (self._html_search_meta('title', webpage, fatal=False)
|
||||
or self._html_extract_title(webpage))
|
||||
|
||||
url_part = (f'aid/{album_id}' if album_id
|
||||
else f'category/{name}/category_id/{playlist_id}' if mode == 'category'
|
||||
else f'title/{name}/channel/{playlist_id}')
|
||||
else f'category/{name}/category_id/{channel_id}' if mode == 'category'
|
||||
else f'title/{name}/channel/{channel_id}' if mode == 'channel'
|
||||
else f'tag/{tag_id}')
|
||||
|
||||
mode = mode or 'album'
|
||||
data = {
|
||||
'vars[mode]': mode,
|
||||
f'vars[{mode}]': album_id or playlist_id,
|
||||
'vars[context]': '4' if album_id else '1' if mode == 'category' else '3',
|
||||
'vars[context_id]': album_id or playlist_id,
|
||||
f'vars[{mode}]': playlist_id,
|
||||
'vars[context]': '4' if album_id else '1' if mode == 'category' else '3' if mode == 'album' else '0',
|
||||
'vars[context_id]': playlist_id,
|
||||
'vars[layout]': 'thumb',
|
||||
'vars[per_page][thumb]': str(self._PAGE_SIZE),
|
||||
}
|
||||
|
||||
return self.playlist_result(
|
||||
OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, host, url_part, album_id or playlist_id, data), self._PAGE_SIZE),
|
||||
playlist_title=title, id=f'{mode}-{album_id or playlist_id}')
|
||||
self._fetch_page, host, url_part, playlist_id, data), self._PAGE_SIZE),
|
||||
playlist_title=title, id=f'{mode}-{playlist_id}')
|
||||
|
||||
@@ -421,5 +421,5 @@ class VidyardIE(VidyardBaseIE):
|
||||
return self._process_video_json(video_json['chapters'][0], video_id)
|
||||
|
||||
return self.playlist_result(
|
||||
[self._process_video_json(chapter, video_id) for chapter in video_json['chapters']],
|
||||
(self._process_video_json(chapter, video_id) for chapter in video_json['chapters']),
|
||||
str(video_json['playerUuid']), video_json.get('name'))
|
||||
|
||||
@@ -1,346 +0,0 @@
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class VikiBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
|
||||
_API_URL_TEMPLATE = 'https://api.viki.io%s'
|
||||
|
||||
_DEVICE_ID = '112395910d'
|
||||
_APP = '100005a'
|
||||
_APP_VERSION = '6.11.3'
|
||||
_APP_SECRET = 'd96704b180208dbb2efa30fe44c48bd8690441af9f567ba8fd710a72badc85198f7472'
|
||||
|
||||
_GEO_BYPASS = False
|
||||
_NETRC_MACHINE = 'viki'
|
||||
|
||||
_token = None
|
||||
|
||||
_ERRORS = {
|
||||
'geo': 'Sorry, this content is not available in your region.',
|
||||
'upcoming': 'Sorry, this content is not yet available.',
|
||||
'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
|
||||
}
|
||||
|
||||
def _stream_headers(self, timestamp, sig):
|
||||
return {
|
||||
'X-Viki-manufacturer': 'vivo',
|
||||
'X-Viki-device-model': 'vivo 1606',
|
||||
'X-Viki-device-os-ver': '6.0.1',
|
||||
'X-Viki-connection-type': 'WIFI',
|
||||
'X-Viki-carrier': '',
|
||||
'X-Viki-as-id': '100005a-1625321982-3932',
|
||||
'timestamp': str(timestamp),
|
||||
'signature': str(sig),
|
||||
'x-viki-app-ver': self._APP_VERSION,
|
||||
}
|
||||
|
||||
def _api_query(self, path, version=4, **kwargs):
|
||||
path += '?' if '?' not in path else '&'
|
||||
query = f'/v{version}/{path}app={self._APP}'
|
||||
if self._token:
|
||||
query += f'&token={self._token}'
|
||||
return query + ''.join(f'&{name}={val}' for name, val in kwargs.items())
|
||||
|
||||
def _sign_query(self, path):
|
||||
timestamp = int(time.time())
|
||||
query = self._api_query(path, version=5)
|
||||
sig = hmac.new(
|
||||
self._APP_SECRET.encode('ascii'), f'{query}&t={timestamp}'.encode('ascii'), hashlib.sha1).hexdigest()
|
||||
return timestamp, sig, self._API_URL_TEMPLATE % query
|
||||
|
||||
def _call_api(
|
||||
self, path, video_id, note='Downloading JSON metadata', data=None, query=None, fatal=True):
|
||||
if query is None:
|
||||
timestamp, sig, url = self._sign_query(path)
|
||||
else:
|
||||
url = self._API_URL_TEMPLATE % self._api_query(path, version=4)
|
||||
resp = self._download_json(
|
||||
url, video_id, note, fatal=fatal, query=query,
|
||||
data=json.dumps(data).encode() if data else None,
|
||||
headers=({'x-viki-app-ver': self._APP_VERSION} if data
|
||||
else self._stream_headers(timestamp, sig) if query is None
|
||||
else None), expected_status=400) or {}
|
||||
|
||||
self._raise_error(resp.get('error'), fatal)
|
||||
return resp
|
||||
|
||||
def _raise_error(self, error, fatal=True):
|
||||
if error is None:
|
||||
return
|
||||
msg = f'{self.IE_NAME} said: {error}'
|
||||
if fatal:
|
||||
raise ExtractorError(msg, expected=True)
|
||||
else:
|
||||
self.report_warning(msg)
|
||||
|
||||
def _check_errors(self, data):
|
||||
for reason, status in (data.get('blocking') or {}).items():
|
||||
if status and reason in self._ERRORS:
|
||||
message = self._ERRORS[reason]
|
||||
if reason == 'geo':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
elif reason == 'paywall':
|
||||
if try_get(data, lambda x: x['paywallable']['tvod']):
|
||||
self._raise_error('This video is for rent only or TVOD (Transactional Video On demand)')
|
||||
self.raise_login_required(message)
|
||||
self._raise_error(message)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._token = self._call_api(
|
||||
'sessions.json', None, 'Logging in', fatal=False,
|
||||
data={'username': username, 'password': password}).get('token')
|
||||
if not self._token:
|
||||
self.report_warning('Login Failed: Unable to get session token')
|
||||
|
||||
@staticmethod
|
||||
def dict_selection(dict_obj, preferred_key):
|
||||
if preferred_key in dict_obj:
|
||||
return dict_obj[preferred_key]
|
||||
return (list(filter(None, dict_obj.values())) or [None])[0]
|
||||
|
||||
|
||||
class VikiIE(VikiBaseIE):
|
||||
IE_NAME = 'viki'
|
||||
_VALID_URL = rf'{VikiBaseIE._VALID_URL_BASE}(?:videos|player)/(?P<id>[0-9]+v)'
|
||||
_TESTS = [{
|
||||
'note': 'Free non-DRM video with storyboards in MPD',
|
||||
'url': 'https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1',
|
||||
'info_dict': {
|
||||
'id': '1175236v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Choosing Spouse by Lottery - Episode 1',
|
||||
'timestamp': 1606463239,
|
||||
'age_limit': 13,
|
||||
'uploader': 'FCC',
|
||||
'upload_date': '20201127',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
|
||||
'info_dict': {
|
||||
'id': '1023585v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heirs - Episode 14',
|
||||
'uploader': 'SBS Contents Hub',
|
||||
'timestamp': 1385047627,
|
||||
'upload_date': '20131121',
|
||||
'age_limit': 13,
|
||||
'duration': 3570,
|
||||
'episode_number': 14,
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
|
||||
'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
|
||||
'info_dict': {
|
||||
'id': '1067139v',
|
||||
'ext': 'mp4',
|
||||
'title': "'The Avengers: Age of Ultron' Press Conference",
|
||||
'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
|
||||
'duration': 352,
|
||||
'timestamp': 1430380829,
|
||||
'upload_date': '20150430',
|
||||
'uploader': 'Arirang TV',
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'Sorry. There was an error loading this video',
|
||||
}, {
|
||||
'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
|
||||
'info_dict': {
|
||||
'id': '1048879v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ankhon Dekhi',
|
||||
'duration': 6512,
|
||||
'timestamp': 1408532356,
|
||||
'upload_date': '20140820',
|
||||
'uploader': 'Spuul',
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
}, {
|
||||
# episode
|
||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||
'md5': '0a53dc252e6e690feccd756861495a8c',
|
||||
'info_dict': {
|
||||
'id': '44699v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Boys Over Flowers - Episode 1',
|
||||
'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
|
||||
'duration': 4172,
|
||||
'timestamp': 1270496524,
|
||||
'upload_date': '20100405',
|
||||
'uploader': 'group8',
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
'episode_number': 1,
|
||||
},
|
||||
}, {
|
||||
# youtube external
|
||||
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
|
||||
'md5': '63f8600c1da6f01b7640eee7eca4f1da',
|
||||
'info_dict': {
|
||||
'id': '50562v',
|
||||
'ext': 'webm',
|
||||
'title': 'Poor Nastya [COMPLETE] - Episode 1',
|
||||
'description': '',
|
||||
'duration': 606,
|
||||
'timestamp': 1274949505,
|
||||
'upload_date': '20101213',
|
||||
'uploader': 'ad14065n',
|
||||
'uploader_id': 'ad14065n',
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
},
|
||||
'skip': 'Page not found!',
|
||||
}, {
|
||||
'url': 'http://www.viki.com/player/44699v',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# non-English description
|
||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||
'md5': '41faaba0de90483fb4848952af7c7d0d',
|
||||
'info_dict': {
|
||||
'id': '158036v',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'I Planet Entertainment',
|
||||
'upload_date': '20111122',
|
||||
'timestamp': 1321985454,
|
||||
'description': 'md5:44b1e46619df3a072294645c770cef36',
|
||||
'title': 'Love In Magic',
|
||||
'age_limit': 13,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video = self._call_api(f'videos/{video_id}.json', video_id, 'Downloading video JSON', query={})
|
||||
self._check_errors(video)
|
||||
|
||||
title = try_get(video, lambda x: x['titles']['en'], str)
|
||||
episode_number = int_or_none(video.get('number'))
|
||||
if not title:
|
||||
title = f'Episode {episode_number}' if video.get('type') == 'episode' else video.get('id') or video_id
|
||||
container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
|
||||
container_title = self.dict_selection(container_titles, 'en')
|
||||
title = f'{container_title} - {title}'
|
||||
|
||||
thumbnails = [{
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail['url'],
|
||||
} for thumbnail_id, thumbnail in (video.get('images') or {}).items() if thumbnail.get('url')]
|
||||
|
||||
resp = self._call_api(
|
||||
f'playback_streams/{video_id}.json?drms=dt3&device_id={self._DEVICE_ID}',
|
||||
video_id, 'Downloading video streams JSON')['main'][0]
|
||||
|
||||
stream_id = try_get(resp, lambda x: x['properties']['track']['stream_id'])
|
||||
subtitles = dict((lang, [{
|
||||
'ext': ext,
|
||||
'url': self._API_URL_TEMPLATE % self._api_query(
|
||||
f'videos/{video_id}/auth_subtitles/{lang}.{ext}', stream_id=stream_id),
|
||||
} for ext in ('srt', 'vtt')]) for lang in (video.get('subtitle_completions') or {}))
|
||||
|
||||
mpd_url = resp['url']
|
||||
# 720p is hidden in another MPD which can be found in the current manifest content
|
||||
mpd_content = self._download_webpage(mpd_url, video_id, note='Downloading initial MPD manifest')
|
||||
mpd_url = self._search_regex(
|
||||
r'(?mi)<BaseURL>(http.+.mpd)', mpd_content, 'new manifest', default=mpd_url)
|
||||
if 'mpdhd_high' not in mpd_url and 'sig=' not in mpd_url:
|
||||
# Modify the URL to get 1080p
|
||||
mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high')
|
||||
formats = self._extract_mpd_formats(mpd_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': self.dict_selection(video.get('descriptions', {}), 'en'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('created_at')),
|
||||
'uploader': video.get('author'),
|
||||
'uploader_url': video.get('author_url'),
|
||||
'like_count': int_or_none(try_get(video, lambda x: x['likes']['count'])),
|
||||
'age_limit': parse_age_limit(video.get('rating')),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
|
||||
class VikiChannelIE(VikiBaseIE):
|
||||
IE_NAME = 'viki:channel'
|
||||
_VALID_URL = rf'{VikiBaseIE._VALID_URL_BASE}(?:tv|news|movies|artists)/(?P<id>[0-9]+c)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
|
||||
'info_dict': {
|
||||
'id': '50c',
|
||||
'title': 'Boys Over Flowers',
|
||||
'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
|
||||
},
|
||||
'playlist_mincount': 51,
|
||||
}, {
|
||||
'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
|
||||
'info_dict': {
|
||||
'id': '1354c',
|
||||
'title': 'Poor Nastya [COMPLETE]',
|
||||
'description': 'md5:05bf5471385aa8b21c18ad450e350525',
|
||||
},
|
||||
'playlist_count': 127,
|
||||
'skip': 'Page not found',
|
||||
}, {
|
||||
'url': 'http://www.viki.com/news/24569c-showbiz-korea',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.viki.com/artists/2141c-shinee',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_video_types = ('episodes', 'movies', 'clips', 'trailers')
|
||||
|
||||
def _entries(self, channel_id):
|
||||
params = {
|
||||
'app': self._APP, 'token': self._token, 'only_ids': 'true',
|
||||
'direction': 'asc', 'sort': 'number', 'per_page': 30,
|
||||
}
|
||||
video_types = self._configuration_arg('video_types') or self._video_types
|
||||
for video_type in video_types:
|
||||
if video_type not in self._video_types:
|
||||
self.report_warning(f'Unknown video_type: {video_type}')
|
||||
page_num = 0
|
||||
while True:
|
||||
page_num += 1
|
||||
params['page'] = page_num
|
||||
res = self._call_api(
|
||||
f'containers/{channel_id}/{video_type}.json', channel_id, query=params, fatal=False,
|
||||
note=f'Downloading {video_type.title()} JSON page {page_num}')
|
||||
|
||||
for video_id in res.get('response') or []:
|
||||
yield self.url_result(f'https://www.viki.com/videos/{video_id}', VikiIE.ie_key(), video_id)
|
||||
if not res.get('more'):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
channel = self._call_api(f'containers/{channel_id}.json', channel_id, 'Downloading channel JSON')
|
||||
self._check_errors(channel)
|
||||
return self.playlist_result(
|
||||
self._entries(channel_id), channel_id,
|
||||
self.dict_selection(channel['titles'], 'en'),
|
||||
self.dict_selection(channel['descriptions'], 'en'))
|
||||
@@ -28,6 +28,7 @@ from ..utils import (
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urlhandle_detect_ext,
|
||||
urljoin,
|
||||
@@ -211,11 +212,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'width': int_or_none(key),
|
||||
'url': thumb,
|
||||
})
|
||||
thumbnail = video_data.get('thumbnail')
|
||||
if thumbnail:
|
||||
thumbnails.append({
|
||||
'url': thumbnail,
|
||||
})
|
||||
thumbnails.extend(traverse_obj(video_data, (('thumbnail', 'thumbnail_url'), {'url': {url_or_none}})))
|
||||
|
||||
owner = video_data.get('owner') or {}
|
||||
video_uploader_url = owner.get('url')
|
||||
@@ -388,7 +385,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/businessofsoftware',
|
||||
'uploader_id': 'businessofsoftware',
|
||||
'duration': 3610,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/376682406-f34043e7b766af6bef2af81366eacd6724f3fc3173179a11a97a1e26587c9529-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/376682406-f34043e7b766af6bef2af81366eacd6724f3fc3173179a11a97a1e26587c9529-d',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
@@ -413,7 +410,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'duration': 10,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
@@ -437,7 +434,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'timestamp': 1380339469,
|
||||
'upload_date': '20130928',
|
||||
'duration': 187,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/450239872-a05512d9b1e55d707a7c04365c10980f327b06d966351bc403a5d5d65c95e572-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/450239872-a05512d9b1e55d707a7c04365c10980f327b06d966351bc403a5d5d65c95e572-d',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
@@ -463,7 +460,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'duration': 62,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/452001751-8216e0571c251a09d7a8387550942d89f7f86f6398f8ed886e639b0dd50d3c90-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/452001751-8216e0571c251a09d7a8387550942d89f7f86f6398f8ed886e639b0dd50d3c90-d',
|
||||
'subtitles': {
|
||||
'de': 'count:3',
|
||||
'en': 'count:3',
|
||||
@@ -488,7 +485,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593',
|
||||
'uploader_id': 'user28849593',
|
||||
'duration': 118,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
},
|
||||
@@ -509,7 +506,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'duration': 60,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d',
|
||||
'like_count': int,
|
||||
'tags': 'count:11',
|
||||
},
|
||||
@@ -531,7 +528,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
|
||||
'upload_date': '20200225',
|
||||
'duration': 176,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d',
|
||||
'uploader_url': 'https://vimeo.com/frameworkla',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
@@ -556,7 +553,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'duration': 321,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d',
|
||||
'like_count': int,
|
||||
'tags': ['[the shining', 'vimeohq', 'cv', 'vimeo tribute]'],
|
||||
},
|
||||
@@ -596,7 +593,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader_id': 'user18948128',
|
||||
'uploader': 'Jaime Marquínez Ferrándiz',
|
||||
'duration': 10,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
@@ -633,7 +630,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'description': str, # FIXME: Dynamic SEO spam description
|
||||
'upload_date': '20150209',
|
||||
'timestamp': 1423518307,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/default_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/default',
|
||||
'duration': 10,
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/user20132939',
|
||||
@@ -666,7 +663,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'license': 'by-nc',
|
||||
'duration': 159,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/562802436-585eeb13b5020c6ac0f171a2234067938098f84737787df05ff0d767f6d54ee9-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/562802436-585eeb13b5020c6ac0f171a2234067938098f84737787df05ff0d767f6d54ee9-d',
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/aliniamedia',
|
||||
'release_date': '20160329',
|
||||
@@ -686,7 +683,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'Firework Champions',
|
||||
'upload_date': '20150910',
|
||||
'timestamp': 1441901895,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d',
|
||||
'uploader_url': 'https://vimeo.com/fireworkchampions',
|
||||
'tags': 'count:6',
|
||||
'duration': 229,
|
||||
@@ -715,7 +712,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'duration': 336,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/541243181-b593db36a16db2f0096f655da3f5a4dc46b8766d77b0f440df937ecb0c418347-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/541243181-b593db36a16db2f0096f655da3f5a4dc46b8766d77b0f440df937ecb0c418347-d',
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/karimhd',
|
||||
'channel_url': 'https://vimeo.com/channels/staffpicks',
|
||||
@@ -740,7 +737,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'release_timestamp': 1627621014,
|
||||
'duration': 976,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1202249320-4ddb2c30398c0dc0ee059172d1bd5ea481ad12f0e0e3ad01d2266f56c744b015-d_1280',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1202249320-4ddb2c30398c0dc0ee059172d1bd5ea481ad12f0e0e3ad01d2266f56c744b015-d',
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/txwestcapital',
|
||||
'release_date': '20210730',
|
||||
@@ -764,7 +761,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'Alex Howard',
|
||||
'uploader_id': 'user54729178',
|
||||
'uploader_url': 'https://vimeo.com/user54729178',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1520099929-[\da-f]+-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1520099929-[\da-f]+-d',
|
||||
'duration': 2636,
|
||||
'chapters': [
|
||||
{'start_time': 0, 'end_time': 10, 'title': '<Untitled Chapter 1>'},
|
||||
@@ -807,7 +804,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1018638656-[\da-f]+-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1018638656-[\da-f]+-d',
|
||||
},
|
||||
# 'params': {'format': 'Original'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
@@ -824,7 +821,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader_id': 'rajavirdi',
|
||||
'uploader_url': 'https://vimeo.com/rajavirdi',
|
||||
'duration': 309,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
format_field,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class VineIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))']
|
||||
_TESTS = [{
|
||||
'url': 'https://vine.co/v/b9KOOWX7HUx',
|
||||
'md5': '2f36fed6235b16da96ce9b4dc890940d',
|
||||
'info_dict': {
|
||||
'id': 'b9KOOWX7HUx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chicken.',
|
||||
'alt_title': 'Vine by Jack',
|
||||
'timestamp': 1368997951,
|
||||
'upload_date': '20130519',
|
||||
'uploader': 'Jack',
|
||||
'uploader_id': '76',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/v/e192BnZnZ9V',
|
||||
'info_dict': {
|
||||
'id': 'e192BnZnZ9V',
|
||||
'ext': 'mp4',
|
||||
'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries #lovesickseason2',
|
||||
'alt_title': 'Vine by Pimry_zaa',
|
||||
'timestamp': 1436057405,
|
||||
'upload_date': '20150705',
|
||||
'uploader': 'Pimry_zaa',
|
||||
'uploader_id': '1135760698325307392',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/v/MYxVapFvz2z',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vine.co/v/bxVjBbZlPUH',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
f'https://archive.vine.co/posts/{video_id}.json', video_id)
|
||||
|
||||
def video_url(kind):
|
||||
for url_suffix in ('Url', 'URL'):
|
||||
format_url = data.get(f'video{kind}{url_suffix}')
|
||||
if format_url:
|
||||
return format_url
|
||||
|
||||
formats = []
|
||||
for quality, format_id in enumerate(('low', '', 'dash')):
|
||||
format_url = video_url(format_id.capitalize())
|
||||
if not format_url:
|
||||
continue
|
||||
# DASH link returns plain mp4
|
||||
if format_id == 'dash' and determine_ext(format_url) == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id or 'standard',
|
||||
'quality': quality,
|
||||
})
|
||||
self._check_formats(formats, video_id)
|
||||
|
||||
username = data.get('username')
|
||||
|
||||
alt_title = format_field(username, None, 'Vine by %s')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': data.get('description') or alt_title or 'Vine video',
|
||||
'alt_title': alt_title,
|
||||
'thumbnail': data.get('thumbnailUrl'),
|
||||
'timestamp': unified_timestamp(data.get('created')),
|
||||
'uploader': username,
|
||||
'uploader_id': data.get('userIdStr'),
|
||||
'view_count': int_or_none(data.get('loops')),
|
||||
'like_count': int_or_none(data.get('likes')),
|
||||
'comment_count': int_or_none(data.get('comments')),
|
||||
'repost_count': int_or_none(data.get('reposts')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class VineUserIE(InfoExtractor):
|
||||
IE_NAME = 'vine:user'
|
||||
_VALID_URL = r'https?://vine\.co/(?P<u>u/)?(?P<user>[^/]+)'
|
||||
_VINE_BASE_URL = 'https://vine.co/'
|
||||
_TESTS = [{
|
||||
'url': 'https://vine.co/itsruthb',
|
||||
'info_dict': {
|
||||
'id': 'itsruthb',
|
||||
'title': 'Ruth B',
|
||||
'description': '| Instagram/Twitter: itsruthb | still a lost boy from neverland',
|
||||
},
|
||||
'playlist_mincount': 611,
|
||||
}, {
|
||||
'url': 'https://vine.co/u/942914934646415360',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if VineIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
user = mobj.group('user')
|
||||
u = mobj.group('u')
|
||||
|
||||
profile_url = '{}api/users/profiles/{}{}'.format(
|
||||
self._VINE_BASE_URL, 'vanity/' if not u else '', user)
|
||||
profile_data = self._download_json(
|
||||
profile_url, user, note='Downloading user profile data')
|
||||
|
||||
data = profile_data['data']
|
||||
user_id = data.get('userId') or data['userIdStr']
|
||||
profile = self._download_json(
|
||||
f'https://archive.vine.co/profiles/{user_id}.json', user_id)
|
||||
entries = [
|
||||
self.url_result(
|
||||
f'https://vine.co/v/{post_id}', ie='Vine', video_id=post_id)
|
||||
for post_id in profile['posts']
|
||||
if post_id and isinstance(post_id, str)]
|
||||
return self.playlist_result(
|
||||
entries, user, profile.get('username'), profile.get('description'))
|
||||
@@ -17,10 +17,10 @@ from ..utils import (
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
@@ -29,6 +29,7 @@ from ..utils import (
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class VKBaseIE(InfoExtractor):
|
||||
@@ -91,17 +92,17 @@ class VKBaseIE(InfoExtractor):
|
||||
class VKIE(VKBaseIE):
|
||||
IE_NAME = 'vk'
|
||||
IE_DESC = 'VK'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1']
|
||||
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk(?:(?:video)?\.ru|\.com)/video_ext\.php.+?)\1']
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:
|
||||
(?:(?:m|new)\.)?vk\.com/video_|
|
||||
(?:(?:m|new)\.)?vk(?:(?:video)?\.ru|\.com)/video_|
|
||||
(?:www\.)?daxab\.com/
|
||||
)
|
||||
ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
|
||||
(?:
|
||||
(?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)|
|
||||
(?:(?:m|new)\.)?vk(?:(?:video)?\.ru|\.com)/(?:.+?\?.*?z=)?(?:video|clip)|
|
||||
(?:www\.)?daxab\.com/embed/
|
||||
)
|
||||
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
|
||||
@@ -110,11 +111,12 @@ class VKIE(VKBaseIE):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
|
||||
'url': 'https://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
|
||||
'info_dict': {
|
||||
'id': '-77521_162222515',
|
||||
'ext': 'mp4',
|
||||
'title': 'ProtivoGunz - Хуёвая песня',
|
||||
'description': 'Видео из официальной группы Noize MC\nhttp://vk.com/noizemc',
|
||||
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
|
||||
'uploader_id': '39545378',
|
||||
'duration': 195,
|
||||
@@ -127,7 +129,7 @@ class VKIE(VKBaseIE):
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
},
|
||||
{
|
||||
'url': 'http://vk.com/video205387401_165548505',
|
||||
'url': 'https://vk.com/video205387401_165548505',
|
||||
'info_dict': {
|
||||
'id': '205387401_165548505',
|
||||
'ext': 'mp4',
|
||||
@@ -164,6 +166,7 @@ class VKIE(VKBaseIE):
|
||||
'id': '-93049196_456239755',
|
||||
'ext': 'mp4',
|
||||
'title': '8 серия (озвучка)',
|
||||
'description': 'Видео из официальной группы Noize MC\nhttp://vk.com/noizemc',
|
||||
'duration': 8383,
|
||||
'comment_count': int,
|
||||
'uploader': 'Dizi2021',
|
||||
@@ -182,10 +185,10 @@ class VKIE(VKBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
|
||||
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
|
||||
'duration': 178,
|
||||
'duration': 179,
|
||||
'upload_date': '20130117',
|
||||
'uploader': "Children's Joy Foundation Inc.",
|
||||
'uploader_id': 'thecjf',
|
||||
'uploader_id': '@CJFIofficial',
|
||||
'view_count': int,
|
||||
'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
|
||||
'availability': 'public',
|
||||
@@ -193,7 +196,7 @@ class VKIE(VKBaseIE):
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'channel': 'Children\'s Joy Foundation Inc.',
|
||||
'uploader_url': 'http://www.youtube.com/user/thecjf',
|
||||
'uploader_url': 'https://www.youtube.com/@CJFIofficial',
|
||||
'thumbnail': r're:https?://.+\.jpg$',
|
||||
'tags': 'count:27',
|
||||
'start_time': 0.0,
|
||||
@@ -201,6 +204,7 @@ class VKIE(VKBaseIE):
|
||||
'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
|
||||
'channel_follower_count': int,
|
||||
'age_limit': 0,
|
||||
'timestamp': 1358394935,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -222,6 +226,7 @@ class VKIE(VKBaseIE):
|
||||
'thumbnail': r're:https?://.+x1080$',
|
||||
'tags': list,
|
||||
},
|
||||
'skip': 'This video has been deleted and is no longer available.',
|
||||
},
|
||||
{
|
||||
'url': 'https://vk.com/clips-74006511?z=clip-74006511_456247211',
|
||||
@@ -235,13 +240,14 @@ class VKIE(VKBaseIE):
|
||||
'timestamp': 1664995597,
|
||||
'title': 'Clip by @madempress',
|
||||
'upload_date': '20221005',
|
||||
'uploader': 'Шальная императрица',
|
||||
'uploader': 'Шальная Императрица',
|
||||
'uploader_id': '-74006511',
|
||||
'description': 'md5:f9315f7786fa0e84e75e4f824a48b056',
|
||||
},
|
||||
},
|
||||
{
|
||||
# video key is extra_data not url\d+
|
||||
'url': 'http://vk.com/video-110305615_171782105',
|
||||
'url': 'https://vk.com/video-110305615_171782105',
|
||||
'md5': 'e13fcda136f99764872e739d13fac1d1',
|
||||
'info_dict': {
|
||||
'id': '-110305615_171782105',
|
||||
@@ -273,6 +279,26 @@ class VKIE(VKBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No formats found',
|
||||
},
|
||||
{
|
||||
'note': 'video has chapters',
|
||||
'url': 'https://vkvideo.ru/video-18403220_456239696',
|
||||
'info_dict': {
|
||||
'id': '-18403220_456239696',
|
||||
'ext': 'mp4',
|
||||
'title': 'Трамп отменяет гранты // DeepSeek - Революция в ИИ // Илон Маск читер',
|
||||
'description': 'md5:b112ea9de53683b6d03d29076f62eec2',
|
||||
'uploader': 'Руслан Усачев',
|
||||
'uploader_id': '-18403220',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'duration': 1983,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'chapters': 'count:21',
|
||||
'timestamp': 1738252883,
|
||||
'upload_date': '20250130',
|
||||
},
|
||||
},
|
||||
{
|
||||
# live stream, hls and rtmp links, most likely already finished live
|
||||
@@ -312,7 +338,16 @@ class VKIE(VKBaseIE):
|
||||
{
|
||||
'url': 'https://vk.com/clip30014565_456240946',
|
||||
'only_matching': True,
|
||||
}]
|
||||
},
|
||||
{
|
||||
'url': 'https://vkvideo.ru/video-127553155_456242961',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://vk.ru/video-220754053_456242564',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
@@ -338,7 +373,7 @@ class VKIE(VKBaseIE):
|
||||
video_id = '{}_{}'.format(mobj.group('oid'), mobj.group('id'))
|
||||
|
||||
info_page = self._download_webpage(
|
||||
'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
|
||||
'https://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
|
||||
|
||||
error_message = self._html_search_regex(
|
||||
[r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
|
||||
@@ -432,11 +467,10 @@ class VKIE(VKBaseIE):
|
||||
if m_opts_url:
|
||||
opts_url = m_opts_url.group(1)
|
||||
if opts_url.startswith('//'):
|
||||
opts_url = 'http:' + opts_url
|
||||
opts_url = 'https:' + opts_url
|
||||
return self.url_result(opts_url)
|
||||
|
||||
data = player['params'][0]
|
||||
title = unescapeHTML(data['md_title'])
|
||||
|
||||
# 2 = live
|
||||
# 3 = post live (finished live)
|
||||
@@ -494,17 +528,29 @@ class VKIE(VKBaseIE):
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'thumbnail': data.get('jpg'),
|
||||
'uploader': data.get('md_author'),
|
||||
'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
|
||||
'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(mv_data, {
|
||||
'title': ('title', {unescapeHTML}),
|
||||
'description': ('desc', {clean_html}, filter),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'comment_count': ('commcount', {int_or_none}),
|
||||
}),
|
||||
**traverse_obj(data, {
|
||||
'title': ('md_title', {unescapeHTML}),
|
||||
'description': ('description', {clean_html}, filter),
|
||||
'thumbnail': ('jpg', {url_or_none}),
|
||||
'uploader': ('md_author', {str}),
|
||||
'uploader_id': (('author_id', 'authorId'), {str_or_none}, any),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'chapters': ('time_codes', lambda _, v: isinstance(v['time'], int), {
|
||||
'title': ('text', {str}),
|
||||
'start_time': 'time',
|
||||
}),
|
||||
}),
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'like_count': int_or_none(mv_data.get('likes')),
|
||||
'comment_count': int_or_none(mv_data.get('commcount')),
|
||||
'is_live': is_live,
|
||||
'subtitles': subtitles,
|
||||
'_format_sort_fields': ('res', 'source'),
|
||||
}
|
||||
|
||||
@@ -512,8 +558,11 @@ class VKIE(VKBaseIE):
|
||||
class VKUserVideosIE(VKBaseIE):
|
||||
IE_NAME = 'vk:uservideos'
|
||||
IE_DESC = "VK - User's Videos"
|
||||
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/(?:playlist/)?(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
|
||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
||||
_BASE_URL_RE = r'https?://(?:(?:m|new)\.)?vk(?:video\.ru|\.com/video)'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_URL_RE}/playlist/(?P<id>-?\d+_\d+)',
|
||||
rf'{_BASE_URL_RE}/(?P<id>@[^/?#]+)(?:/all)?/?(?!\?.*\bz=video)(?:[?#]|$)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://vk.com/video/@mobidevices',
|
||||
'info_dict': {
|
||||
@@ -527,12 +576,20 @@ class VKUserVideosIE(VKBaseIE):
|
||||
},
|
||||
'playlist_mincount': 182,
|
||||
}, {
|
||||
'url': 'https://vk.com/video/playlist/-174476437_2',
|
||||
'url': 'https://vkvideo.ru/playlist/-204353299_426',
|
||||
'info_dict': {
|
||||
'id': '-174476437_playlist_2',
|
||||
'title': 'Анонсы',
|
||||
'id': '-204353299_playlist_426',
|
||||
},
|
||||
'playlist_mincount': 108,
|
||||
'playlist_mincount': 33,
|
||||
}, {
|
||||
'url': 'https://vk.com/video/@gorkyfilmstudio/all',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vkvideo.ru/@mobidevices',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vk.com/video/playlist/-174476437_2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
|
||||
|
||||
@@ -552,7 +609,7 @@ class VKUserVideosIE(VKBaseIE):
|
||||
v = self._VIDEO._make(video[:2])
|
||||
video_id = '%d_%d' % (v.owner_id, v.id)
|
||||
yield self.url_result(
|
||||
'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
|
||||
'https://vk.com/video' + video_id, VKIE.ie_key(), video_id)
|
||||
if count >= total:
|
||||
break
|
||||
video_list_json = self._download_payload('al_video', page_id, {
|
||||
@@ -561,23 +618,25 @@ class VKUserVideosIE(VKBaseIE):
|
||||
'oid': page_id,
|
||||
'section': section,
|
||||
})[0][section]
|
||||
count += video_list_json['count']
|
||||
new_count = video_list_json['count']
|
||||
if not new_count:
|
||||
self.to_screen(f'{page_id}: Skipping {total - count} unavailable videos')
|
||||
break
|
||||
count += new_count
|
||||
video_list = video_list_json['list']
|
||||
|
||||
def _real_extract(self, url):
|
||||
u_id, section = self._match_valid_url(url).groups()
|
||||
u_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, u_id)
|
||||
|
||||
if u_id.startswith('@'):
|
||||
page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
|
||||
elif '_' in u_id:
|
||||
page_id, section = u_id.split('_', 1)
|
||||
section = f'playlist_{section}'
|
||||
page_id = traverse_obj(
|
||||
self._search_json(r'\bvar newCur\s*=', webpage, 'cursor data', u_id),
|
||||
('oid', {int}, {str_or_none}, {require('page id')}))
|
||||
section = traverse_obj(parse_qs(url), ('section', 0)) or 'all'
|
||||
else:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
if not section:
|
||||
section = 'all'
|
||||
page_id, _, section = u_id.partition('_')
|
||||
section = f'playlist_{section}'
|
||||
|
||||
playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
|
||||
return self.playlist_result(self._entries(page_id, section), f'{page_id}_{section}', playlist_title)
|
||||
@@ -717,7 +776,7 @@ class VKWallPostIE(VKBaseIE):
|
||||
|
||||
|
||||
class VKPlayBaseIE(InfoExtractor):
|
||||
_BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vkplay\.ru)/'
|
||||
_BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vk(?:play|video)\.ru)/'
|
||||
_RESOLUTIONS = {
|
||||
'tiny': '256x144',
|
||||
'lowest': '426x240',
|
||||
@@ -797,6 +856,9 @@ class VKPlayIE(VKPlayBaseIE):
|
||||
}, {
|
||||
'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://live.vkvideo.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -839,6 +901,9 @@ class VKPlayLiveIE(VKPlayBaseIE):
|
||||
}, {
|
||||
'url': 'https://live.vkplay.ru/lebwa',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://live.vkvideo.ru/panterka',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -2,31 +2,33 @@ import json
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .gigya import GigyaBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
get_element_html_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
jwt_encode_hs256,
|
||||
make_archive_id,
|
||||
merge_dicts,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class VRTBaseIE(GigyaBaseIE):
|
||||
class VRTBaseIE(InfoExtractor):
|
||||
_GEO_BYPASS = False
|
||||
_PLAYER_INFO = {
|
||||
'platform': 'desktop',
|
||||
@@ -37,11 +39,11 @@ class VRTBaseIE(GigyaBaseIE):
|
||||
'device': 'undefined (undefined)',
|
||||
'os': {
|
||||
'name': 'Windows',
|
||||
'version': 'x86_64',
|
||||
'version': '10',
|
||||
},
|
||||
'player': {
|
||||
'name': 'VRT web player',
|
||||
'version': '2.7.4-prod-2023-04-19T06:05:45',
|
||||
'version': '5.1.1-prod-2025-02-14T08:44:16"',
|
||||
},
|
||||
}
|
||||
# From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js
|
||||
@@ -90,20 +92,21 @@ class VRTBaseIE(GigyaBaseIE):
|
||||
def _call_api(self, video_id, client='null', id_token=None, version='v2'):
|
||||
player_info = {'exp': (round(time.time(), 3) + 900), **self._PLAYER_INFO}
|
||||
player_token = self._download_json(
|
||||
'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2/tokens',
|
||||
video_id, 'Downloading player token', headers={
|
||||
f'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/{version}/tokens',
|
||||
video_id, 'Downloading player token', 'Failed to download player token', headers={
|
||||
**self.geo_verification_headers(),
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps({
|
||||
'identityToken': id_token or {},
|
||||
'identityToken': id_token or '',
|
||||
'playerInfo': jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={
|
||||
'kid': self._JWT_KEY_ID,
|
||||
}).decode(),
|
||||
}, separators=(',', ':')).encode())['vrtPlayerToken']
|
||||
|
||||
return self._download_json(
|
||||
f'https://media-services-public.vrt.be/media-aggregator/{version}/media-items/{video_id}',
|
||||
video_id, 'Downloading API JSON', query={
|
||||
# The URL below redirects to https://media-services-public.vrt.be/media-aggregator/{version}/media-items/{video_id}
|
||||
f'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/{version}/videos/{video_id}',
|
||||
video_id, 'Downloading API JSON', 'Failed to download API JSON', query={
|
||||
'vrtPlayerToken': player_token,
|
||||
'client': client,
|
||||
}, expected_status=400)
|
||||
@@ -177,215 +180,286 @@ class VRTIE(VRTBaseIE):
|
||||
|
||||
|
||||
class VrtNUIE(VRTBaseIE):
|
||||
IE_DESC = 'VRT MAX'
|
||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
|
||||
IE_NAME = 'vrtmax'
|
||||
IE_DESC = 'VRT MAX (formerly VRT NU)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?:vrtnu|vrtmax)/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
# CONTENT_IS_AGE_RESTRICTED
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/de-ideale-wereld/2023-vj/de-ideale-wereld-d20230116/',
|
||||
'url': 'https://www.vrt.be/vrtmax/a-z/ket---doc/trailer/ket---doc-trailer-s6/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-855b00a8-6ce2-4032-ac4f-1fcf3ae78524$vid-d2243aa1-ec46-4e34-a55b-92568459906f',
|
||||
'id': 'pbs-pub-c8a78645-5d3e-468a-89ec-6f3ed5534bd5$vid-242ddfe9-18f5-4e16-ab45-09b122a19251',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tom Waes',
|
||||
'description': 'Satirisch actualiteitenmagazine met Ella Leyers. Tom Waes is te gast.',
|
||||
'timestamp': 1673905125,
|
||||
'release_timestamp': 1673905125,
|
||||
'series': 'De ideale wereld',
|
||||
'season_id': '1672830988794',
|
||||
'episode': 'Aflevering 1',
|
||||
'episode_number': 1,
|
||||
'episode_id': '1672830988861',
|
||||
'display_id': 'de-ideale-wereld-d20230116',
|
||||
'channel': 'VRT',
|
||||
'duration': 1939.0,
|
||||
'thumbnail': 'https://images.vrt.be/orig/2023/01/10/1bb39cb3-9115-11ed-b07d-02b7b76bf47f.jpg',
|
||||
'release_date': '20230116',
|
||||
'upload_date': '20230116',
|
||||
'age_limit': 12,
|
||||
'channel': 'ketnet',
|
||||
'description': 'Neem een kijkje in de bijzondere wereld van deze Ketnetters.',
|
||||
'display_id': 'ket---doc-trailer-s6',
|
||||
'duration': 30.0,
|
||||
'episode': 'Reeks 6 volledig vanaf 3 maart',
|
||||
'episode_id': '1739450401467',
|
||||
'season': 'Trailer',
|
||||
'season_id': '1739450401467',
|
||||
'series': 'Ket & Doc',
|
||||
'thumbnail': 'https://images.vrt.be/orig/2025/02/21/63f07122-5bbd-4ca1-b42e-8565c6cd95df.jpg',
|
||||
'timestamp': 1740373200,
|
||||
'title': 'Reeks 6 volledig vanaf 3 maart',
|
||||
'upload_date': '20250224',
|
||||
'_old_archive_ids': [
|
||||
'canvas pbs-pub-c8a78645-5d3e-468a-89ec-6f3ed5534bd5$vid-242ddfe9-18f5-4e16-ab45-09b122a19251',
|
||||
'ketnet pbs-pub-c8a78645-5d3e-468a-89ec-6f3ed5534bd5$vid-242ddfe9-18f5-4e16-ab45-09b122a19251',
|
||||
],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/buurman--wat-doet-u-nu-/6/buurman--wat-doet-u-nu--s6-trailer/',
|
||||
'url': 'https://www.vrt.be/vrtmax/a-z/meisjes/6/meisjes-s6a5/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-ad4050eb-d9e5-48c2-9ec8-b6c355032361$vid-0465537a-34a8-4617-8352-4d8d983b4eee',
|
||||
'id': 'pbs-pub-97b541ab-e05c-43b9-9a40-445702ef7189$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trailer seizoen 6 \'Buurman, wat doet u nu?\'',
|
||||
'description': 'md5:197424726c61384b4e5c519f16c0cf02',
|
||||
'timestamp': 1652940000,
|
||||
'release_timestamp': 1652940000,
|
||||
'series': 'Buurman, wat doet u nu?',
|
||||
'season': 'Seizoen 6',
|
||||
'channel': 'ketnet',
|
||||
'description': 'md5:713793f15cbf677f66200b36b7b1ec5a',
|
||||
'display_id': 'meisjes-s6a5',
|
||||
'duration': 1336.02,
|
||||
'episode': 'Week 5',
|
||||
'episode_id': '1684157692901',
|
||||
'episode_number': 5,
|
||||
'season': '6',
|
||||
'season_id': '1684157692901',
|
||||
'season_number': 6,
|
||||
'season_id': '1652344200907',
|
||||
'episode': 'Aflevering 0',
|
||||
'episode_number': 0,
|
||||
'episode_id': '1652951873524',
|
||||
'display_id': 'buurman--wat-doet-u-nu--s6-trailer',
|
||||
'channel': 'VRT',
|
||||
'duration': 33.13,
|
||||
'thumbnail': 'https://images.vrt.be/orig/2022/05/23/3c234d21-da83-11ec-b07d-02b7b76bf47f.jpg',
|
||||
'release_date': '20220519',
|
||||
'upload_date': '20220519',
|
||||
'series': 'Meisjes',
|
||||
'thumbnail': 'https://images.vrt.be/orig/2023/05/14/bf526ae0-f1d9-11ed-91d7-02b7b76bf47f.jpg',
|
||||
'timestamp': 1685251800,
|
||||
'title': 'Week 5',
|
||||
'upload_date': '20230528',
|
||||
'_old_archive_ids': [
|
||||
'canvas pbs-pub-97b541ab-e05c-43b9-9a40-445702ef7189$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
|
||||
'ketnet pbs-pub-97b541ab-e05c-43b9-9a40-445702ef7189$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
|
||||
],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/taboe/3/taboe-s3a4/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-f50faa3a-1778-46b6-9117-4ba85f197703$vid-547507fe-1c8b-4394-b361-21e627cbd0fd',
|
||||
'ext': 'mp4',
|
||||
'channel': 'een',
|
||||
'description': 'md5:bf61345a95eca9393a95de4a7a54b5c6',
|
||||
'display_id': 'taboe-s3a4',
|
||||
'duration': 2882.02,
|
||||
'episode': 'Mensen met het syndroom van Gilles de la Tourette',
|
||||
'episode_id': '1739055911734',
|
||||
'episode_number': 4,
|
||||
'season': '3',
|
||||
'season_id': '1739055911734',
|
||||
'season_number': 3,
|
||||
'series': 'Taboe',
|
||||
'thumbnail': 'https://images.vrt.be/orig/2025/02/19/8198496c-d1ae-4bca-9a48-761cf3ea3ff2.jpg',
|
||||
'timestamp': 1740286800,
|
||||
'title': 'Mensen met het syndroom van Gilles de la Tourette',
|
||||
'upload_date': '20250223',
|
||||
'_old_archive_ids': [
|
||||
'canvas pbs-pub-f50faa3a-1778-46b6-9117-4ba85f197703$vid-547507fe-1c8b-4394-b361-21e627cbd0fd',
|
||||
'ketnet pbs-pub-f50faa3a-1778-46b6-9117-4ba85f197703$vid-547507fe-1c8b-4394-b361-21e627cbd0fd',
|
||||
],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
_NETRC_MACHINE = 'vrtnu'
|
||||
_authenticated = False
|
||||
|
||||
_TOKEN_COOKIE_DOMAIN = '.www.vrt.be'
|
||||
_ACCESS_TOKEN_COOKIE_NAME = 'vrtnu-site_profile_at'
|
||||
_REFRESH_TOKEN_COOKIE_NAME = 'vrtnu-site_profile_rt'
|
||||
_VIDEO_TOKEN_COOKIE_NAME = 'vrtnu-site_profile_vt'
|
||||
_VIDEO_PAGE_QUERY = '''
|
||||
query VideoPage($pageId: ID!) {
|
||||
page(id: $pageId) {
|
||||
... on EpisodePage {
|
||||
episode {
|
||||
ageRaw
|
||||
description
|
||||
durationRaw
|
||||
episodeNumberRaw
|
||||
id
|
||||
name
|
||||
onTimeRaw
|
||||
program {
|
||||
title
|
||||
}
|
||||
season {
|
||||
id
|
||||
titleRaw
|
||||
}
|
||||
title
|
||||
brand
|
||||
}
|
||||
ldjson
|
||||
player {
|
||||
image {
|
||||
templateUrl
|
||||
}
|
||||
modes {
|
||||
streamId
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
'''
|
||||
|
||||
def _fetch_tokens(self):
|
||||
has_credentials = self._get_login_info()[0]
|
||||
access_token = self._get_vrt_cookie(self._ACCESS_TOKEN_COOKIE_NAME)
|
||||
video_token = self._get_vrt_cookie(self._VIDEO_TOKEN_COOKIE_NAME)
|
||||
|
||||
if (access_token and not self._is_jwt_token_expired(access_token)
|
||||
and video_token and not self._is_jwt_token_expired(video_token)):
|
||||
return access_token, video_token
|
||||
|
||||
if has_credentials:
|
||||
access_token, video_token = self.cache.load(self._NETRC_MACHINE, 'token_data', default=(None, None))
|
||||
|
||||
if (access_token and not self._is_jwt_token_expired(access_token)
|
||||
and video_token and not self._is_jwt_token_expired(video_token)):
|
||||
self.write_debug('Restored tokens from cache')
|
||||
self._set_cookie(self._TOKEN_COOKIE_DOMAIN, self._ACCESS_TOKEN_COOKIE_NAME, access_token)
|
||||
self._set_cookie(self._TOKEN_COOKIE_DOMAIN, self._VIDEO_TOKEN_COOKIE_NAME, video_token)
|
||||
return access_token, video_token
|
||||
|
||||
if not self._get_vrt_cookie(self._REFRESH_TOKEN_COOKIE_NAME):
|
||||
return None, None
|
||||
|
||||
self._request_webpage(
|
||||
'https://www.vrt.be/vrtmax/sso/refresh', None,
|
||||
note='Refreshing tokens', errnote='Failed to refresh tokens', fatal=False)
|
||||
|
||||
access_token = self._get_vrt_cookie(self._ACCESS_TOKEN_COOKIE_NAME)
|
||||
video_token = self._get_vrt_cookie(self._VIDEO_TOKEN_COOKIE_NAME)
|
||||
|
||||
if not access_token or not video_token:
|
||||
self.cache.store(self._NETRC_MACHINE, 'refresh_token', None)
|
||||
self.cookiejar.clear(self._TOKEN_COOKIE_DOMAIN, '/vrtmax/sso', self._REFRESH_TOKEN_COOKIE_NAME)
|
||||
msg = 'Refreshing of tokens failed'
|
||||
if not has_credentials:
|
||||
self.report_warning(msg)
|
||||
return None, None
|
||||
self.report_warning(f'{msg}. Re-logging in')
|
||||
return self._perform_login(*self._get_login_info())
|
||||
|
||||
if has_credentials:
|
||||
self.cache.store(self._NETRC_MACHINE, 'token_data', (access_token, video_token))
|
||||
|
||||
return access_token, video_token
|
||||
|
||||
def _get_vrt_cookie(self, cookie_name):
|
||||
# Refresh token cookie is scoped to /vrtmax/sso, others are scoped to /
|
||||
return try_call(lambda: self._get_cookies('https://www.vrt.be/vrtmax/sso')[cookie_name].value)
|
||||
|
||||
@staticmethod
|
||||
def _is_jwt_token_expired(token):
|
||||
return jwt_decode_hs256(token)['exp'] - time.time() < 300
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
auth_info = self._gigya_login({
|
||||
'APIKey': '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy',
|
||||
'targetEnv': 'jssdk',
|
||||
'loginID': username,
|
||||
'password': password,
|
||||
'authMode': 'cookie',
|
||||
})
|
||||
refresh_token = self._get_vrt_cookie(self._REFRESH_TOKEN_COOKIE_NAME)
|
||||
if refresh_token and not self._is_jwt_token_expired(refresh_token):
|
||||
self.write_debug('Using refresh token from logged-in cookies; skipping login with credentials')
|
||||
return
|
||||
|
||||
if auth_info.get('errorDetails'):
|
||||
raise ExtractorError(f'Unable to login. VrtNU said: {auth_info["errorDetails"]}', expected=True)
|
||||
refresh_token = self.cache.load(self._NETRC_MACHINE, 'refresh_token', default=None)
|
||||
if refresh_token and not self._is_jwt_token_expired(refresh_token):
|
||||
self.write_debug('Restored refresh token from cache')
|
||||
self._set_cookie(self._TOKEN_COOKIE_DOMAIN, self._REFRESH_TOKEN_COOKIE_NAME, refresh_token, path='/vrtmax/sso')
|
||||
return
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry
|
||||
for retry in self.RetryManager():
|
||||
if retry.attempt > 1:
|
||||
self._sleep(1, None)
|
||||
try:
|
||||
self._request_webpage(
|
||||
'https://token.vrt.be/vrtnuinitlogin', None, note='Requesting XSRF Token',
|
||||
errnote='Could not get XSRF Token', query={
|
||||
'provider': 'site',
|
||||
'destination': 'https://www.vrt.be/vrtnu/',
|
||||
})
|
||||
self._request_webpage(
|
||||
'https://login.vrt.be/perform_login', None,
|
||||
note='Performing login', errnote='Login failed',
|
||||
query={'client_id': 'vrtnu-site'}, data=urlencode_postdata({
|
||||
'UID': auth_info['UID'],
|
||||
'UIDSignature': auth_info['UIDSignature'],
|
||||
'signatureTimestamp': auth_info['signatureTimestamp'],
|
||||
'_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
|
||||
}))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
retry.error = e
|
||||
continue
|
||||
raise
|
||||
self._request_webpage(
|
||||
'https://www.vrt.be/vrtmax/sso/login', None,
|
||||
note='Getting session cookies', errnote='Failed to get session cookies')
|
||||
|
||||
self._authenticated = True
|
||||
login_data = self._download_json(
|
||||
'https://login.vrt.be/perform_login', None, data=json.dumps({
|
||||
'clientId': 'vrtnu-site',
|
||||
'loginID': username,
|
||||
'password': password,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Oidcxsrf': self._get_cookies('https://login.vrt.be')['OIDCXSRF'].value,
|
||||
}, note='Logging in', errnote='Login failed', expected_status=403)
|
||||
if login_data.get('errorCode'):
|
||||
raise ExtractorError(f'Login failed: {login_data.get("errorMessage")}', expected=True)
|
||||
|
||||
self._request_webpage(
|
||||
login_data['redirectUrl'], None,
|
||||
note='Getting access token', errnote='Failed to get access token')
|
||||
|
||||
access_token = self._get_vrt_cookie(self._ACCESS_TOKEN_COOKIE_NAME)
|
||||
video_token = self._get_vrt_cookie(self._VIDEO_TOKEN_COOKIE_NAME)
|
||||
refresh_token = self._get_vrt_cookie(self._REFRESH_TOKEN_COOKIE_NAME)
|
||||
|
||||
if not all((access_token, video_token, refresh_token)):
|
||||
raise ExtractorError('Unable to extract token cookie values')
|
||||
|
||||
self.cache.store(self._NETRC_MACHINE, 'token_data', (access_token, video_token))
|
||||
self.cache.store(self._NETRC_MACHINE, 'refresh_token', refresh_token)
|
||||
|
||||
return access_token, video_token
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
parsed_url = urllib.parse.urlparse(url)
|
||||
details = self._download_json(
|
||||
f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.rstrip("/")}.model.json',
|
||||
display_id, 'Downloading asset JSON', 'Unable to download asset JSON')['details']
|
||||
access_token, video_token = self._fetch_tokens()
|
||||
|
||||
watch_info = traverse_obj(details, (
|
||||
'actions', lambda _, v: v['type'] == 'watch-episode', {dict}), get_all=False) or {}
|
||||
video_id = join_nonempty(
|
||||
'episodePublicationId', 'episodeVideoId', delim='$', from_dict=watch_info)
|
||||
if '$' not in video_id:
|
||||
raise ExtractorError('Unable to extract video ID')
|
||||
metadata = self._download_json(
|
||||
f'https://www.vrt.be/vrtnu-api/graphql{"" if access_token else "/public"}/v1',
|
||||
display_id, 'Downloading asset JSON', 'Unable to download asset JSON',
|
||||
data=json.dumps({
|
||||
'operationName': 'VideoPage',
|
||||
'query': self._VIDEO_PAGE_QUERY,
|
||||
'variables': {'pageId': urllib.parse.urlparse(url).path},
|
||||
}).encode(),
|
||||
headers=filter_dict({
|
||||
'Authorization': f'Bearer {access_token}' if access_token else None,
|
||||
'Content-Type': 'application/json',
|
||||
'x-vrt-client-name': 'WEB',
|
||||
'x-vrt-client-version': '1.5.9',
|
||||
'x-vrt-zone': 'default',
|
||||
}))['data']['page']
|
||||
|
||||
vrtnutoken = self._download_json(
|
||||
'https://token.vrt.be/refreshtoken', video_id, note='Retrieving vrtnutoken',
|
||||
errnote='Token refresh failed')['vrtnutoken'] if self._authenticated else None
|
||||
video_id = metadata['player']['modes'][0]['streamId']
|
||||
|
||||
video_info = self._call_api(video_id, 'vrtnu-web@PROD', vrtnutoken)
|
||||
try:
|
||||
streaming_info = self._call_api(video_id, 'vrtnu-web@PROD', id_token=video_token)
|
||||
except ExtractorError as e:
|
||||
if not video_token and isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
self.raise_login_required()
|
||||
raise
|
||||
|
||||
if 'title' not in video_info:
|
||||
code = video_info.get('code')
|
||||
if code in ('AUTHENTICATION_REQUIRED', 'CONTENT_IS_AGE_RESTRICTED'):
|
||||
self.raise_login_required(code, method='password')
|
||||
elif code in ('INVALID_LOCATION', 'CONTENT_AVAILABLE_ONLY_IN_BE'):
|
||||
formats, subtitles = self._extract_formats_and_subtitles(streaming_info, video_id)
|
||||
|
||||
code = traverse_obj(streaming_info, ('code', {str}))
|
||||
if not formats and code:
|
||||
if code in ('CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS', 'CONTENT_AVAILABLE_ONLY_IN_BE', 'CONTENT_UNAVAILABLE_VIA_PROXY'):
|
||||
self.raise_geo_restricted(countries=['BE'])
|
||||
elif code == 'CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS_AND_EXPATS':
|
||||
if not self._authenticated:
|
||||
self.raise_login_required(code, method='password')
|
||||
self.raise_geo_restricted(countries=['BE'])
|
||||
raise ExtractorError(code, expected=True)
|
||||
|
||||
formats, subtitles = self._extract_formats_and_subtitles(video_info, video_id)
|
||||
elif code in ('CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS_AND_EXPATS', 'CONTENT_IS_AGE_RESTRICTED', 'CONTENT_REQUIRES_AUTHENTICATION'):
|
||||
self.raise_login_required()
|
||||
else:
|
||||
self.raise_no_formats(f'Unable to extract formats: {code}')
|
||||
|
||||
return {
|
||||
**traverse_obj(details, {
|
||||
'title': 'title',
|
||||
'description': ('description', {clean_html}),
|
||||
'timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}),
|
||||
'release_timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}),
|
||||
'series': ('data', 'program', 'title'),
|
||||
'season': ('data', 'season', 'title', 'value'),
|
||||
'season_number': ('data', 'season', 'title', 'raw', {int_or_none}),
|
||||
'season_id': ('data', 'season', 'id', {str_or_none}),
|
||||
'episode': ('data', 'episode', 'number', 'value', {str_or_none}),
|
||||
'episode_number': ('data', 'episode', 'number', 'raw', {int_or_none}),
|
||||
'episode_id': ('data', 'episode', 'id', {str_or_none}),
|
||||
'age_limit': ('data', 'episode', 'age', 'raw', {parse_age_limit}),
|
||||
}),
|
||||
'duration': float_or_none(streaming_info.get('duration'), 1000),
|
||||
'thumbnail': url_or_none(streaming_info.get('posterImageUrl')),
|
||||
**self._json_ld(traverse_obj(metadata, ('ldjson', ..., {json.loads})), video_id, fatal=False),
|
||||
**traverse_obj(metadata, ('episode', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('onTimeRaw', {parse_iso8601}),
|
||||
'series': ('program', 'title', {str}),
|
||||
'season': ('season', 'titleRaw', {str}),
|
||||
'season_number': ('season', 'titleRaw', {int_or_none}),
|
||||
'season_id': ('id', {str_or_none}),
|
||||
'episode': ('title', {str}),
|
||||
'episode_number': ('episodeNumberRaw', {int_or_none}),
|
||||
'episode_id': ('id', {str_or_none}),
|
||||
'age_limit': ('ageRaw', {parse_age_limit}),
|
||||
'channel': ('brand', {str}),
|
||||
'duration': ('durationRaw', {parse_duration}),
|
||||
})),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'channel': 'VRT',
|
||||
'formats': formats,
|
||||
'duration': float_or_none(video_info.get('duration'), 1000),
|
||||
'thumbnail': url_or_none(video_info.get('posterImageUrl')),
|
||||
'subtitles': subtitles,
|
||||
'_old_archive_ids': [make_archive_id('Canvas', video_id)],
|
||||
}
|
||||
|
||||
|
||||
class KetnetIE(VRTBaseIE):
    # Matches ketnet.be pages; everything after the host (up to ?, # or &) is captured as the ID.
    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
    # NOTE(review): _real_extract fills `thumbnail` from the queried `imageUrl` field;
    # if the API returns a usable URL for this test, the expected info_dict needs a
    # matching `thumbnail` entry -- confirm against a live run.
    _TESTS = [{
        'url': 'https://www.ketnet.be/kijken/m/meisjes/6/meisjes-s6a5',
        'info_dict': {
            'id': 'pbs-pub-39f8351c-a0a0-43e6-8394-205d597d6162$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
            'ext': 'mp4',
            'title': 'Meisjes',
            'episode': 'Reeks 6: Week 5',
            'season': 'Reeks 6',
            'series': 'Meisjes',
            'timestamp': 1685251800,
            'upload_date': '20230528',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Look up the page's video through the Ketnet GraphQL endpoint and build its info dict."""
        display_id = self._match_id(url)

        # Only the fields listed in this query are requested, so the metadata
        # mapping at the end must read exactly these field names.
        video = self._download_json(
            'https://senior-bff.ketnet.be/graphql', display_id, query={
                'query': '''{
  video(id: "content/ketnet/nl/%s.model.json") {
    description
    episodeNr
    imageUrl
    mediaReference
    programTitle
    publicationDate
    seasonTitle
    subtitleVideodetail
    titleVideodetail
  }
}''' % display_id,  # noqa: UP031
            })['data']['video']

        # mediaReference is URL-unquoted before being used as the video ID
        video_id = urllib.parse.unquote(video['mediaReference'])
        data = self._call_api(video_id, 'ketnet@PROD', version='v1')
        formats, subtitles = self._extract_formats_and_subtitles(data, video_id)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            # Previously this key appeared twice in the literal; only the last
            # value took effect, so that value is kept here as the single entry.
            '_old_archive_ids': [
                make_archive_id('Canvas', video_id),
                make_archive_id('Ketnet', video_id),
            ],
            **traverse_obj(video, {
                'title': ('titleVideodetail', {str}),
                'description': ('description', {str}),
                # The query asks for `imageUrl`; no `thumbnail` field is requested
                'thumbnail': ('imageUrl', {url_or_none}),
                'timestamp': ('publicationDate', {parse_iso8601}),
                'series': ('programTitle', {str}),
                'season': ('seasonTitle', {str}),
                'episode': ('subtitleVideodetail', {str}),
                'episode_number': ('episodeNr', {int_or_none}),
            }),
        }
|
||||
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
str_or_none,
|
||||
strip_jsonp,
|
||||
traverse_obj,
|
||||
truncate_string,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
@@ -96,7 +97,8 @@ class WeiboBaseIE(InfoExtractor):
|
||||
})
|
||||
return formats
|
||||
|
||||
def _parse_video_info(self, video_info, video_id=None):
|
||||
def _parse_video_info(self, video_info):
|
||||
video_id = traverse_obj(video_info, (('id', 'id_str', 'mid'), {str_or_none}, any))
|
||||
return {
|
||||
'id': video_id,
|
||||
'extractor_key': WeiboIE.ie_key(),
|
||||
@@ -105,9 +107,10 @@ class WeiboBaseIE(InfoExtractor):
|
||||
'http_headers': {'Referer': 'https://weibo.com/'},
|
||||
'_old_archive_ids': [make_archive_id('WeiboMobile', video_id)],
|
||||
**traverse_obj(video_info, {
|
||||
'id': (('id', 'id_str', 'mid'), {str_or_none}),
|
||||
'display_id': ('mblogid', {str_or_none}),
|
||||
'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, filter),
|
||||
'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'),
|
||||
{lambda x: x.replace('\n', ' ')}, {truncate_string(left=72)}, filter),
|
||||
'alt_title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, filter),
|
||||
'description': ('text_raw', {str}),
|
||||
'duration': ('page_info', 'media_info', 'duration', {int_or_none}),
|
||||
'timestamp': ('page_info', 'media_info', 'video_publish_time', {int_or_none}),
|
||||
@@ -124,14 +127,16 @@ class WeiboBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class WeiboIE(WeiboBaseIE):
|
||||
_VALID_URL = r'https?://(?:m\.weibo\.cn/status|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:m\.weibo\.cn/(?:status|detail)|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://weibo.com/7827771738/N4xlMvjhI',
|
||||
'info_dict': {
|
||||
'id': '4910815147462302',
|
||||
'_old_archive_ids': ['weibomobile 4910815147462302'],
|
||||
'ext': 'mp4',
|
||||
'display_id': 'N4xlMvjhI',
|
||||
'title': '【睡前消息暑假版第一期:拉泰国一把 对中国有好处】',
|
||||
'alt_title': '【睡前消息暑假版第一期:拉泰国一把 对中国有好处】',
|
||||
'description': 'md5:e2637a7673980d68694ea7c43cf12a5f',
|
||||
'duration': 918,
|
||||
'timestamp': 1686312819,
|
||||
@@ -149,9 +154,11 @@ class WeiboIE(WeiboBaseIE):
|
||||
'url': 'https://m.weibo.cn/status/4189191225395228',
|
||||
'info_dict': {
|
||||
'id': '4189191225395228',
|
||||
'_old_archive_ids': ['weibomobile 4189191225395228'],
|
||||
'ext': 'mp4',
|
||||
'display_id': 'FBqgOmDxO',
|
||||
'title': '柴犬柴犬的秒拍视频',
|
||||
'alt_title': '柴犬柴犬的秒拍视频',
|
||||
'description': 'md5:80f461ab5cdae6bbdb70efbf5a1db24f',
|
||||
'duration': 53,
|
||||
'timestamp': 1514264429,
|
||||
@@ -164,17 +171,37 @@ class WeiboIE(WeiboBaseIE):
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://m.weibo.cn/detail/4189191225395228',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://weibo.com/0/4224132150961381',
|
||||
'note': 'no playback_list example',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://m.weibo.cn/detail/5120561132606436',
|
||||
'info_dict': {
|
||||
'id': '5120561132606436',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
return self._parse_video_info(self._weibo_download_json(
|
||||
f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id))
|
||||
meta = self._weibo_download_json(f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id)
|
||||
mix_media_info = traverse_obj(meta, ('mix_media_info', 'items', ...))
|
||||
if not mix_media_info:
|
||||
return self._parse_video_info(meta)
|
||||
|
||||
return self.playlist_result(self._entries(mix_media_info), video_id)
|
||||
|
||||
def _entries(self, mix_media_info):
    """Yield a parsed info dict for every mixed-media item whose 'type' is not 'pic'."""
    # Reshape each item into the layout _parse_video_info reads:
    # the ID at the top level and the media info nested under 'page_info'.
    entry_template = {
        'id': ('data', 'object_id'),
        'page_info': {'media_info': ('data', 'media_info', {dict})},
    }
    non_picture_items = traverse_obj(mix_media_info, lambda _, item: item['type'] != 'pic')
    for item in non_picture_items:
        video_info = traverse_obj(item, entry_template)
        yield self._parse_video_info(video_info)
|
||||
|
||||
|
||||
class WeiboVideoIE(WeiboBaseIE):
|
||||
@@ -186,6 +213,7 @@ class WeiboVideoIE(WeiboBaseIE):
|
||||
'ext': 'mp4',
|
||||
'display_id': 'LEZDodaiW',
|
||||
'title': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了',
|
||||
'alt_title': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了',
|
||||
'description': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了 http://t.cn/A6aerGsM \u200b\u200b\u200b',
|
||||
'duration': 76,
|
||||
'timestamp': 1659344278,
|
||||
@@ -197,6 +225,7 @@ class WeiboVideoIE(WeiboBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'_old_archive_ids': ['weibomobile 4797700463137878'],
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
@@ -100,8 +100,8 @@ class WSJIE(InfoExtractor):
|
||||
|
||||
|
||||
class WSJArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/(?:articles|opinion)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
|
||||
'info_dict': {
|
||||
'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
|
||||
@@ -110,11 +110,20 @@ class WSJArticleIE(InfoExtractor):
|
||||
'uploader_id': 'ralcaraz',
|
||||
'title': 'Bao Bao the Panda Leaves for China',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.wsj.com/opinion/hamas-hostages-caskets-bibas-family-israel-gaza-29da083b',
|
||||
'info_dict': {
|
||||
'id': 'CE68D629-8DB8-4CD3-B30A-92112C102054',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20241007',
|
||||
'uploader_id': 'Tinnes, David',
|
||||
'title': 'WSJ Opinion: "Get the Jew": The Crown Heights Riot Revisited',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
webpage = self._download_webpage(url, article_id, impersonate=True)
|
||||
video_id = self._search_regex(
|
||||
r'(?:id=["\']video|video-|iframe\.html\?guid=|data-src=["\'])([a-fA-F0-9-]{36})',
|
||||
webpage, 'video id')
|
||||
|
||||
@@ -20,7 +20,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class XHamsterIE(InfoExtractor):
|
||||
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
|
||||
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.(?:com|desi)|xhday\.com|xhvid\.com)'
|
||||
_VALID_URL = rf'''(?x)
|
||||
https?://
|
||||
(?:[^/?#]+\.)?{_DOMAINS}/
|
||||
@@ -31,7 +31,7 @@ class XHamsterIE(InfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
|
||||
'md5': '34e1ab926db5dc2750fed9e1f34304bb',
|
||||
'md5': 'e009ea6b849b129e3bebaeb9cf0dee51',
|
||||
'info_dict': {
|
||||
'id': '1509445',
|
||||
'display_id': 'femaleagent-shy-beauty-takes-the-bait',
|
||||
@@ -43,6 +43,11 @@ class XHamsterIE(InfoExtractor):
|
||||
'uploader_id': 'ruseful2011',
|
||||
'duration': 893,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://thumb-nss.xhcdn.com/a/u3Vr5F2vvcU3yK59_jJqVA/001/509/445/1280x720.8.jpg',
|
||||
'uploader_url': 'https://xhamster.com/users/ruseful2011',
|
||||
'description': '',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
|
||||
@@ -56,6 +61,10 @@ class XHamsterIE(InfoExtractor):
|
||||
'uploader': 'jojo747400',
|
||||
'duration': 200,
|
||||
'age_limit': 18,
|
||||
'description': '',
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://thumb-nss.xhcdn.com/a/kk5nio_iR-h4Z3frfVtoDw/002/221/348/1280x720.4.jpg',
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -73,6 +82,11 @@ class XHamsterIE(InfoExtractor):
|
||||
'uploader_id': 'parejafree',
|
||||
'duration': 72,
|
||||
'age_limit': 18,
|
||||
'comment_count': int,
|
||||
'uploader_url': 'https://xhamster.com/users/parejafree',
|
||||
'description': '',
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://thumb-nss.xhcdn.com/a/xc8MSwVKcsQeRRiTT-saMQ/005/667/973/1280x720.2.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -122,6 +136,9 @@ class XHamsterIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://xhamster20.desi/videos/my-verification-video-scottishmistress23-11937369',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -267,7 +284,7 @@ class XHamsterIE(InfoExtractor):
|
||||
video, lambda x: x['rating']['likes'], int)),
|
||||
'dislike_count': int_or_none(try_get(
|
||||
video, lambda x: x['rating']['dislikes'], int)),
|
||||
'comment_count': int_or_none(video.get('views')),
|
||||
'comment_count': int_or_none(video.get('comments')),
|
||||
'age_limit': age_limit if age_limit is not None else 18,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
|
||||
@@ -5,12 +5,13 @@ from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class XiaoHongShuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.xiaohongshu\.com/explore/(?P<id>[\da-f]+)'
|
||||
_VALID_URL = r'https?://www\.xiaohongshu\.com/(?:explore|discovery/item)/(?P<id>[\da-f]+)'
|
||||
IE_DESC = '小红书'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.xiaohongshu.com/explore/6411cf99000000001300b6d9',
|
||||
@@ -25,6 +26,18 @@ class XiaoHongShuIE(InfoExtractor):
|
||||
'duration': 101.726,
|
||||
'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[a-z0-9]+/[\w]+',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.xiaohongshu.com/discovery/item/674051740000000007027a15?xsec_token=CBgeL8Dxd1ZWBhwqRd568gAZ_iwG-9JIf9tnApNmteU2E=',
|
||||
'info_dict': {
|
||||
'id': '674051740000000007027a15',
|
||||
'ext': 'mp4',
|
||||
'title': '相互喜欢就可以了',
|
||||
'uploader_id': '63439913000000001901f49a',
|
||||
'duration': 28.073,
|
||||
'description': '#广州[话题]# #深圳[话题]# #香港[话题]# #街头采访[话题]# #是你喜欢的类型[话题]#',
|
||||
'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[\da-f]+/[^/]+',
|
||||
'tags': ['广州', '深圳', '香港', '街头采访', '是你喜欢的类型'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -34,7 +47,7 @@ class XiaoHongShuIE(InfoExtractor):
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', display_id, transform_source=js_to_json)
|
||||
|
||||
note_info = traverse_obj(initial_state, ('note', 'noteDetailMap', display_id, 'note'))
|
||||
video_info = traverse_obj(note_info, ('video', 'media', 'stream', ('h264', 'av1', 'h265'), ...))
|
||||
video_info = traverse_obj(note_info, ('video', 'media', 'stream', ..., ...))
|
||||
|
||||
formats = []
|
||||
for info in video_info:
|
||||
@@ -44,18 +57,32 @@ class XiaoHongShuIE(InfoExtractor):
|
||||
'height': ('height', {int_or_none}),
|
||||
'vcodec': ('videoCodec', {str}),
|
||||
'acodec': ('audioCodec', {str}),
|
||||
'abr': ('audioBitrate', {int_or_none}),
|
||||
'vbr': ('videoBitrate', {int_or_none}),
|
||||
'abr': ('audioBitrate', {int_or_none(scale=1000)}),
|
||||
'vbr': ('videoBitrate', {int_or_none(scale=1000)}),
|
||||
'audio_channels': ('audioChannels', {int_or_none}),
|
||||
'tbr': ('avgBitrate', {int_or_none}),
|
||||
'tbr': ('avgBitrate', {int_or_none(scale=1000)}),
|
||||
'format': ('qualityType', {str}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
})
|
||||
|
||||
formats.extend(traverse_obj(info, (('mediaUrl', ('backupUrls', ...)), {
|
||||
formats.extend(traverse_obj(info, (('masterUrl', ('backupUrls', ...)), {
|
||||
lambda u: url_or_none(u) and {'url': u, **format_info}})))
|
||||
|
||||
if origin_key := traverse_obj(note_info, ('video', 'consumer', 'originVideoKey', {str})):
|
||||
# Not using a head request because of false negatives
|
||||
urlh = self._request_webpage(
|
||||
f'https://sns-video-bd.xhscdn.com/{origin_key}', display_id,
|
||||
'Checking original video availability', 'Original video is not available', fatal=False)
|
||||
if urlh:
|
||||
formats.append({
|
||||
'format_id': 'direct',
|
||||
'ext': urlhandle_detect_ext(urlh, default='mp4'),
|
||||
'filesize': int_or_none(urlh.get_header('Content-Length')),
|
||||
'url': urlh.url,
|
||||
'quality': 1,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for image_info in traverse_obj(note_info, ('imageList', ...)):
|
||||
thumbnail_info = traverse_obj(image_info, {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user