mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-28 01:18:30 +00:00
Merge branch 'yt-dlp:master' into handle-infinite-redirects
This commit is contained in:
commit
b04cedc423
@ -256,6 +256,7 @@
|
|||||||
BilibiliCheeseIE,
|
BilibiliCheeseIE,
|
||||||
BilibiliCheeseSeasonIE,
|
BilibiliCheeseSeasonIE,
|
||||||
BilibiliCollectionListIE,
|
BilibiliCollectionListIE,
|
||||||
|
BiliBiliDynamicIE,
|
||||||
BilibiliFavoritesListIE,
|
BilibiliFavoritesListIE,
|
||||||
BiliBiliIE,
|
BiliBiliIE,
|
||||||
BiliBiliPlayerIE,
|
BiliBiliPlayerIE,
|
||||||
@ -585,6 +586,10 @@
|
|||||||
EggheadCourseIE,
|
EggheadCourseIE,
|
||||||
EggheadLessonIE,
|
EggheadLessonIE,
|
||||||
)
|
)
|
||||||
|
from .eggs import (
|
||||||
|
EggsArtistIE,
|
||||||
|
EggsIE,
|
||||||
|
)
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .eitb import EitbIE
|
from .eitb import EitbIE
|
||||||
from .elementorembed import ElementorEmbedIE
|
from .elementorembed import ElementorEmbedIE
|
||||||
@ -1279,6 +1284,10 @@
|
|||||||
)
|
)
|
||||||
from .nekohacker import NekoHackerIE
|
from .nekohacker import NekoHackerIE
|
||||||
from .nerdcubed import NerdCubedFeedIE
|
from .nerdcubed import NerdCubedFeedIE
|
||||||
|
from .nest import (
|
||||||
|
NestClipIE,
|
||||||
|
NestIE,
|
||||||
|
)
|
||||||
from .neteasemusic import (
|
from .neteasemusic import (
|
||||||
NetEaseMusicAlbumIE,
|
NetEaseMusicAlbumIE,
|
||||||
NetEaseMusicDjRadioIE,
|
NetEaseMusicDjRadioIE,
|
||||||
@ -1533,6 +1542,10 @@
|
|||||||
PinterestCollectionIE,
|
PinterestCollectionIE,
|
||||||
PinterestIE,
|
PinterestIE,
|
||||||
)
|
)
|
||||||
|
from .piramidetv import (
|
||||||
|
PiramideTVChannelIE,
|
||||||
|
PiramideTVIE,
|
||||||
|
)
|
||||||
from .pixivsketch import (
|
from .pixivsketch import (
|
||||||
PixivSketchIE,
|
PixivSketchIE,
|
||||||
PixivSketchUserIE,
|
PixivSketchUserIE,
|
||||||
|
@ -32,6 +32,7 @@
|
|||||||
parse_qs,
|
parse_qs,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
qualities,
|
qualities,
|
||||||
|
sanitize_url,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
@ -1861,6 +1862,47 @@ def _real_extract(self, url):
|
|||||||
ie=BiliBiliIE.ie_key(), video_id=video_id)
|
ie=BiliBiliIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class BiliBiliDynamicIE(InfoExtractor):
    """Resolve a bilibili dynamic/opus post to the video URL it links to."""

    _VALID_URL = r'https?://(?:t\.bilibili\.com|(?:www\.)?bilibili\.com/opus)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://t.bilibili.com/998134289197432852',
        'info_dict': {
            'id': 'BV1TAmBYVEJr',
            'ext': 'mp4',
            'uploader_id': '1192648858',
            'comment_count': int,
            '_old_archive_ids': ['bilibili 113457567568273_part1'],
            'thumbnail': 'http://i2.hdslb.com/bfs/archive/50091efd965d9f13ff6814f7ad374f90ab21e77d.jpg',
            'duration': 929.238,
            'upload_date': '20241110',
            'uploader': '何同学工作室',
            'like_count': int,
            'view_count': int,
            'title': '美国小朋友就玩这个?!何同学工作室11月开箱',
            'description': '本期产品信息:\n机器狗\n气味模拟器\nCloudboom Strike LS\n无弦吉他\n蓝牙磁带音箱\n神奇画板',
            'timestamp': 1731232800,
            'tags': list,
            'chapters': list,
        },
    }]

    def _real_extract(self, url):
        """Return a url_result for the first video linked from the post.

        Raises ExtractorError (expected) when the post links to no usable
        video URL, or only links back to this very post.
        """
        dynamic_id = self._match_id(url)

        # Without the newer chrome UA, the API will return an error (-352)
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        }
        detail = self._download_json(
            'https://api.bilibili.com/x/polymer/web-dynamic/v1/detail', dynamic_id,
            query={'id': dynamic_id}, headers=request_headers)

        # Candidate links are looked up on the item itself and on its 'orig'
        # sub-item, under major.archive / major.pgc / additional.reserve /
        # additional.common; the first valid URL wins and is then sanitized.
        jump_url_path = (
            'data', 'item', (None, 'orig'), 'modules', 'module_dynamic',
            (('major', ('archive', 'pgc')), ('additional', ('reserve', 'common'))),
            'jump_url', {url_or_none}, any, {sanitize_url})
        target_url = traverse_obj(detail, jump_url_path)

        if not target_url:
            raise ExtractorError('No valid video URL found', expected=True)
        # A link that this extractor would handle itself with the same ID is
        # rejected (presumably to avoid redirecting to the same post forever).
        if self.suitable(target_url) and self._match_id(target_url) == dynamic_id:
            raise ExtractorError('No valid video URL found', expected=True)

        return self.url_result(target_url)
|
||||||
|
|
||||||
|
|
||||||
class BiliIntlBaseIE(InfoExtractor):
|
class BiliIntlBaseIE(InfoExtractor):
|
||||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||||
_NETRC_MACHINE = 'biliintl'
|
_NETRC_MACHINE = 'biliintl'
|
||||||
|
@ -88,7 +88,7 @@ class BlueskyIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
|
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
|
||||||
'md5': '1af9c7fda061cf7593bbffca89e43d1c',
|
'md5': 'cc0110ed1f6b0247caac8234cc1e861d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3l3w4tnezek2e',
|
'id': '3l3w4tnezek2e',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -133,6 +133,8 @@ class BlueskyIE(InfoExtractor):
|
|||||||
'channel_follower_count': int,
|
'channel_follower_count': int,
|
||||||
'categories': ['Entertainment'],
|
'categories': ['Entertainment'],
|
||||||
'tags': [],
|
'tags': [],
|
||||||
|
'chapters': list,
|
||||||
|
'heatmap': 'count:100',
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
'add_ie': ['Youtube'],
|
||||||
}, {
|
}, {
|
||||||
@ -184,14 +186,14 @@ class BlueskyIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
|
'url': 'https://bsky.app/profile/cinny.bun.how/post/3l7rdfxhyds2f',
|
||||||
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
|
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3l7rdfxhyds2f',
|
'id': '3l7rdfxhyds2f',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader': 'cinnamon',
|
'uploader': 'cinnamon',
|
||||||
'uploader_id': 'alt.bun.how',
|
'uploader_id': 'cinny.bun.how',
|
||||||
'uploader_url': 'https://bsky.app/profile/alt.bun.how',
|
'uploader_url': 'https://bsky.app/profile/cinny.bun.how',
|
||||||
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||||
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||||
@ -341,6 +343,7 @@ def _extract_videos(self, root, video_id, embed_path='embed', record_path='recor
|
|||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'blob',
|
'format_id': 'blob',
|
||||||
|
'quality': 1,
|
||||||
'url': update_url_query(
|
'url': update_url_query(
|
||||||
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': video_cid}),
|
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': video_cid}),
|
||||||
**traverse_obj(root, (*embed_path, 'aspectRatio', {
|
**traverse_obj(root, (*embed_path, 'aspectRatio', {
|
||||||
|
155
yt_dlp/extractor/eggs.py
Normal file
155
yt_dlp/extractor/eggs.py
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
import secrets
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
str_or_none,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class EggsBaseIE(InfoExtractor):
    """Shared API access and track parsing for the eggs.mu extractors."""

    # Base headers sent with every API request. This dict is shared by all
    # subclasses and instances and must be treated as read-only; the random
    # 'deviceId' header is added to a per-instance copy in _real_initialize().
    _API_HEADERS = {
        'Accept': '*/*',
        'apVersion': '8.2.00',
        'deviceName': 'Android',
    }

    def _real_initialize(self):
        # Rebind a copy on the instance instead of writing into the class-level
        # dict, so that initializing one extractor does not silently change the
        # headers used by every other Eggs extractor instance.
        self._API_HEADERS = {
            **self._API_HEADERS,
            'deviceId': secrets.token_hex(8),
        }

    def _call_api(self, endpoint, video_id):
        """Download and return the JSON document for the given v1 API endpoint."""
        return self._download_json(
            f'https://app-front-api.eggs.mu/v1/{endpoint}', video_id,
            headers=self._API_HEADERS)

    def _extract_music_info(self, data):
        """Convert one track object from the API into an extraction result.

        When the track carries a valid 'youtubeUrl', a url_result handing the
        URL to YoutubeIE is returned. Otherwise an audio-only info dict is
        built from the track's fields and attributed to EggsIE.
        """
        if yt_url := traverse_obj(data, ('youtubeUrl', {url_or_none})):
            return self.url_result(yt_url, ie=YoutubeIE)

        artist_name = traverse_obj(data, ('artist', 'artistName', {str_or_none}))
        music_id = traverse_obj(data, ('musicId', {str_or_none}))
        # The song page URL can only be built when both parts are known
        webpage_url = None
        if artist_name and music_id:
            webpage_url = f'https://eggs.mu/artist/{artist_name}/song/{music_id}'

        return {
            'id': music_id,
            'vcodec': 'none',
            'webpage_url': webpage_url,
            # Entries are attributed to EggsIE regardless of which subclass
            # produced them (e.g. tracks listed by the artist extractor)
            'extractor_key': EggsIE.ie_key(),
            'extractor': EggsIE.IE_NAME,
            **traverse_obj(data, {
                'title': ('musicTitle', {str}),
                'url': ('musicDataPath', {url_or_none}),
                'uploader': ('artist', 'displayName', {str}),
                'uploader_id': ('artist', 'artistId', {str_or_none}),
                'thumbnail': ('imageDataPath', {url_or_none}),
                'view_count': ('numberOfMusicPlays', {int_or_none}),
                'like_count': ('numberOfLikes', {int_or_none}),
                'comment_count': ('numberOfComments', {int_or_none}),
                'composers': ('composer', {str}, all),
                'tags': ('tags', ..., {str}),
                'timestamp': ('releaseDate', {parse_iso8601}),
                'artist': ('artist', 'displayName', {str}),
            })}
|
||||||
|
|
||||||
|
|
||||||
|
class EggsIE(EggsBaseIE):
    """Extractor for a single song page on eggs.mu."""

    IE_NAME = 'eggs:single'
    _VALID_URL = r'https?://eggs\.mu/artist/[^/?#]+/song/(?P<id>[\da-f-]+)'

    _TESTS = [{
        'url': 'https://eggs.mu/artist/32_sunny_girl/song/0e95fd1d-4d61-4d5b-8b18-6092c551da90',
        'info_dict': {
            'id': '0e95fd1d-4d61-4d5b-8b18-6092c551da90',
            'ext': 'm4a',
            'title': 'シネマと信号',
            'uploader': 'Sunny Girl',
            'thumbnail': r're:https?://.*\.jpg(?:\?.*)?$',
            'uploader_id': '1607',
            'like_count': int,
            'timestamp': 1731327327,
            'composers': ['橘高連太郎'],
            'view_count': int,
            'comment_count': int,
            'artists': ['Sunny Girl'],
            'upload_date': '20241111',
            'tags': ['SunnyGirl', 'シネマと信号'],
        },
    }, {
        'url': 'https://eggs.mu/artist/KAMO_3pband/song/1d4bc45f-1af6-47a9-8b30-a70cae350b4f',
        'info_dict': {
            'id': '80cLKA2wnoA',
            'ext': 'mp4',
            'title': 'KAMO「いい女だから」Audio',
            'uploader': 'KAMO',
            'live_status': 'not_live',
            'channel_id': 'UCsHLBw2__5Q9y55skXPotOg',
            'channel_follower_count': int,
            'description': 'md5:d260da711ecbec3e720293dc11401b87',
            'availability': 'public',
            'uploader_id': '@KAMO_band',
            'upload_date': '20240925',
            'thumbnail': 'https://i.ytimg.com/vi/80cLKA2wnoA/maxresdefault.jpg',
            'comment_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCsHLBw2__5Q9y55skXPotOg',
            'view_count': int,
            'duration': 151,
            'like_count': int,
            'channel': 'KAMO',
            'playable_in_embed': True,
            'uploader_url': 'https://www.youtube.com/@KAMO_band',
            'tags': [],
            'timestamp': 1727271121,
            'age_limit': 0,
            'categories': ['People & Blogs'],
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Fetch the song's API record and convert it via _extract_music_info."""
        music_id = self._match_id(url)
        track = self._call_api(f'musics/{music_id}', music_id)
        return self._extract_music_info(track)
|
||||||
|
|
||||||
|
|
||||||
|
class EggsArtistIE(EggsBaseIE):
    """Extractor for an eggs.mu artist page, returned as a playlist of tracks."""

    IE_NAME = 'eggs:artist'
    _VALID_URL = r'https?://eggs\.mu/artist/(?P<id>\w+)/?(?:[?#&]|$)'

    _TESTS = [{
        'url': 'https://eggs.mu/artist/32_sunny_girl',
        'info_dict': {
            'id': '32_sunny_girl',
            'thumbnail': 'https://image-pro.eggs.mu/profile/1607.jpeg?updated_at=2024-04-03T20%3A06%3A00%2B09%3A00',
            'description': 'Muddy Mine / 東京高田馬場CLUB PHASE / Gt.Vo 橘高 連太郎 / Ba.Cho 小野 ゆうき / Dr 大森 りゅうひこ',
            'title': 'Sunny Girl',
        },
        'playlist_mincount': 18,
    }, {
        'url': 'https://eggs.mu/artist/KAMO_3pband',
        'info_dict': {
            'id': 'KAMO_3pband',
            'description': '川崎発3ピースバンド',
            'thumbnail': 'https://image-pro.eggs.mu/profile/35217.jpeg?updated_at=2024-11-27T16%3A31%3A50%2B09%3A00',
            'title': 'KAMO',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Build a playlist from the artist's profile and track listing."""
        artist_id = self._match_id(url)

        # Two API requests: the artist profile first, then the track listing
        profile = self._call_api(f'artists/{artist_id}', artist_id)
        listing = self._call_api(f'artists/{artist_id}/musics', artist_id)

        # Every dict under 'data' is converted with the shared track parser
        entries = traverse_obj(
            listing, ('data', ..., {dict}, {self._extract_music_info}))
        playlist_meta = traverse_obj(profile, {
            'title': ('displayName', {str}),
            'description': ('profile', {str}),
            'thumbnail': ('imageDataPath', {url_or_none}),
        })

        return self.playlist_result(entries, playlist_id=artist_id, **playlist_meta)
|
@ -310,7 +310,13 @@ def _real_extract(self, url):
|
|||||||
if stream_type in self._SUPPORTED_STREAM_TYPES:
|
if stream_type in self._SUPPORTED_STREAM_TYPES:
|
||||||
claim_id, is_live = result['claim_id'], False
|
claim_id, is_live = result['claim_id'], False
|
||||||
streaming_url = self._call_api_proxy(
|
streaming_url = self._call_api_proxy(
|
||||||
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
'get', claim_id, {
|
||||||
|
'uri': uri,
|
||||||
|
**traverse_obj(parse_qs(url), {
|
||||||
|
'signature': ('signature', 0),
|
||||||
|
'signature_ts': ('signature_ts', 0),
|
||||||
|
}),
|
||||||
|
}, 'streaming url')['streaming_url']
|
||||||
|
|
||||||
# GET request to v3 API returns original video/audio file if available
|
# GET request to v3 API returns original video/audio file if available
|
||||||
direct_url = re.sub(r'/api/v\d+/', '/api/v3/', streaming_url)
|
direct_url = re.sub(r'/api/v\d+/', '/api/v3/', streaming_url)
|
||||||
|
117
yt_dlp/extractor/nest.py
Normal file
117
yt_dlp/extractor/nest.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError, float_or_none, update_url_query, url_or_none
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class NestIE(InfoExtractor):
    """Extractor for publicly shared Nest camera live streams."""

    _VALID_URL = r'https?://video\.nest\.com/(?:embedded/)?live/(?P<id>\w+)'
    _EMBED_REGEX = [rf'<iframe [^>]*\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://video.nest.com/embedded/live/4fvYdSo8AX?autoplay=0',
        'info_dict': {
            'id': '4fvYdSo8AX',
            'ext': 'mp4',
            'title': 'startswith:Outside ',
            'alt_title': 'Outside',
            'description': '<null>',
            'location': 'Los Angeles',
            'availability': 'public',
            'thumbnail': r're:https?://',
            'live_status': 'is_live',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://video.nest.com/live/4fvYdSo8AX',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.pacificblue.biz/noyo-harbor-webcam/',
        'info_dict': {
            'id': '4fvYdSo8AX',
            'ext': 'mp4',
            'title': 'startswith:Outside ',
            'alt_title': 'Outside',
            'description': '<null>',
            'location': 'Los Angeles',
            'availability': 'public',
            'thumbnail': r're:https?://',
            'live_status': 'is_live',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Look up the camera by its public token and return a live info dict.

        Raises ExtractorError when the camera record lacks the UUID or the
        stream host needed to build the HLS playlist URL.
        """
        video_id = self._match_id(url)
        response = self._download_json(
            'https://video.nest.com/api/dropcam/cameras.get_by_public_token',
            video_id, query={'token': video_id})
        # Only the first returned item is used
        camera = response['items'][0]

        camera_uuid = camera.get('uuid')
        stream_host = camera.get('live_stream_host')
        if not (stream_host and camera_uuid):
            raise ExtractorError('Unable to construct playlist URL')

        # The thumbnail is optional: it is only set when the API names a host
        thumb_host = camera.get('nexus_api_nest_domain_host')
        thumbnail = None
        if thumb_host:
            thumbnail = update_url_query(
                f'https://{thumb_host}/get_image',
                {'uuid': camera_uuid, 'public': video_id})

        metadata = traverse_obj(camera, {
            'description': ('description', {str}),
            # First non-empty string among 'title', 'name' and 'where'
            'title': (('title', 'name', 'where'), {str}, filter, any),
            'alt_title': ('name', {str}),
            # Second '/'-separated part of 'timezone' with underscores turned
            # into spaces, falling back to 'where'
            'location': ((('timezone', {lambda tz: tz.split('/')[1].replace('_', ' ')}), 'where'), {str}, filter, any),
        })

        return {
            'id': video_id,
            **metadata,
            'thumbnail': thumbnail,
            # Only an explicit False for 'is_public' marks the stream private
            'availability': self._availability(is_private=camera.get('is_public') is False),
            'formats': self._extract_m3u8_formats(
                f'https://{stream_host}/nexus_aac/{camera_uuid}/playlist.m3u8',
                video_id, 'mp4', live=True, query={'public': video_id}),
            'is_live': True,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class NestClipIE(InfoExtractor):
    """Extractor for shared Nest camera clips."""

    _VALID_URL = r'https?://video\.nest\.com/(?:embedded/)?clip/(?P<id>\w+)'
    _EMBED_REGEX = [rf'<iframe [^>]*\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://video.nest.com/clip/f34c9dd237a44eca9a0001af685e3dff',
        'info_dict': {
            'id': 'f34c9dd237a44eca9a0001af685e3dff',
            'ext': 'mp4',
            'title': 'NestClip video #f34c9dd237a44eca9a0001af685e3dff',
            'thumbnail': 'https://clips.dropcam.com/f34c9dd237a44eca9a0001af685e3dff.jpg',
            'timestamp': 1735413474.468,
            'upload_date': '20241228',
        },
    }, {
        'url': 'https://video.nest.com/embedded/clip/34e0432adc3c46a98529443d8ad5aa76',
        'info_dict': {
            'id': '34e0432adc3c46a98529443d8ad5aa76',
            'ext': 'mp4',
            'title': 'Shootout at Veterans Boulevard at Fleur De Lis Drive',
            'thumbnail': 'https://clips.dropcam.com/34e0432adc3c46a98529443d8ad5aa76.jpg',
            'upload_date': '20230817',
            'timestamp': 1692262897.191,
        },
    }]

    def _real_extract(self, url):
        """Query the clip by its '<id>.mp4' filename and map the first item."""
        video_id = self._match_id(url)
        response = self._download_json(
            'https://video.nest.com/api/dropcam/videos.get_by_filename', video_id,
            query={'filename': f'{video_id}.mp4'})

        # All metadata, including the direct download URL, comes from items[0]
        clip_fields = traverse_obj(response, ('items', 0, {
            'title': ('title', {str}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'url': ('download_url', {url_or_none}),
            'timestamp': ('start_time', {float_or_none}),
        }))

        return {
            'id': video_id,
            **clip_fields,
        }
|
@ -12,6 +12,7 @@
|
|||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
@ -171,6 +172,8 @@ def call_playback_api(item, query=None):
|
|||||||
format_url = url_or_none(asset.get('url'))
|
format_url = url_or_none(asset.get('url'))
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
|
# Remove the 'adap' query parameter
|
||||||
|
format_url = update_url_query(format_url, {'adap': []})
|
||||||
asset_format = (asset.get('format') or '').lower()
|
asset_format = (asset.get('format') or '').lower()
|
||||||
if asset_format == 'hls' or determine_ext(format_url) == 'm3u8':
|
if asset_format == 'hls' or determine_ext(format_url) == 'm3u8':
|
||||||
formats.extend(self._extract_nrk_formats(format_url, video_id))
|
formats.extend(self._extract_nrk_formats(format_url, video_id))
|
||||||
|
99
yt_dlp/extractor/piramidetv.py
Normal file
99
yt_dlp/extractor/piramidetv.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_iso8601, smuggle_url, unsmuggle_url, url_or_none
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class PiramideTVIE(InfoExtractor):
    """Extractor for piramide.tv videos, optionally following 'next video' links."""

    _VALID_URL = r'https?://piramide\.tv/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://piramide.tv/video/wWtBAORdJUTh',
        'info_dict': {
            'id': 'wWtBAORdJUTh',
            'ext': 'mp4',
            'title': 'md5:79f9c8183ea6a35c836923142cf0abcc',
            'description': '',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/W86PgQDn/thumbnails/B9gpIxkH.jpg',
            'channel': 'León Picarón',
            'channel_id': 'leonpicaron',
            'timestamp': 1696460362,
            'upload_date': '20231004',
        },
    }, {
        'url': 'https://piramide.tv/video/wcYn6li79NgN',
        'info_dict': {
            'id': 'wcYn6li79NgN',
            'ext': 'mp4',
            'title': 'ACEPTO TENER UN BEBE CON MI NOVIA\u2026? | Parte 1',
            'description': '',
            'channel': 'ARTA GAME',
            'channel_id': 'arta_game',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/cnEdGp5X/thumbnails/rHAaWfP7.jpg',
            'timestamp': 1703434976,
            'upload_date': '20231224',
        },
    }]

    def _extract_video(self, video_id):
        """Return a ``(next_video_id, info_dict)`` pair for one video.

        Both the metadata request and the manifest request are non-fatal.
        ``next_video_id`` is None when the metadata names no following video.
        """
        metadata = self._download_json(
            f'https://hermes.piramide.tv/video/data/{video_id}', video_id, fatal=False)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'https://cdn.piramide.tv/video/{video_id}/manifest.m3u8', video_id, fatal=False)

        following_id = traverse_obj(metadata, ('video', 'next_video', 'id', {str}))
        video_fields = traverse_obj(metadata, ('video', {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'thumbnail': ('media', 'thumbnail', {url_or_none}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
            'timestamp': ('date', {parse_iso8601}),
        }))

        info = {
            # The requested ID is the default; an 'id' found in the metadata
            # replaces it when video_fields is unpacked below
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **video_fields,
        }
        return following_id, info

    def _entries(self, video_id):
        """Yield info dicts for video_id and the chain of videos following it.

        The chain ends when no next video is named or when it points to a
        video that was already visited.
        """
        seen = {video_id}
        upcoming, info = self._extract_video(video_id)
        yield info

        while upcoming and upcoming not in seen:
            seen.add(upcoming)
            upcoming, info = self._extract_video(upcoming)
            yield info

    def _real_extract(self, url):
        """Return either the single video or a playlist following its chain."""
        url, smuggled = unsmuggle_url(url, {})
        video_id = self._match_id(url)

        # The smuggled data is handed to _yes_playlist; channel entries smuggle
        # 'force_noplaylist' (presumably to force the single-video path)
        if not self._yes_playlist(video_id, video_id, smuggled):
            _, info = self._extract_video(video_id)
            return info

        return self.playlist_result(self._entries(video_id), video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class PiramideTVChannelIE(InfoExtractor):
    """Extractor for piramide.tv channel pages, returned as a playlist."""

    _VALID_URL = r'https?://piramide\.tv/channel/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://piramide.tv/channel/thekalo',
        'playlist_mincount': 10,
        'info_dict': {
            'id': 'thekalo',
        },
    }]

    def _entries(self, channel_name):
        """Yield one url_result per listed channel video that has an 'id'."""
        listing = self._download_json(
            f'https://hermes.piramide.tv/channel/list/{channel_name}/date/100000', channel_name)

        for item in traverse_obj(listing, ('videos', lambda _, v: v['id'])):
            # Smuggle 'force_noplaylist' along with each video URL
            video_url = smuggle_url(
                f'https://piramide.tv/video/{item["id"]}', {'force_noplaylist': True})
            hints = traverse_obj(item, {
                'id': ('id', {str}),
                'title': ('title', {str}),
                'description': ('description', {str}),
            })
            yield self.url_result(video_url, **hints)

    def _real_extract(self, url):
        """Return a playlist of the channel's videos, keyed by the channel name."""
        channel_name = self._match_id(url)
        entries = self._entries(channel_name)
        return self.playlist_result(entries, channel_name)
|
@ -176,6 +176,8 @@ class RTVSLOShowIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '173250997',
|
'id': '173250997',
|
||||||
'title': 'Ekipa Bled',
|
'title': 'Ekipa Bled',
|
||||||
|
'description': 'md5:c88471e27a1268c448747a5325319ab7',
|
||||||
|
'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/173250997/logo_wide1.jpg',
|
||||||
},
|
},
|
||||||
'playlist_count': 18,
|
'playlist_count': 18,
|
||||||
}]
|
}]
|
||||||
@ -187,4 +189,7 @@ def _real_extract(self, url):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage),
|
re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage),
|
||||||
playlist_id, self._html_extract_title(webpage),
|
playlist_id, self._html_extract_title(webpage),
|
||||||
getter=urljoin('https://365.rtvslo.si'), ie=RTVSLOIE)
|
getter=urljoin('https://365.rtvslo.si'), ie=RTVSLOIE,
|
||||||
|
description=self._og_search_description(webpage),
|
||||||
|
thumbnail=self._og_search_thumbnail(webpage),
|
||||||
|
)
|
||||||
|
@ -4,43 +4,12 @@
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
parse_qs,
|
UnsupportedError,
|
||||||
unsmuggle_url,
|
make_archive_id,
|
||||||
|
remove_end,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
_COMMITTEES = {
|
|
||||||
'ag': ('76440', 'http://ag-f.akamaihd.net'),
|
|
||||||
'aging': ('76442', 'http://aging-f.akamaihd.net'),
|
|
||||||
'approps': ('76441', 'http://approps-f.akamaihd.net'),
|
|
||||||
'arch': ('', 'http://ussenate-f.akamaihd.net'),
|
|
||||||
'armed': ('76445', 'http://armed-f.akamaihd.net'),
|
|
||||||
'banking': ('76446', 'http://banking-f.akamaihd.net'),
|
|
||||||
'budget': ('76447', 'http://budget-f.akamaihd.net'),
|
|
||||||
'cecc': ('76486', 'http://srs-f.akamaihd.net'),
|
|
||||||
'commerce': ('80177', 'http://commerce1-f.akamaihd.net'),
|
|
||||||
'csce': ('75229', 'http://srs-f.akamaihd.net'),
|
|
||||||
'dpc': ('76590', 'http://dpc-f.akamaihd.net'),
|
|
||||||
'energy': ('76448', 'http://energy-f.akamaihd.net'),
|
|
||||||
'epw': ('76478', 'http://epw-f.akamaihd.net'),
|
|
||||||
'ethics': ('76449', 'http://ethics-f.akamaihd.net'),
|
|
||||||
'finance': ('76450', 'http://finance-f.akamaihd.net'),
|
|
||||||
'foreign': ('76451', 'http://foreign-f.akamaihd.net'),
|
|
||||||
'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'),
|
|
||||||
'help': ('76452', 'http://help-f.akamaihd.net'),
|
|
||||||
'indian': ('76455', 'http://indian-f.akamaihd.net'),
|
|
||||||
'intel': ('76456', 'http://intel-f.akamaihd.net'),
|
|
||||||
'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'),
|
|
||||||
'jccic': ('85180', 'http://jccic-f.akamaihd.net'),
|
|
||||||
'jec': ('76458', 'http://jec-f.akamaihd.net'),
|
|
||||||
'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'),
|
|
||||||
'rpc': ('76591', 'http://rpc-f.akamaihd.net'),
|
|
||||||
'rules': ('76460', 'http://rules-f.akamaihd.net'),
|
|
||||||
'saa': ('76489', 'http://srs-f.akamaihd.net'),
|
|
||||||
'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'),
|
|
||||||
'srs': ('75229', 'http://srs-f.akamaihd.net'),
|
|
||||||
'uscc': ('76487', 'http://srs-f.akamaihd.net'),
|
|
||||||
'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class SenateISVPIE(InfoExtractor):
|
class SenateISVPIE(InfoExtractor):
|
||||||
@ -53,31 +22,46 @@ class SenateISVPIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'judiciary031715',
|
'id': 'judiciary031715',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Integrated Senate Video Player',
|
'title': 'ISVP',
|
||||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||||
|
'_old_archive_ids': ['senategov judiciary031715'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'commerce011514',
|
'id': 'commerce011514',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Integrated Senate Video Player',
|
'title': 'Integrated Senate Video Player',
|
||||||
|
'_old_archive_ids': ['senategov commerce011514'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'This video is not available.',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
||||||
# checksum differs each time
|
# checksum differs each time
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'intel090613',
|
'id': 'intel090613',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Integrated Senate Video Player',
|
'title': 'ISVP',
|
||||||
|
'_old_archive_ids': ['senategov intel090613'],
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.senate.gov/isvp/?auto_play=false&comm=help&filename=help090920&poster=https://www.help.senate.gov/assets/images/video-poster.png&stt=950',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'help090920',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'ISVP',
|
||||||
|
'thumbnail': 'https://www.help.senate.gov/assets/images/video-poster.png',
|
||||||
|
'_old_archive_ids': ['senategov help090920'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# From http://www.c-span.org/video/?96791-1
|
# From http://www.c-span.org/video/?96791-1
|
||||||
@ -85,60 +69,81 @@ class SenateISVPIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_COMMITTEES = {
|
||||||
|
'ag': ('76440', 'https://ag-f.akamaihd.net', '2036803', 'agriculture'),
|
||||||
|
'aging': ('76442', 'https://aging-f.akamaihd.net', '2036801', 'aging'),
|
||||||
|
'approps': ('76441', 'https://approps-f.akamaihd.net', '2036802', 'appropriations'),
|
||||||
|
'arch': ('', 'https://ussenate-f.akamaihd.net', '', 'arch'),
|
||||||
|
'armed': ('76445', 'https://armed-f.akamaihd.net', '2036800', 'armedservices'),
|
||||||
|
'banking': ('76446', 'https://banking-f.akamaihd.net', '2036799', 'banking'),
|
||||||
|
'budget': ('76447', 'https://budget-f.akamaihd.net', '2036798', 'budget'),
|
||||||
|
'cecc': ('76486', 'https://srs-f.akamaihd.net', '2036782', 'srs_cecc'),
|
||||||
|
'commerce': ('80177', 'https://commerce1-f.akamaihd.net', '2036779', 'commerce'),
|
||||||
|
'csce': ('75229', 'https://srs-f.akamaihd.net', '2036777', 'srs_srs'),
|
||||||
|
'dpc': ('76590', 'https://dpc-f.akamaihd.net', '', 'dpc'),
|
||||||
|
'energy': ('76448', 'https://energy-f.akamaihd.net', '2036797', 'energy'),
|
||||||
|
'epw': ('76478', 'https://epw-f.akamaihd.net', '2036783', 'environment'),
|
||||||
|
'ethics': ('76449', 'https://ethics-f.akamaihd.net', '2036796', 'ethics'),
|
||||||
|
'finance': ('76450', 'https://finance-f.akamaihd.net', '2036795', 'finance_finance'),
|
||||||
|
'foreign': ('76451', 'https://foreign-f.akamaihd.net', '2036794', 'foreignrelations'),
|
||||||
|
'govtaff': ('76453', 'https://govtaff-f.akamaihd.net', '2036792', 'hsgac'),
|
||||||
|
'help': ('76452', 'https://help-f.akamaihd.net', '2036793', 'help'),
|
||||||
|
'indian': ('76455', 'https://indian-f.akamaihd.net', '2036791', 'indianaffairs'),
|
||||||
|
'intel': ('76456', 'https://intel-f.akamaihd.net', '2036790', 'intelligence'),
|
||||||
|
'intlnarc': ('76457', 'https://intlnarc-f.akamaihd.net', '', 'internationalnarcoticscaucus'),
|
||||||
|
'jccic': ('85180', 'https://jccic-f.akamaihd.net', '2036778', 'jccic'),
|
||||||
|
'jec': ('76458', 'https://jec-f.akamaihd.net', '2036789', 'jointeconomic'),
|
||||||
|
'judiciary': ('76459', 'https://judiciary-f.akamaihd.net', '2036788', 'judiciary'),
|
||||||
|
'rpc': ('76591', 'https://rpc-f.akamaihd.net', '', 'rpc'),
|
||||||
|
'rules': ('76460', 'https://rules-f.akamaihd.net', '2036787', 'rules'),
|
||||||
|
'saa': ('76489', 'https://srs-f.akamaihd.net', '2036780', 'srs_saa'),
|
||||||
|
'smbiz': ('76461', 'https://smbiz-f.akamaihd.net', '2036786', 'smallbusiness'),
|
||||||
|
'srs': ('75229', 'https://srs-f.akamaihd.net', '2031966', 'srs_srs'),
|
||||||
|
'uscc': ('76487', 'https://srs-f.akamaihd.net', '2036781', 'srs_uscc'),
|
||||||
|
'vetaff': ('76462', 'https://vetaff-f.akamaihd.net', '2036785', 'veteransaffairs'),
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
|
||||||
|
|
||||||
qs = urllib.parse.parse_qs(self._match_valid_url(url).group('qs'))
|
qs = urllib.parse.parse_qs(self._match_valid_url(url).group('qs'))
|
||||||
if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
|
if not qs.get('filename') or not qs.get('comm'):
|
||||||
raise ExtractorError('Invalid URL', expected=True)
|
raise ExtractorError('Invalid URL', expected=True)
|
||||||
|
filename = qs['filename'][0]
|
||||||
video_id = re.sub(r'.mp4$', '', qs['filename'][0])
|
video_id = remove_end(filename, '.mp4')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
committee = qs['comm'][0]
|
||||||
|
|
||||||
if smuggled_data.get('force_title'):
|
stream_num, stream_domain, stream_id, msl3 = self._COMMITTEES[committee]
|
||||||
title = smuggled_data['force_title']
|
|
||||||
else:
|
|
||||||
title = self._html_extract_title(webpage)
|
|
||||||
poster = qs.get('poster')
|
|
||||||
thumbnail = poster[0] if poster else None
|
|
||||||
|
|
||||||
video_type = qs['type'][0]
|
|
||||||
committee = video_type if video_type == 'arch' else qs['comm'][0]
|
|
||||||
|
|
||||||
stream_num, domain = _COMMITTEES[committee]
|
|
||||||
|
|
||||||
|
urls_alternatives = [f'https://www-senate-gov-media-srs.akamaized.net/hls/live/{stream_id}/{committee}/{filename}/master.m3u8',
|
||||||
|
f'https://www-senate-gov-msl3archive.akamaized.net/{msl3}/{filename}_1/master.m3u8',
|
||||||
|
f'{stream_domain}/i/{filename}_1@{stream_num}/master.m3u8',
|
||||||
|
f'{stream_domain}/i/{filename}.mp4/master.m3u8']
|
||||||
formats = []
|
formats = []
|
||||||
if video_type == 'arch':
|
subtitles = {}
|
||||||
filename = video_id if '.' in video_id else video_id + '.mp4'
|
for video_url in urls_alternatives:
|
||||||
m3u8_url = urllib.parse.urljoin(domain, 'i/' + filename + '/master.m3u8')
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', fatal=False)
|
||||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8')
|
if formats:
|
||||||
else:
|
break
|
||||||
hdcore_sign = 'hdcore=3.1.0'
|
|
||||||
url_params = (domain, video_id, stream_num)
|
|
||||||
f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params
|
|
||||||
m3u8_url = '{}/i/{}_1@{}/master.m3u8'.format(*url_params)
|
|
||||||
for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
|
|
||||||
# URLs without the extra param induce an 404 error
|
|
||||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
|
||||||
formats.append(entry)
|
|
||||||
for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
|
|
||||||
mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url'])
|
|
||||||
if mobj:
|
|
||||||
entry['format_id'] += mobj.group('tag')
|
|
||||||
formats.append(entry)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': self._html_extract_title(webpage),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'subtitles': subtitles,
|
||||||
|
'thumbnail': traverse_obj(qs, ('poster', 0, {url_or_none})),
|
||||||
|
'_old_archive_ids': [make_archive_id(SenateGovIE, video_id)],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class SenateGovIE(InfoExtractor):
|
class SenateGovIE(InfoExtractor):
|
||||||
_IE_NAME = 'senate.gov'
|
_IE_NAME = 'senate.gov'
|
||||||
_VALID_URL = r'https?:\/\/(?:www\.)?(help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov'
|
_SUBDOMAIN_RE = '|'.join(map(re.escape, (
|
||||||
|
'agriculture', 'aging', 'appropriations', 'armed-services', 'banking',
|
||||||
|
'budget', 'commerce', 'energy', 'epw', 'finance', 'foreign', 'help',
|
||||||
|
'intelligence', 'inaugural', 'judiciary', 'rules', 'sbc', 'veterans',
|
||||||
|
)))
|
||||||
|
_VALID_URL = rf'https?://(?:www\.)?(?:{_SUBDOMAIN_RE})\.senate\.gov'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
|
'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -147,6 +152,9 @@ class SenateGovIE(InfoExtractor):
|
|||||||
'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health',
|
'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health',
|
||||||
'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions',
|
'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'age_limit': 0,
|
||||||
|
'thumbnail': 'https://www.help.senate.gov/assets/images/sharelogo.jpg',
|
||||||
|
'_old_archive_ids': ['senategov help090920'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
@ -156,8 +164,12 @@ class SenateGovIE(InfoExtractor):
|
|||||||
'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
|
'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
|
||||||
'title': 'Review of the FY2019 Budget Request for the U.S. Army',
|
'title': 'Review of the FY2019 Budget Request for the U.S. Army',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'age_limit': 0,
|
||||||
|
'thumbnail': 'https://www.appropriations.senate.gov/themes/appropriations/images/video-poster-flash-fit.png',
|
||||||
|
'_old_archive_ids': ['senategov appropsA051518'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
|
'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -166,32 +178,65 @@ class SenateGovIE(InfoExtractor):
|
|||||||
'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization',
|
'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization',
|
||||||
'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs',
|
'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'thumbnail': 'https://www.banking.senate.gov/themes/banking/images/sharelogo.jpg',
|
||||||
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['senategov banking041521'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.agriculture.senate.gov/hearings/hemp-production-and-the-2018-farm-bill',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.aging.senate.gov/hearings/the-older-americans-act-the-local-impact-of-the-law-and-the-upcoming-reauthorization',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.budget.senate.gov/hearings/improving-care-lowering-costs-achieving-health-care-efficiency',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.commerce.senate.gov/2024/12/communications-networks-safety-and-security',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.energy.senate.gov/hearings/2024/2/full-committee-hearing-to-examine',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.epw.senate.gov/public/index.cfm/hearings?ID=F63083EA-2C13-498C-B548-341BED68C209',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.foreign.senate.gov/hearings/american-diplomacy-and-global-leadership-review-of-the-fy25-state-department-budget-request',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.intelligence.senate.gov/hearings/foreign-threats-elections-2024-%E2%80%93-roles-and-responsibilities-us-tech-providers',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.inaugural.senate.gov/52nd-inaugural-ceremonies/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.rules.senate.gov/hearings/02/07/2023/business-meeting',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.sbc.senate.gov/public/index.cfm/hearings?ID=5B13AA6B-8279-45AF-B54B-94156DC7A2AB',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.veterans.senate.gov/2024/5/frontier-health-care-ensuring-veterans-access-no-matter-where-they-live',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._generic_id(url)
|
display_id = self._generic_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
parse_info = parse_qs(self._search_regex(
|
url_info = next(SenateISVPIE.extract_from_webpage(self._downloader, url, webpage), None)
|
||||||
r'<iframe class="[^>"]*streaminghearing[^>"]*"\s[^>]*\bsrc="([^">]*)', webpage, 'hearing URL'))
|
if not url_info:
|
||||||
|
raise UnsupportedError(url)
|
||||||
stream_num, stream_domain = _COMMITTEES[parse_info['comm'][-1]]
|
|
||||||
filename = parse_info['filename'][-1]
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
f'{stream_domain}/i/{filename}_1@{stream_num}/master.m3u8',
|
|
||||||
display_id, ext='mp4')
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
(*self._og_regexes('title'), r'(?s)<title>([^<]*?)</title>'), webpage, 'video title')
|
(*self._og_regexes('title'), r'(?s)<title>([^<]*?)</title>'), webpage, 'video title', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': re.sub(r'.mp4$', '', filename),
|
**url_info,
|
||||||
|
'_type': 'url_transparent',
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': re.sub(r'\s+', ' ', title.split('|')[0]).strip(),
|
'title': re.sub(r'\s+', ' ', title.split('|')[0]).strip(),
|
||||||
'description': self._og_search_description(webpage, default=None),
|
'description': self._og_search_description(webpage, default=None),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
'age_limit': self._rta_search(webpage),
|
'age_limit': self._rta_search(webpage),
|
||||||
'formats': formats,
|
|
||||||
}
|
}
|
||||||
|
@ -124,7 +124,7 @@ def _parse_video_info(self, video_info, video_id=None):
|
|||||||
|
|
||||||
|
|
||||||
class WeiboIE(WeiboBaseIE):
|
class WeiboIE(WeiboBaseIE):
|
||||||
_VALID_URL = r'https?://(?:m\.weibo\.cn/status|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:m\.weibo\.cn/(?:status|detail)|(?:www\.)?weibo\.com/\d+)/(?P<id>[a-zA-Z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://weibo.com/7827771738/N4xlMvjhI',
|
'url': 'https://weibo.com/7827771738/N4xlMvjhI',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -164,6 +164,25 @@ class WeiboIE(WeiboBaseIE):
|
|||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://m.weibo.cn/detail/4189191225395228',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4189191225395228',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': 'FBqgOmDxO',
|
||||||
|
'title': '柴犬柴犬的秒拍视频',
|
||||||
|
'description': '午睡当然是要甜甜蜜蜜的啦![坏笑] Instagram:shibainu.gaku http://t.cn/RHbmjzW ',
|
||||||
|
'duration': 53,
|
||||||
|
'timestamp': 1514264429,
|
||||||
|
'upload_date': '20171226',
|
||||||
|
'thumbnail': r're:https://.*\.jpg',
|
||||||
|
'uploader': '柴犬柴犬',
|
||||||
|
'uploader_id': '5926682210',
|
||||||
|
'uploader_url': 'https://weibo.com/u/5926682210',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://weibo.com/0/4224132150961381',
|
'url': 'https://weibo.com/0/4224132150961381',
|
||||||
'note': 'no playback_list example',
|
'note': 'no playback_list example',
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urlhandle_detect_ext,
|
||||||
)
|
)
|
||||||
from ..utils.traversal import traverse_obj
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
@ -46,7 +47,7 @@ def _real_extract(self, url):
|
|||||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', display_id, transform_source=js_to_json)
|
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', display_id, transform_source=js_to_json)
|
||||||
|
|
||||||
note_info = traverse_obj(initial_state, ('note', 'noteDetailMap', display_id, 'note'))
|
note_info = traverse_obj(initial_state, ('note', 'noteDetailMap', display_id, 'note'))
|
||||||
video_info = traverse_obj(note_info, ('video', 'media', 'stream', ('h264', 'av1', 'h265'), ...))
|
video_info = traverse_obj(note_info, ('video', 'media', 'stream', ..., ...))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for info in video_info:
|
for info in video_info:
|
||||||
@ -56,18 +57,32 @@ def _real_extract(self, url):
|
|||||||
'height': ('height', {int_or_none}),
|
'height': ('height', {int_or_none}),
|
||||||
'vcodec': ('videoCodec', {str}),
|
'vcodec': ('videoCodec', {str}),
|
||||||
'acodec': ('audioCodec', {str}),
|
'acodec': ('audioCodec', {str}),
|
||||||
'abr': ('audioBitrate', {int_or_none}),
|
'abr': ('audioBitrate', {int_or_none(scale=1000)}),
|
||||||
'vbr': ('videoBitrate', {int_or_none}),
|
'vbr': ('videoBitrate', {int_or_none(scale=1000)}),
|
||||||
'audio_channels': ('audioChannels', {int_or_none}),
|
'audio_channels': ('audioChannels', {int_or_none}),
|
||||||
'tbr': ('avgBitrate', {int_or_none}),
|
'tbr': ('avgBitrate', {int_or_none(scale=1000)}),
|
||||||
'format': ('qualityType', {str}),
|
'format': ('qualityType', {str}),
|
||||||
'filesize': ('size', {int_or_none}),
|
'filesize': ('size', {int_or_none}),
|
||||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||||
})
|
})
|
||||||
|
|
||||||
formats.extend(traverse_obj(info, (('mediaUrl', ('backupUrls', ...)), {
|
formats.extend(traverse_obj(info, (('masterUrl', ('backupUrls', ...)), {
|
||||||
lambda u: url_or_none(u) and {'url': u, **format_info}})))
|
lambda u: url_or_none(u) and {'url': u, **format_info}})))
|
||||||
|
|
||||||
|
if origin_key := traverse_obj(note_info, ('video', 'consumer', 'originVideoKey', {str})):
|
||||||
|
# Not using a head request because of false negatives
|
||||||
|
urlh = self._request_webpage(
|
||||||
|
f'https://sns-video-bd.xhscdn.com/{origin_key}', display_id,
|
||||||
|
'Checking original video availability', 'Original video is not available', fatal=False)
|
||||||
|
if urlh:
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'direct',
|
||||||
|
'ext': urlhandle_detect_ext(urlh, default='mp4'),
|
||||||
|
'filesize': int_or_none(urlh.get_header('Content-Length')),
|
||||||
|
'url': urlh.url,
|
||||||
|
'quality': 1,
|
||||||
|
})
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for image_info in traverse_obj(note_info, ('imageList', ...)):
|
for image_info in traverse_obj(note_info, ('imageList', ...)):
|
||||||
thumbnail_info = traverse_obj(image_info, {
|
thumbnail_info = traverse_obj(image_info, {
|
||||||
|
Loading…
Reference in New Issue
Block a user