
Merge branch 'yt-dlp:master' into adobetv

doe1080 2025-08-03 13:47:48 +09:00 committed by GitHub
commit c50867c31a
7 changed files with 433 additions and 293 deletions


@ -1860,6 +1860,7 @@
from .sharepoint import SharePointIE
from .sharevideos import ShareVideosEmbedIE
from .shemaroome import ShemarooMeIE
from .shiey import ShieyIE
from .showroomlive import ShowRoomLiveIE
from .sibnet import SibnetEmbedIE
from .simplecast import (


@ -22,8 +22,23 @@ class FC2IE(InfoExtractor):
'md5': 'a6ebe8ebe0396518689d963774a54eb7',
'info_dict': {
'id': '20121103kUan1KHs',
'ext': 'flv',
'title': 'Boxing again with Puff',
'ext': 'mp4',
'thumbnail': r're:https?://.+\.jpe?g',
},
'params': {
'skip_download': 'm3u8',
},
}, {
# Direct video url
'url': 'https://video.fc2.com/content/20121209FP73fxDx',
'md5': '066bdb9b3a56a97f49cbf0d0b8a75a1f',
'info_dict': {
'id': '20121209FP73fxDx',
'title': 'Farewelling The Wiggles Live in Sydney Dec 8 2012',
'ext': 'mp4',
'thumbnail': r're:https?://.+\.jpe?g',
'description': 'Saying goodbye to the Wiggles at their Celebration Concert in Sydney, and what a concert that was!',
},
}, {
'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
@ -104,7 +119,7 @@ def _real_extract(self, url):
'title': title,
'url': vid_url,
'ext': 'mp4',
'protocol': 'm3u8_native',
'protocol': 'm3u8_native' if vidplaylist.get('type') == 2 else 'https',
'description': description,
'thumbnail': thumbnail,
}


@ -1,4 +1,5 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
@ -38,7 +39,7 @@ class N1InfoIIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)'
_TESTS = [{
# YouTube embedded
'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
'url': 'https://sportklub.n1info.rs/tenis/us-open/glava-telo-igra-kako-je-novak-ispustio-istorijsku-sansu/',
'md5': '987ce6fd72acfecc453281e066b87973',
'info_dict': {
'id': 'L5Hd4hQVUpk',
@ -67,36 +68,24 @@ class N1InfoIIE(InfoExtractor):
'playable_in_embed': True,
'availability': 'public',
'live_status': 'not_live',
'media_type': 'video',
},
}, {
'url': 'https://rs.n1info.com/vesti/djilas-los-plan-za-metro-nece-resiti-nijedan-saobracajni-problem/',
'url': 'https://n1info.si/novice/svet/v-srbiji-samo-ta-konec-tedna-vec-kot-200-pozarov/',
'info_dict': {
'id': 'bgmetrosot2409zta20210924174316682-n1info-rs-worldwide',
'id': '2182656',
'ext': 'mp4',
'title': 'Đilas: Predlog izgradnje metroa besmislen; SNS odbacuje navode',
'title': 'V Srbiji samo ta konec tedna več kot 200 požarov',
'upload_date': '20210924',
'timestamp': 1753611983,
'timestamp': 1632481347,
'upload_date': '20250727',
'thumbnail': 'http://n1info.rs/wp-content/themes/ucnewsportal-n1/dist/assets/images/placeholder-image-video.jpg',
'thumbnail': 'https://n1info.si/media/images/2025/7/1753611048_Pozar.width-1200.webp',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://n1info.si/novice/slovenija/zadnji-dnevi-na-kopaliscu-ilirija-ilirija-ni-umrla-ubili-so-jo/',
'info_dict': {
'id': 'ljsottomazilirija3060921-n1info-si-worldwide',
'ext': 'mp4',
'title': 'Zadnji dnevi na kopališču Ilirija: “Ilirija ni umrla, ubili so jo”',
'timestamp': 1632567630,
'upload_date': '20210925',
'thumbnail': 'https://n1info.si/wp-content/uploads/2021/09/06/1630945843-tomaz3.png',
},
'params': {
'skip_download': True,
},
}, {
# Reddit embedded
'url': 'https://ba.n1info.com/lifestyle/vucic-bolji-od-tita-ako-izgubi-ja-cu-da-crknem-jugoslavija-je-gotova/',
'url': 'https://nova.rs/vesti/drustvo/ako-vucic-izgubi-izbore-ja-cu-da-crknem-jugoslavija-je-gotova/',
'info_dict': {
'id': '2wmfee9eycp71',
'ext': 'mp4',
@ -113,9 +102,6 @@ class N1InfoIIE(InfoExtractor):
'duration': 134,
'thumbnail': 'https://external-preview.redd.it/5nmmawSeGx60miQM3Iq-ueC9oyCLTLjjqX-qqY8uRsc.png?format=pjpg&auto=webp&s=2f973400b04d23f871b608b178e47fc01f9b8f1d',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://nova.rs/vesti/politika/zaklina-tatalovic-ani-brnabic-pricate-lazi-video/',
'info_dict': {
@ -126,6 +112,9 @@ class N1InfoIIE(InfoExtractor):
'timestamp': 1635861677,
'thumbnail': 'https://nova.rs/wp-content/uploads/2021/11/02/1635860298-TNJG_Ana_Brnabic_i_Zaklina_Tatalovic_100_dana_Vlade_GP.jpg',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/',
'info_dict': {
@ -155,12 +144,15 @@ def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
title = self._og_search_title(webpage) or self._html_extract_title(webpage)
timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))
timestamp = unified_timestamp(self._og_search_property('published_time', webpage, default=None) or self._html_search_meta('article:published_time', webpage))
plugin_data = re.findall(r'\$bp\("(?:Brid|TargetVideo)_\d+",\s(.+)\);', webpage)
entries = []
if plugin_data:
site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id')
site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id', default=None)
if site_id is None:
site_id = self._search_regex(
r'partners/(\d+)', self._html_search_meta('contentUrl', webpage, fatal=True), 'site ID')
for video_data in plugin_data:
video_id = self._parse_json(video_data, title)['video']
entries.append({
@ -191,10 +183,13 @@ def _real_extract(self, url):
for embedded_video in embedded_videos:
video_data = extract_attributes(embedded_video)
url = video_data.get('src') or ''
if url.startswith('https://www.youtube.com'):
hostname = urllib.parse.urlparse(url).hostname
if hostname == 'www.youtube.com':
entries.append(self.url_result(url, ie='Youtube'))
elif url.startswith('https://www.redditmedia.com'):
elif hostname == 'www.redditmedia.com':
entries.append(self.url_result(url, ie='Reddit'))
elif hostname == 'www.facebook.com' and 'plugins/video' in url:
entries.append(self.url_result(url, ie='FacebookPluginsVideo'))
return {
'_type': 'playlist',


@ -3,7 +3,6 @@
import itertools
import json
import re
import time
from .common import InfoExtractor, SearchInfoExtractor
from ..networking.exceptions import HTTPError
@ -16,12 +15,12 @@
float_or_none,
int_or_none,
parse_bitrate,
parse_duration,
parse_iso8601,
parse_qs,
parse_resolution,
qualities,
str_or_none,
time_seconds,
truncate_string,
unified_timestamp,
update_url_query,
@ -38,8 +37,14 @@
class NiconicoBaseIE(InfoExtractor):
_API_BASE = 'https://nvapi.nicovideo.jp'
_BASE_URL = 'https://www.nicovideo.jp'
_GEO_BYPASS = False
_GEO_COUNTRIES = ['JP']
_HEADERS = {
'X-Frontend-ID': '6',
'X-Frontend-Version': '0',
}
_LOGIN_BASE = 'https://account.nicovideo.jp'
_NETRC_MACHINE = 'niconico'
@ -99,146 +104,266 @@ class NiconicoIE(NiconicoBaseIE):
IE_NAME = 'niconico'
IE_DESC = 'ニコニコ動画'
_VALID_URL = r'https?://(?:(?:embed|sp|www)\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?\d+)'
_ERROR_MAP = {
'FORBIDDEN': {
'ADMINISTRATOR_DELETE_VIDEO': 'Video unavailable, possibly removed by admins',
'CHANNEL_MEMBER_ONLY': 'Channel members only',
'DELETED_CHANNEL_VIDEO': 'Video unavailable, channel was closed',
'DELETED_COMMUNITY_VIDEO': 'Video unavailable, community deleted or missing',
'DEFAULT': 'Page unavailable, check the URL',
'HARMFUL_VIDEO': 'Sensitive content, login required',
'HIDDEN_VIDEO': 'Video unavailable, set to private',
'NOT_ALLOWED': 'No permission',
'PPV_VIDEO': 'PPV video, payment information required',
'PREMIUM_ONLY': 'Premium members only',
},
'INVALID_PARAMETER': {
'DEFAULT': 'Video unavailable, may not exist or was deleted',
},
'MAINTENANCE': {
'DEFAULT': 'Maintenance is in progress',
},
'NOT_FOUND': {
'DEFAULT': 'Video unavailable, may not exist or was deleted',
'RIGHT_HOLDER_DELETE_VIDEO': 'Removed by rights-holder request',
},
'UNAUTHORIZED': {
'DEFAULT': 'Invalid session, re-login required',
},
'UNKNOWN': {
'DEFAULT': 'Failed to fetch content',
},
}
_STATUS_MAP = {
'needs_auth': 'PPV video, payment information required',
'premium_only': 'Premium members only',
'subscriber_only': 'Channel members only',
}
_TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215',
'url': 'https://www.nicovideo.jp/watch/1173108780',
'info_dict': {
'id': 'sm22312215',
'id': 'sm9',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'title': '新・豪血寺一族 -煩悩解放 - レッツゴー!陰陽師',
'thumbnail': r're:https?://.*',
'availability': 'public',
'uploader': 'takuya0301',
'channel': '中の',
'uploader_id': '2698420',
'channel_id': '4',
'upload_date': '20131123',
'timestamp': int, # timestamp is unstable
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
'view_count': int,
'comment_count': int,
'description': 'md5:b7f6d3e6c29552cc19fdea6a4b7dc194',
'display_id': '1173108780',
'duration': 320,
'genres': ['未設定'],
'tags': [],
'like_count': int,
'tags': 'mincount:5',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1173108780,
'upload_date': '20070305',
'uploader': '中の',
'uploader_id': '4',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
}, {
# File downloaded with and without credentials are different, so omit
'url': 'https://www.nicovideo.jp/watch/sm8628149',
# the md5 field
'info_dict': {
'url': 'http://www.nicovideo.jp/watch/nm14296458',
'id': 'sm8628149',
'ext': 'mp4',
'title': '【東方】Bad Apple!!\u3000PV【影絵】',
'availability': 'public',
'channel': 'あにら',
'channel_id': '10731211',
'comment_count': int,
'description': 'md5:1999669158cb77a45bab123c4fafe1d7',
'display_id': 'sm8628149',
'duration': 219,
'genres': ['ゲーム'],
'like_count': int,
'tags': 'mincount:3',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1256580802,
'upload_date': '20091026',
'uploader': 'あにら',
'uploader_id': '10731211',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.nicovideo.jp/watch/nm14296458',
'info_dict': {
'id': 'nm14296458',
'ext': 'mp4',
'title': '【Kagamine Rin】Dance on media【Original】take2!',
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
'availability': 'public',
'channel': 'りょうた',
'channel_id': '18822557',
'comment_count': int,
'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5',
'thumbnail': r're:https?://.*',
'display_id': 'nm14296458',
'duration': 208,
'genres': ['音楽・サウンド'],
'like_count': int,
'tags': 'mincount:1',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1304065916,
'upload_date': '20110429',
'uploader': 'りょうた',
'uploader_id': '18822557',
'upload_date': '20110429',
'timestamp': 1304065916,
'duration': 208.0,
'comment_count': int,
'view_count': int,
'genres': ['音楽・サウンド'],
'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
},
'params': {'skip_download': 'm3u8'},
}, {
# 'video exists but is marked as "deleted"
'url': 'https://www.nicovideo.jp/watch/nl1872567',
# md5 is unstable
'url': 'http://www.nicovideo.jp/watch/sm10000',
'info_dict': {
'id': 'sm10000',
'id': 'nl1872567',
'ext': 'unknown_video',
'description': 'deleted',
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」前編',
'thumbnail': r're:https?://.*',
'upload_date': '20071224',
'timestamp': int, # timestamp field has different value if logged in
'duration': 304,
'view_count': int,
},
'skip': 'Requires an account',
}, {
'url': 'http://www.nicovideo.jp/watch/so22543406',
'info_dict': {
'id': '1388129933',
'ext': 'mp4',
'title': '【第1回】RADIOアニメロミックス ラブライブのぞえりRadio Garden',
'title': '【12/25放送分】『生対談!!ひろゆきと戀塚のニコニコを作った人 』前半',
'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
'availability': 'public',
'thumbnail': r're:https?://.*',
'channel': 'nicolive',
'timestamp': 1388851200,
'channel_id': '394',
'upload_date': '20140104',
'uploader': 'アニメロチャンネル',
'uploader_id': '312',
},
'skip': 'The viewing period of the video you were searching for has expired.',
}, {
# video not available via `getflv`; "old" HTML5 video
'url': 'http://www.nicovideo.jp/watch/sm1151009',
'info_dict': {
'id': 'sm1151009',
'ext': 'mp4',
'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
'description': 'md5:f95a3d259172667b293530cc2e41ebda',
'thumbnail': r're:https?://.*',
'duration': 184,
'timestamp': 1190835883,
'upload_date': '20070926',
'uploader': 'denden2',
'uploader_id': '1392194',
'view_count': int,
'comment_count': int,
'genres': ['ゲーム'],
'tags': [],
},
'params': {'skip_download': 'm3u8'},
}, {
# "New" HTML5 video
'url': 'http://www.nicovideo.jp/watch/sm31464864',
'info_dict': {
'id': 'sm31464864',
'ext': 'mp4',
'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
'timestamp': 1498481660,
'upload_date': '20170626',
'uploader': 'no-namamae',
'uploader_id': '40826363',
'thumbnail': r're:https?://.*',
'duration': 198,
'view_count': int,
'comment_count': int,
'genres': ['アニメ'],
'tags': [],
},
'params': {'skip_download': 'm3u8'},
}, {
# Video without owner
'url': 'http://www.nicovideo.jp/watch/sm18238488',
'info_dict': {
'id': 'sm18238488',
'ext': 'mp4',
'title': '【実写版】ミュータントタートルズ',
'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
'timestamp': 1341128008,
'upload_date': '20120701',
'thumbnail': r're:https?://.*',
'duration': 5271,
'view_count': int,
'comment_count': int,
'description': 'md5:79fc3a54cfdc93ecc2b883285149e548',
'display_id': 'nl1872567',
'duration': 586,
'genres': ['エンターテイメント'],
'tags': [],
'like_count': int,
'tags': 'mincount:3',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1198637246,
'upload_date': '20071226',
'uploader': 'nicolive',
'uploader_id': '394',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True,
}, {
'note': 'a video that is only served as an ENCRYPTED HLS.',
'url': 'https://www.nicovideo.jp/watch/so38016254',
'only_matching': True,
'info_dict': {
'id': 'so38016254',
'ext': 'mp4',
'title': '「のんのんびより のんすとっぷ」 PV',
'availability': 'public',
'channel': 'のんのんびより のんすとっぷ',
'channel_id': 'ch2647028',
'comment_count': int,
'description': 'md5:6e2ff55b33e3645d59ef010869cde6a2',
'display_id': 'so38016254',
'duration': 114,
'genres': ['アニメ'],
'like_count': int,
'tags': 'mincount:4',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1609146000,
'upload_date': '20201228',
'uploader': 'のんのんびより のんすとっぷ',
'uploader_id': 'ch2647028',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
}, {
# smile official, but marked as user video
'url': 'https://www.nicovideo.jp/watch/so37602536',
'info_dict': {
'id': 'so37602536',
'ext': 'mp4',
'title': '田中有紀とゆきだるまと! 限定放送アーカイブ第12回',
'availability': 'subscriber_only',
'channel': 'あみあみ16',
'channel_id': '91072761',
'comment_count': int,
'description': 'md5:2ee357ec4e76d7804fb59af77107ab67',
'display_id': 'so37602536',
'duration': 980,
'genres': ['エンターテイメント'],
'like_count': int,
'tags': 'count:4',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1601377200,
'upload_date': '20200929',
'uploader': 'あみあみ16',
'uploader_id': '91072761',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
'skip': 'Channel members only',
}, {
'url': 'https://www.nicovideo.jp/watch/so41370536',
'info_dict': {
'id': 'so41370536',
'ext': 'mp4',
'title': 'ZUN【出演者別】超パーティー2022',
'availability': 'premium_only',
'channel': 'ニコニコ超会議チャンネル',
'channel_id': 'ch2607134',
'comment_count': int,
'description': 'md5:5692db5ac40d3a374fc5ec182d0249c3',
'display_id': 'so41370536',
'duration': 63,
'genres': ['音楽・サウンド'],
'like_count': int,
'tags': 'mincount:5',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1668394800,
'upload_date': '20221114',
'uploader': 'ニコニコ超会議チャンネル',
'uploader_id': 'ch2607134',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
'skip': 'Premium members only',
}, {
'url': 'https://www.nicovideo.jp/watch/so37574174',
'info_dict': {
'id': 'so37574174',
'ext': 'mp4',
'title': 'ひぐらしのなく頃に 廿回し編\u3000第1回',
'availability': 'subscriber_only',
'channel': '「ひぐらしのなく頃に」オフィシャルチャンネル',
'channel_id': 'ch2646036',
'comment_count': int,
'description': 'md5:5296196d51d9c0b7272b73f9a99c236a',
'display_id': 'so37574174',
'duration': 1931,
'genres': ['ラジオ'],
'like_count': int,
'tags': 'mincount:5',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1601028000,
'upload_date': '20200925',
'uploader': '「ひぐらしのなく頃に」オフィシャルチャンネル',
'uploader_id': 'ch2646036',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
'skip': 'Channel members only',
}, {
'url': 'https://www.nicovideo.jp/watch/so44060088',
'info_dict': {
'id': 'so44060088',
'ext': 'mp4',
'title': '松田的超英雄電波。《仮面ライダーガッチャード 放送終了記念特別番組》',
'availability': 'subscriber_only',
'channel': 'あみあみチャンネル',
'channel_id': 'ch2638921',
'comment_count': int,
'description': 'md5:9dec5bb9a172b6d20a255ecb64fbd03e',
'display_id': 'so44060088',
'duration': 1881,
'genres': ['ラジオ'],
'like_count': int,
'tags': 'mincount:7',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1725361200,
'upload_date': '20240903',
'uploader': 'あみあみチャンネル',
'uploader_id': 'ch2638921',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
'skip': 'Channel members only; specified continuous membership period required',
}]
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
def _extract_formats(self, api_data, video_id):
def _yield_dms_formats(self, api_data, video_id):
fmt_filter = lambda _, v: v['isAvailable'] and v['id']
videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter))
audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter))
@ -247,164 +372,135 @@ def _yield_dms_formats(self, api_data, video_id):
if not all((videos, audios, access_key, track_id)):
return
dms_m3u8_url = self._download_json(
m3u8_url = self._download_json(
f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id,
f'{self._API_BASE}/v1/watch/{video_id}/access-rights/hls',
data=json.dumps({
video_id, headers={
'Accept': 'application/json;charset=utf-8',
'Content-Type': 'application/json',
'X-Access-Right-Key': access_key,
'X-Request-With': self._BASE_URL,
**self._HEADERS,
}, query={
'actionTrackId': track_id,
}, data=json.dumps({
'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios))),
}).encode(), query={'actionTrackId': track_id}, headers={
}).encode(),
'x-access-right-key': access_key,
)['data']['contentUrl']
'x-frontend-id': 6,
raw_fmts = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
'x-frontend-version': 0,
'x-request-with': 'https://www.nicovideo.jp',
})['data']['contentUrl']
# Getting all audio formats results in duplicate video formats which we filter out later
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
formats = []
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
for a_fmt in traverse_obj(raw_fmts, lambda _, v: v['vcodec'] == 'none'):
yield {
formats.append({
**audio_fmt,
**a_fmt,
**traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), {
**traverse_obj(audios, (lambda _, v: a_fmt['format_id'].startswith(v['id']), {
'format_id': ('id', {str}),
'abr': ('bitRate', {float_or_none(scale=1000)}),
'asr': ('samplingRate', {int_or_none}),
'format_id': ('id', {str}),
'quality': ('qualityLevel', {int_or_none}),
}), get_all=False),
}, any)),
'acodec': 'aac',
}
})
# Sort before removing dupes to keep the format dicts with the lowest tbr
# Sort first, keeping the lowest-tbr formats
video_fmts = sorted((fmt for fmt in dms_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
v_fmts = sorted((fmt for fmt in raw_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
self._remove_duplicate_formats(video_fmts)
self._remove_duplicate_formats(v_fmts)
# Calculate the true vbr/tbr by subtracting the lowest abr
min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000
min_abr = traverse_obj(audios, (..., 'bitRate', {float_or_none(scale=1000)}, all, {min})) or 0
for video_fmt in video_fmts:
for v_fmt in v_fmts:
video_fmt['tbr'] -= min_abr
v_fmt['format_id'] = url_basename(v_fmt['url']).rpartition('.')[0]
video_fmt['format_id'] = url_basename(video_fmt['url']).rpartition('.')[0]
v_fmt['quality'] = traverse_obj(videos, (
video_fmt['quality'] = traverse_obj(videos, (
lambda _, v: v['id'] == v_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1
lambda _, v: v['id'] == video_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1
v_fmt['tbr'] -= min_abr
yield video_fmt
formats.extend(v_fmts)
return formats
def _extract_server_response(self, webpage, video_id, fatal=True):
try:
return traverse_obj(
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
('data', 'response', {dict}, {require('server response')}))
except ExtractorError:
if not fatal:
return {}
raise
def _real_extract(self, url):
video_id = self._match_id(url)
try:
path = 'v3' if self.is_logged_in else 'v3_guest'
webpage, handle = self._download_webpage_handle(
api_resp = self._download_json(
f'https://www.nicovideo.jp/watch/{video_id}', video_id,
f'{self._BASE_URL}/api/watch/{path}/{video_id}', video_id,
headers=self.geo_verification_headers())
'Downloading API JSON', 'Unable to fetch data', headers={
if video_id.startswith('so'):
**self._HEADERS,
video_id = self._match_id(handle.url)
**self.geo_verification_headers(),
}, query={
'actionTrackId': f'AAAAAAAAAA_{round(time_seconds() * 1000)}',
}, expected_status=[400, 404])
api_data = self._extract_server_response(webpage, video_id)
api_data = api_resp['data']
except ExtractorError as e:
scheduled_time = traverse_obj(api_data, ('publishScheduledAt', {str}))
try:
status = traverse_obj(api_resp, ('meta', 'status', {int}))
api_data = self._download_json(
f'https://www.nicovideo.jp/api/watch/v3/{video_id}', video_id,
'Downloading API JSON', 'Unable to fetch data', query={
'_frontendId': '6',
'_frontendVersion': '0',
'actionTrackId': f'AAAAAAAAAA_{round(time.time() * 1000)}',
}, headers=self.geo_verification_headers())['data']
except ExtractorError:
if not isinstance(e.cause, HTTPError):
# Raise if original exception was from _parse_json or utils.traversal.require
raise
# The webpage server response has more detailed error info than the API response
webpage = e.cause.response.read().decode('utf-8', 'replace')
reason_code = self._extract_server_response(
webpage, video_id, fatal=False).get('reasonCode')
if not reason_code:
raise
if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif reason_code == 'HIDDEN_VIDEO':
raise ExtractorError(
'The viewing period of this video has expired', expected=True)
elif reason_code == 'DELETED_VIDEO':
raise ExtractorError('This video has been deleted', expected=True)
raise ExtractorError(f'Niconico says: {reason_code}')
availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', {
if status != 200:
'needs_premium': ('isPremium', {bool}),
err_code = traverse_obj(api_resp, ('meta', 'errorCode', {str.upper}))
reason_code = traverse_obj(api_data, ('reasonCode', {str_or_none}))
err_msg = traverse_obj(self._ERROR_MAP, (err_code, (reason_code, 'DEFAULT'), {str}, any))
if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif reason_code == 'HARMFUL_VIDEO' and traverse_obj(api_data, (
'viewer', 'allowSensitiveContents', {bool},
)) is False:
err_msg = 'Sensitive content, adjust display settings to watch'
elif reason_code == 'HIDDEN_VIDEO' and scheduled_time:
err_msg = f'This content is scheduled to be released at {scheduled_time}'
elif reason_code in ('CHANNEL_MEMBER_ONLY', 'HARMFUL_VIDEO', 'HIDDEN_VIDEO', 'PPV_VIDEO', 'PREMIUM_ONLY'):
self.raise_login_required(err_msg)
if err_msg:
raise ExtractorError(err_msg, expected=True)
if status and status >= 500:
raise ExtractorError('Service temporarily unavailable', expected=True)
raise ExtractorError(f'API returned error status {status}')
availability = self._availability(**traverse_obj(api_data, ('payment', 'video', {
'needs_auth': (('isContinuationBenefit', 'isPpv'), {bool}, any),
'needs_subscription': ('isAdmission', {bool}),
})) or {'needs_auth': True}))
'needs_premium': ('isPremium', {bool}),
}))) or 'public'
formats = list(self._yield_dms_formats(api_data, video_id))
formats = self._extract_formats(api_data, video_id)
if not formats:
err_msg = self._STATUS_MAP.get(availability)
fail_msg = clean_html(self._html_search_regex(
if not formats and err_msg:
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
self.raise_login_required(err_msg, metadata_available=True)
webpage, 'fail message', default=None, group='msg'))
if fail_msg:
self.to_screen(f'Niconico said: {fail_msg}')
if fail_msg and 'された地域と同じ地域からのみ視聴できます。' in fail_msg:
availability = None
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
elif availability == 'premium_only':
self.raise_login_required('This video requires premium', metadata_available=True)
elif availability == 'subscriber_only':
self.raise_login_required('This video is for members only', metadata_available=True)
elif availability == 'needs_auth':
self.raise_login_required(metadata_available=False)
# Start extracting information
tags = None
if webpage:
# use og:video:tag (not logged in)
og_video_tags = re.finditer(r'<meta\s+property="og:video:tag"\s*content="(.*?)">', webpage)
tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags)))
if not tags:
# use keywords and split with comma (not logged in)
kwds = self._html_search_meta('keywords', webpage, default=None)
if kwds:
tags = [x for x in kwds.split(',') if x]
if not tags:
# find in json (logged in)
tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
def get_video_info(*items, get_first=True, **kwargs):
return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
return {
'id': video_id,
'_api_data': api_data,
'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
'formats': formats,
'availability': availability,
'thumbnails': [{
'display_id': video_id,
'id': key,
'formats': formats,
'url': url,
'genres': traverse_obj(api_data, ('genre', 'label', {str}, filter, all, filter)),
'ext': 'jpg',
'release_timestamp': parse_iso8601(scheduled_time),
'preference': thumb_prefs(key),
**parse_resolution(url, lenient=True),
} for key, url in (get_video_info('thumbnail') or {}).items() if url],
'description': clean_html(get_video_info('description')),
'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')),
'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))),
'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
'view_count': int_or_none(get_video_info('count', 'view')),
'tags': tags,
'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')),
'comment_count': get_video_info('count', 'comment', expected_type=int),
'duration': (
parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
or get_video_info('duration')),
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
'subtitles': self.extract_subtitles(video_id, api_data),
'tags': traverse_obj(api_data, ('tag', 'items', ..., 'name', {str}, filter, all, filter)),
'thumbnails': [{
'ext': 'jpg',
'id': key,
'preference': thumb_prefs(key),
'url': url,
**parse_resolution(url, lenient=True),
} for key, url in traverse_obj(api_data, (
'video', 'thumbnail', {dict}), default={}).items()],
**traverse_obj(api_data, (('channel', 'owner'), any, {
'channel': (('name', 'nickname'), {str}, any),
'channel_id': ('id', {str_or_none}),
'uploader': (('name', 'nickname'), {str}, any),
'uploader_id': ('id', {str_or_none}),
})),
**traverse_obj(api_data, ('video', {
'id': ('id', {str_or_none}),
'title': ('title', {str}),
'description': ('description', {clean_html}, filter),
'duration': ('duration', {int_or_none}),
'timestamp': ('registeredAt', {parse_iso8601}),
})),
**traverse_obj(api_data, ('video', 'count', {
'comment_count': ('comment', {int_or_none}),
'like_count': ('like', {int_or_none}),
'view_count': ('view', {int_or_none}),
})),
}
def _get_subtitles(self, video_id, api_data):
@ -413,21 +509,19 @@ def _get_subtitles(self, video_id, api_data):
return
danmaku = traverse_obj(self._download_json(
f'{comments_info["server"]}/v1/threads', video_id, data=json.dumps({
f'{comments_info["server"]}/v1/threads', video_id,
'Downloading comments', 'Failed to download comments', headers={
'Content-Type': 'text/plain;charset=UTF-8',
'Origin': self._BASE_URL,
'Referer': f'{self._BASE_URL}/',
'X-Client-Os-Type': 'others',
**self._HEADERS,
}, data=json.dumps({
'additionals': {},
'params': comments_info.get('params'),
'threadKey': comments_info.get('threadKey'),
}).encode(), fatal=False,
headers={
), ('data', 'threads', ..., 'comments', ...))
'Referer': 'https://www.nicovideo.jp/',
'Origin': 'https://www.nicovideo.jp',
'Content-Type': 'text/plain;charset=UTF-8',
'x-client-os-type': 'others',
'x-frontend-id': '6',
'x-frontend-version': '0',
},
note='Downloading comments', errnote='Failed to download comments'),
('data', 'threads', ..., 'comments', ...))
return {
'comments': [{


@ -3,9 +3,9 @@
class RoyaLiveIE(InfoExtractor):
_VALID_URL = r'https?://roya\.tv/live-stream/(?P<id>\d+)'
_VALID_URL = r'https?://(?:en\.)?roya\.tv/live-stream/(?P<id>\d+)'
_TESTS = [{
'url': 'https://roya.tv/live-stream/1',
'url': 'https://en.roya.tv/live-stream/1',
'info_dict': {
'id': '1',
'title': r're:Roya TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

yt_dlp/extractor/shiey.py (new file, 34 lines)

@ -0,0 +1,34 @@
import json
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import extract_attributes
from ..utils.traversal import find_element, traverse_obj
class ShieyIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?shiey\.com/videos/v/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.shiey.com/videos/v/train-journey-to-edge-of-serbia-ep-2',
'info_dict': {
'id': '1103409448',
'ext': 'mp4',
'title': 'Train Journey To Edge of Serbia (Ep. 2)',
'uploader': 'shiey',
'uploader_url': '',
'duration': 1364,
'thumbnail': r're:^https?://.+',
},
'params': {'skip_download': True},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
oembed_html = traverse_obj(webpage, (
{find_element(attr='data-controller', value='VideoEmbed', html=True)},
{extract_attributes}, 'data-config-embed-video', {json.loads}, 'oembedHtml', {str}))
return self.url_result(VimeoIE._extract_url(url, oembed_html), VimeoIE)


@ -282,6 +282,7 @@ class SubsPoTokenPolicy(BasePoTokenPolicy):
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
},
},
'PLAYER_PARAMS': '8AEB',
'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
'GVS_PO_TOKEN_POLICY': {
StreamingProtocol.HTTPS: GvsPoTokenPolicy(