1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-12-22 16:08:58 +00:00

[ie/ntvcojp] Rework extractor

This commit is contained in:
doe1080
2025-05-26 21:00:13 +09:00
committed by nullpos
parent 9dce83092e
commit ff47f0fbb6
2 changed files with 97 additions and 71 deletions

View File

@@ -1420,7 +1420,7 @@ from .nrk import (
) )
from .nrl import NRLTVIE from .nrl import NRLTVIE
from .nts import NTSLiveIE from .nts import NTSLiveIE
from .ntvcojp import NTVCoJpCUIE from .ntvcojp import NTVJpCuIE
from .ntvde import NTVDeIE from .ntvde import NTVDeIE
from .ntvru import NTVRuIE from .ntvru import NTVRuIE
from .nubilesporn import NubilesPornIE from .nubilesporn import NubilesPornIE

View File

@@ -1,84 +1,110 @@
from .common import InfoExtractor from .streaks import StreaksBaseIE
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError,
clean_html,
int_or_none, int_or_none,
unified_timestamp, parse_iso8601,
urljoin, str_or_none,
url_or_none,
) )
from ..utils.traversal import find_element, traverse_obj from ..utils.traversal import require, traverse_obj
class NTVCoJpCUIE(InfoExtractor): class NTVJpCuIE(StreaksBaseIE):
IE_NAME = 'cu.ntv.co.jp' IE_NAME = 'ntvjp:cu'
IE_DESC = 'Nippon Television Network' IE_DESC = '日テレ無料TADA!'
_VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program)(?P<id>[^/?&#]+)'
_TEST = { _VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program-list)(?P<id>[\w-]+)/?$'
'url': 'https://cu.ntv.co.jp/gaki_20250525/', _TESTS = [
'info_dict': { {
'title': '放送開始36年!方正ココリコが選ぶ神回&地獄回!', 'url': 'https://cu.ntv.co.jp/gaki_20250525/',
'id': 'gaki_20250525', 'info_dict': {
'ext': 'mp4', 'id': 'gaki_20250525',
'categories': ['ダウンタウンのガキの使いやあらへんで!'], 'ext': 'mp4',
'description': '神回地獄回座談会!レギュラー放送1756回の中からココリコと方正が神回地獄回をそれぞれ選んで発表!若手時代の遠藤がガキ使メンバーに振り回される!?田中が好きな懐かしの番組名物キャラに爆笑!?方正が思い出に残っている持ち込み回とは?笑ってはいけないシリーズから遠藤が大汗をかくほど追い詰められる企画が誕生していた!?3人のトラウマになっている過酷罰ゲームを振り返り!方正記念企画のはずがまさかの展開で涙!?', 'title': '放送開始36年!方正ココリコが選ぶ神回&地獄回!',
'timestamp': 1748145124, 'cast': 'count:2',
'release_timestamp': 1748145539, 'description': 'md5:1e1db556224d627d4d2f74370c650927',
'duration': 1450, 'display_id': 'ref:gaki_20250525',
'episode_number': 255, 'duration': 1450,
'episode': '放送開始36年!方正ココリコが選ぶ神回&地獄回!', 'episode': '放送開始36年!方正ココリコが選ぶ神回&地獄回!',
'upload_date': '20250525', 'episode_id': '000000010172808',
'release_date': '20250525', 'episode_number': 255,
'genres': ['variety'],
'live_status': 'not_live',
'modified_date': '20250525',
'modified_timestamp': 1748145537,
'release_date': '20250525',
'release_timestamp': 1748145539,
'series': 'ダウンタウンのガキの使いやあらへんで!',
'series_id': 'gaki',
'thumbnail': r're:https?://.+\.jpg',
'timestamp': 1748145197,
'upload_date': '20250525',
'uploader': '日本テレビ放送網',
'uploader_id': '0x7FE2',
},
}, },
} ]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, display_id)
meta = self._search_json(r'window\.app\s*=', webpage, 'episode info', video_id, fatal=False)
episode = traverse_obj(meta, ('falcorCache', 'catalog', 'episode', video_id, 'value'))
nt_path = self._search_regex(r'<script[^>]+src=["\'](/assets/nt\.[^"\']+\.js)["\']', webpage, 'stream API config') info = traverse_obj(
nt_js = self._download_webpage(urljoin(url, nt_path), video_id, note='Downloading stream API config') self._search_json(
video_url = self._search_regex(r'videoPlaybackUrl:\s*[\'"]([^\'"]+)[\'"]', nt_js, 'stream API url') r'window\.app\s*=',
api_key = self._search_regex(r'api_key:\s*[\'"]([^\'"]+)[\'"]', nt_js, 'stream API key') webpage,
'video info',
try: display_id,
source_meta = self._download_json( ),
f'{video_url}ref:{video_id}', ('falcorCache', 'catalog', 'episode', display_id, 'value', {dict}),
video_id, default={},
headers={'X-Streaks-Api-Key': api_key}, )
note='Downloading stream metadata', media_id = traverse_obj(info, ('streaks_data', 'mediaid', {str_or_none}, {require('mediaID for Streaks')}))
) non_phonetic = (lambda _, v: v['is_phonetic'] is False, 'value', {str})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
self.raise_geo_restricted(countries=['JP'])
raise
formats, subtitles = [], {}
for src in traverse_obj(source_meta, ('sources', ..., 'src')):
fmts, subs = self._extract_m3u8_formats_and_subtitles(src, video_id, fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
return { return {
'title': traverse_obj(webpage, ({find_element(tag='h3')}, {clean_html})), **self._extract_from_streaks_api(
'id': video_id, 'ntv-tada',
**traverse_obj( media_id,
episode, headers={
{ 'X-Streaks-Api-Key': 'df497719056b44059a0483b8faad1f4a',
'categories': ('keywords', {list}),
'id': ('content_id', {str}),
'description': ('description', 0, 'value'),
'timestamp': ('created_at', {unified_timestamp}),
'release_timestamp': ('pub_date', {unified_timestamp}),
'duration': ('tv_episode_info', 'duration', {int_or_none}),
'episode_number': ('tv_episode_info', 'episode_number', {int_or_none}),
'episode': ('title', lambda _, v: not v.get('is_phonetic'), 'value'),
'series': ('custom_data', 'program_name'),
}, },
get_all=False,
), ),
'formats': formats, **traverse_obj(
'subtitles': subtitles, info,
{
'id': ('content_id', {str_or_none}),
'title': ('title', *non_phonetic, any),
'age_limit': ('is_adult_only_content', {lambda x: 18 if x else None}),
'cast': ('credit', ..., 'name', *non_phonetic),
'genres': ('genre', ..., {str}),
'release_timestamp': ('pub_date', {parse_iso8601}),
'tags': ('tags', ..., {str}),
'thumbnail': ('artwork', ..., 'url', any, {url_or_none}),
},
),
**traverse_obj(
info,
(
'tv_episode_info',
{
'duration': ('duration', {int_or_none}),
'episode_number': ('episode_number', {int}),
'series': ('parent_show_title', *non_phonetic, any),
'series_id': ('show_content_id', {str}),
},
),
),
**traverse_obj(
info,
(
'custom_data',
{
'description': ('program_detail', {str}),
'episode': ('episode_title', {str}),
'episode_id': ('episode_id', {str_or_none}),
'uploader': ('network_name', {str}),
'uploader_id': ('network_id', {str}),
},
),
),
} }