1
0
Mirror of https://github.com/yt-dlp/yt-dlp.git, synced 2025-07-03 11:58:32 +00:00

[ie/tbsjp] More precise metadata extraction

Co-Authored-By: doe1080 <98906116+doe1080@users.noreply.github.com>
This commit is contained in:
garret1317 2025-06-17 15:36:09 +01:00
parent 92587555ee
commit 50c54eb9bc

View File

@ -4,8 +4,9 @@
int_or_none, int_or_none,
str_or_none, str_or_none,
unified_timestamp, unified_timestamp,
url_or_none,
) )
from ..utils.traversal import find_element, traverse_obj from ..utils.traversal import traverse_obj
class TBSJPBaseIE(StreaksBaseIE): class TBSJPBaseIE(StreaksBaseIE):
@ -49,20 +50,31 @@ def _real_extract(self, url):
episode = traverse_obj(meta, ('falcorCache', 'catalog', 'episode', video_id, 'value')) episode = traverse_obj(meta, ('falcorCache', 'catalog', 'episode', video_id, 'value'))
return { return {
**self._extract_from_streaks_api('tbs', f'ref:{video_id}', headers={'Referer': 'https://cu.tbs.co.jp/'}), **self._extract_from_streaks_api(
'title': traverse_obj(webpage, ({find_element(tag='h3')}, {clean_html})), 'tbs', f'ref:{video_id}', headers={'Referer': 'https://cu.tbs.co.jp/'}),
'id': video_id,
**traverse_obj(episode, { **traverse_obj(episode, {
'categories': ('keywords', {list}), 'title': ('title', ..., 'value', {str}, any),
'id': ('content_id', {str}), 'cast': ('credit', ..., 'name', ..., 'value', {str}, any, {lambda x: x.split(',')}, filter),
'description': ('description', 0, 'value'), 'categories': ('keywords', ..., {str}, filter, all, filter),
'timestamp': ('created_at', {unified_timestamp}), 'description': ('description', ..., 'value', {clean_html}, any),
'release_timestamp': ('pub_date', {unified_timestamp}),
'duration': ('tv_episode_info', 'duration', {int_or_none}), 'duration': ('tv_episode_info', 'duration', {int_or_none}),
'episode': ('title', lambda _, v: not v.get('is_phonetic'), 'value', {str}, any),
'episode_id': ('content_id', {str}),
'episode_number': ('tv_episode_info', 'episode_number', {int_or_none}), 'episode_number': ('tv_episode_info', 'episode_number', {int_or_none}),
'episode': ('title', lambda _, v: not v.get('is_phonetic'), 'value'), 'genres': ('genre', ..., {str}, filter, all, filter),
'series': ('custom_data', 'program_name'), 'release_timestamp': ('pub_date', {unified_timestamp}),
}, get_all=False), 'series': ('custom_data', 'program_name', {str}),
'tags': ('tags', ..., {str}, filter, all, filter),
'thumbnail': ('artwork', ..., 'url', {url_or_none}, any),
'timestamp': ('created_at', {unified_timestamp}),
'uploader': ('tv_show_info', 'networks', ..., {str}, any),
}),
**traverse_obj(episode, ('tv_episode_info', {
'duration': ('duration', {int_or_none}),
'episode_number': ('episode_number', {int_or_none}),
'series_id': ('show_content_id', {str}),
})),
'id': video_id,
} }