1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-27 17:08:32 +00:00
This commit is contained in:
doe1080 2025-06-19 11:53:38 +09:00
parent f5091a346a
commit a59c0bd477
2 changed files with 8 additions and 8 deletions

View File

@@ -373,7 +373,7 @@ def test_search_json_ld_realworld(self):
'title': 'md5:3f077843a74f01f768bbf0853c210855', 'title': 'md5:3f077843a74f01f768bbf0853c210855',
'categories': ['Reportages'], 'categories': ['Reportages'],
'creators': ['Sabine Dupont'], 'creators': ['Sabine Dupont'],
'description': 'md5:40eaf402631e0a77d8d74f66574bb978', 'description': 'md5:1dc04a3aa56c5228503071baa8b4cc97',
'modified_timestamp': 1747319520, 'modified_timestamp': 1747319520,
'release_timestamp': 1747319520, 'release_timestamp': 1747319520,
'tags': 'count:1', 'tags': 'count:1',

View File

@@ -1743,19 +1743,19 @@ def traverse_json_ld(json_ld, at_top_level=True):
}) })
elif is_type(e, 'Article', 'NewsArticle'): elif is_type(e, 'Article', 'NewsArticle'):
info.update(**traverse_obj(e, { info.update(**traverse_obj(e, {
'title': ('headline', {str}, {unescapeHTML}), 'title': ('headline', {clean_html}, filter),
'alt_title': ('alternativeHeadline', {str}, {unescapeHTML}), 'alt_title': ('alternativeHeadline', {clean_html}, filter),
'categories': ('articleSection', {str}, {unescapeHTML}, filter, all, filter), 'categories': ('articleSection', {clean_html}, filter, all, filter),
'creators': ('author', (None, 'name'), {str}, {unescapeHTML}, filter, all, filter), 'creators': ('author', (None, 'name'), {clean_html}, filter, all, filter),
'description': (('description', 'articleBody'), {str}, {unescapeHTML}, any), 'description': (('description', 'articleBody'), {clean_html}, filter, any),
'modified_timestamp': ('dateModified', {parse_iso8601}), 'modified_timestamp': ('dateModified', {parse_iso8601}),
'release_timestamp': ('datePublished', {parse_iso8601}), 'release_timestamp': ('datePublished', {parse_iso8601}),
'tags': ('keywords', {str}, {unescapeHTML}, {lambda x: x.split(',')}, filter), 'tags': ('keywords', {clean_html}, {lambda x: x.split(',')}, ..., {str.strip}, filter, all, filter),
'thumbnails': ('image', ..., { 'thumbnails': ('image', ..., {
'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}), 'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}),
}), }),
'timestamp': ('dateCreated', {parse_iso8601}), 'timestamp': ('dateCreated', {parse_iso8601}),
'uploader': ('publisher', 'name', {str}, {unescapeHTML}), 'uploader': ('publisher', 'name', {clean_html}, filter),
})) }))
if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'): if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'):
extract_video_object(e['video'][0]) extract_video_object(e['video'][0])