1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-12-15 12:45:27 +00:00

[ie] Improve JSON-LD metadata extraction

This commit is contained in:
doe1080
2025-06-05 10:26:34 +09:00
parent 4e7c1ea346
commit 0dab25e435
2 changed files with 70 additions and 6 deletions

View File

@@ -1741,11 +1741,20 @@ class InfoExtractor:
'timestamp': unified_timestamp(e.get('dateCreated')),
})
elif is_type(e, 'Article', 'NewsArticle'):
info.update({
'timestamp': parse_iso8601(e.get('datePublished')),
'title': unescapeHTML(e.get('headline')),
'description': unescapeHTML(e.get('articleBody') or e.get('description')),
})
info.update(**traverse_obj(e, {
'title': ('headline', {str}, {unescapeHTML}),
'categories': ('articleSection', {str}, {unescapeHTML}, filter, all),
'creators': ('author', (None, 'name'), {str}, {unescapeHTML}, filter, all),
'description': (('description', 'articleBody'), {str}, {unescapeHTML}, any),
'modified_timestamp': ('dateModified', {parse_iso8601}),
'release_timestamp': ('datePublished', {parse_iso8601}),
'tags': ('keywords', {str}, {unescapeHTML}, filter, all),
'thumbnails': ('image', ..., {
'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}),
}),
'timestamp': ('dateCreated', {parse_iso8601}),
'uploader': ('publisher', 'name', {str}, {unescapeHTML}),
}))
if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'):
extract_video_object(e['video'][0])
elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):