mirror of
https://github.com/iv-org/invidious.git
synced 2025-07-14 17:38:29 +00:00
Update extractors.cr
This commit is contained in:
parent
e9cc794c5e
commit
fc913c0cd3
@ -21,6 +21,7 @@ private ITEM_PARSERS = {
|
|||||||
Parsers::ItemSectionRendererParser,
|
Parsers::ItemSectionRendererParser,
|
||||||
Parsers::ContinuationItemRendererParser,
|
Parsers::ContinuationItemRendererParser,
|
||||||
Parsers::HashtagRendererParser,
|
Parsers::HashtagRendererParser,
|
||||||
|
Parsers::LockupViewModelParser,
|
||||||
}
|
}
|
||||||
|
|
||||||
private alias InitialData = Hash(String, JSON::Any)
|
private alias InitialData = Hash(String, JSON::Any)
|
||||||
@ -108,21 +109,30 @@ private module Parsers
|
|||||||
length_seconds = 0
|
length_seconds = 0
|
||||||
end
|
end
|
||||||
|
|
||||||
live_now = false
|
|
||||||
premium = false
|
|
||||||
|
|
||||||
premiere_timestamp = item_contents.dig?("upcomingEventData", "startTime").try { |t| Time.unix(t.as_s.to_i64) }
|
premiere_timestamp = item_contents.dig?("upcomingEventData", "startTime").try { |t| Time.unix(t.as_s.to_i64) }
|
||||||
|
badges = VideoBadges::None
|
||||||
item_contents["badges"]?.try &.as_a.each do |badge|
|
item_contents["badges"]?.try &.as_a.each do |badge|
|
||||||
b = badge["metadataBadgeRenderer"]
|
b = badge["metadataBadgeRenderer"]
|
||||||
case b["label"].as_s
|
case b["label"].as_s
|
||||||
when "LIVE NOW"
|
when "LIVE"
|
||||||
live_now = true
|
badges |= VideoBadges::LiveNow
|
||||||
when "New", "4K", "CC"
|
when "New"
|
||||||
# TODO
|
badges |= VideoBadges::New
|
||||||
|
when "4K"
|
||||||
|
badges |= VideoBadges::FourK
|
||||||
|
when "8K"
|
||||||
|
badges |= VideoBadges::EightK
|
||||||
|
when "VR180"
|
||||||
|
badges |= VideoBadges::VR180
|
||||||
|
when "360°"
|
||||||
|
badges |= VideoBadges::VR360
|
||||||
|
when "3D"
|
||||||
|
badges |= VideoBadges::ThreeD
|
||||||
|
when "CC"
|
||||||
|
badges |= VideoBadges::ClosedCaptions
|
||||||
when "Premium"
|
when "Premium"
|
||||||
# TODO: Potentially available as item_contents["topStandaloneBadge"]["metadataBadgeRenderer"]
|
# TODO: Potentially available as item_contents["topStandaloneBadge"]["metadataBadgeRenderer"]
|
||||||
premium = true
|
badges |= VideoBadges::Premium
|
||||||
else nil # Ignore
|
else nil # Ignore
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -136,10 +146,9 @@ private module Parsers
|
|||||||
views: view_count,
|
views: view_count,
|
||||||
description_html: description_html,
|
description_html: description_html,
|
||||||
length_seconds: length_seconds,
|
length_seconds: length_seconds,
|
||||||
live_now: live_now,
|
|
||||||
premium: premium,
|
|
||||||
premiere_timestamp: premiere_timestamp,
|
premiere_timestamp: premiere_timestamp,
|
||||||
author_verified: author_verified,
|
author_verified: author_verified,
|
||||||
|
badges: badges,
|
||||||
})
|
})
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -459,9 +468,9 @@ private module Parsers
|
|||||||
# Parses an InnerTube richItemRenderer into a SearchVideo.
|
# Parses an InnerTube richItemRenderer into a SearchVideo.
|
||||||
# Returns nil when the given object isn't a RichItemRenderer
|
# Returns nil when the given object isn't a RichItemRenderer
|
||||||
#
|
#
|
||||||
# A richItemRenderer seems to be a simple wrapper for a videoRenderer, used
|
# A richItemRenderer seems to be a simple wrapper for various other types,
|
||||||
# by the result page for hashtags and for the podcast tab on channels.
|
# used on the hashtags result page and the channel podcast tab. It is located
|
||||||
# It is located inside a continuationItems container for hashtags.
|
# itself inside a richGridRenderer container.
|
||||||
#
|
#
|
||||||
module RichItemRendererParser
|
module RichItemRendererParser
|
||||||
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
|
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
|
||||||
@ -474,6 +483,8 @@ private module Parsers
|
|||||||
child = VideoRendererParser.process(item_contents, author_fallback)
|
child = VideoRendererParser.process(item_contents, author_fallback)
|
||||||
child ||= ReelItemRendererParser.process(item_contents, author_fallback)
|
child ||= ReelItemRendererParser.process(item_contents, author_fallback)
|
||||||
child ||= PlaylistRendererParser.process(item_contents, author_fallback)
|
child ||= PlaylistRendererParser.process(item_contents, author_fallback)
|
||||||
|
child ||= LockupViewModelParser.process(item_contents, author_fallback)
|
||||||
|
child ||= ShortsLockupViewModelParser.process(item_contents, author_fallback)
|
||||||
return child
|
return child
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -488,6 +499,9 @@ private module Parsers
|
|||||||
# reelItemRenderer items are used in the new (2022) channel layout,
|
# reelItemRenderer items are used in the new (2022) channel layout,
|
||||||
# in the "shorts" tab.
|
# in the "shorts" tab.
|
||||||
#
|
#
|
||||||
|
# NOTE: As of 10/2024, it might have been fully replaced by shortsLockupViewModel
|
||||||
|
# TODO: Confirm that hypothesis
|
||||||
|
#
|
||||||
module ReelItemRendererParser
|
module ReelItemRendererParser
|
||||||
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
|
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
|
||||||
if item_contents = item["reelItemRenderer"]?
|
if item_contents = item["reelItemRenderer"]?
|
||||||
@ -563,10 +577,138 @@ private module Parsers
|
|||||||
views: view_count,
|
views: view_count,
|
||||||
description_html: "",
|
description_html: "",
|
||||||
length_seconds: duration,
|
length_seconds: duration,
|
||||||
live_now: false,
|
|
||||||
premium: false,
|
|
||||||
premiere_timestamp: Time.unix(0),
|
premiere_timestamp: Time.unix(0),
|
||||||
author_verified: false,
|
author_verified: false,
|
||||||
|
badges: VideoBadges::None,
|
||||||
|
})
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.parser_name
|
||||||
|
return {{@type.name}}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Parses an InnerTube lockupViewModel into a SearchPlaylist.
|
||||||
|
# Returns nil when the given object is not a lockupViewModel.
|
||||||
|
#
|
||||||
|
# This structure has been present since November 2024 on the "podcasts" and
|
||||||
|
# "playlists" tabs of the channel page. It is usually encapsulated in either
|
||||||
|
# a richItemRenderer or a richGridRenderer.
|
||||||
|
#
|
||||||
|
module LockupViewModelParser
|
||||||
|
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
|
||||||
|
if item_contents = item["lockupViewModel"]?
|
||||||
|
return self.parse(item_contents, author_fallback)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
private def self.parse(item_contents, author_fallback)
|
||||||
|
playlist_id = item_contents["contentId"].as_s
|
||||||
|
|
||||||
|
thumbnail_view_model = item_contents.dig(
|
||||||
|
"contentImage", "collectionThumbnailViewModel",
|
||||||
|
"primaryThumbnail", "thumbnailViewModel"
|
||||||
|
)
|
||||||
|
|
||||||
|
thumbnail = thumbnail_view_model.dig("image", "sources", 0, "url").as_s
|
||||||
|
|
||||||
|
# This complicated sequence tries to extract the following data structure:
|
||||||
|
# "overlays": [{
|
||||||
|
# "thumbnailOverlayBadgeViewModel": {
|
||||||
|
# "thumbnailBadges": [{
|
||||||
|
# "thumbnailBadgeViewModel": {
|
||||||
|
# "text": "430 episodes",
|
||||||
|
# "badgeStyle": "THUMBNAIL_OVERLAY_BADGE_STYLE_DEFAULT"
|
||||||
|
# }
|
||||||
|
# }]
|
||||||
|
# }
|
||||||
|
# }]
|
||||||
|
#
|
||||||
|
# NOTE: this simplistic `.to_i` conversion might not work on larger
|
||||||
|
# playlists and hasn't been tested.
|
||||||
|
video_count = thumbnail_view_model.dig("overlays").as_a
|
||||||
|
.compact_map(&.dig?("thumbnailOverlayBadgeViewModel", "thumbnailBadges").try &.as_a)
|
||||||
|
.flatten
|
||||||
|
.find(nil, &.dig?("thumbnailBadgeViewModel", "text").try { |node|
|
||||||
|
{"episodes", "videos"}.any? { |str| node.as_s.ends_with?(str) }
|
||||||
|
})
|
||||||
|
.try &.dig("thumbnailBadgeViewModel", "text").as_s.to_i(strict: false)
|
||||||
|
|
||||||
|
metadata = item_contents.dig("metadata", "lockupMetadataViewModel")
|
||||||
|
title = metadata.dig("title", "content").as_s
|
||||||
|
|
||||||
|
# TODO: Retrieve "updated" info from metadata parts
|
||||||
|
# rows = metadata.dig("metadata", "contentMetadataViewModel", "metadataRows").as_a
|
||||||
|
# parts_text = rows.map(&.dig?("metadataParts", "text", "content").try &.as_s)
|
||||||
|
# One of these parts should contain a string like: "Updated 2 days ago"
|
||||||
|
|
||||||
|
# TODO: Maybe add a button to access the first video of the playlist?
|
||||||
|
# item_contents.dig("rendererContext", "commandContext", "onTap", "innertubeCommand", "watchEndpoint")
|
||||||
|
# Available fields: "videoId", "playlistId", "params"
|
||||||
|
|
||||||
|
return SearchPlaylist.new({
|
||||||
|
title: title,
|
||||||
|
id: playlist_id,
|
||||||
|
author: author_fallback.name,
|
||||||
|
ucid: author_fallback.id,
|
||||||
|
video_count: video_count || -1,
|
||||||
|
videos: [] of SearchPlaylistVideo,
|
||||||
|
thumbnail: thumbnail,
|
||||||
|
author_verified: false,
|
||||||
|
})
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.parser_name
|
||||||
|
return {{@type.name}}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Parses an InnerTube shortsLockupViewModel into a SearchVideo.
|
||||||
|
# Returns nil when the given object is not a shortsLockupViewModel.
|
||||||
|
#
|
||||||
|
# This structure has been present since around October 2024 on the "shorts" tab of
|
||||||
|
# the channel page and likely replaces the reelItemRenderer structure. It is
|
||||||
|
# usually (always?) encapsulated in a richItemRenderer.
|
||||||
|
#
|
||||||
|
module ShortsLockupViewModelParser
|
||||||
|
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
|
||||||
|
if item_contents = item["shortsLockupViewModel"]?
|
||||||
|
return self.parse(item_contents, author_fallback)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
private def self.parse(item_contents, author_fallback)
|
||||||
|
# TODO: Maybe add support for "oardefault.jpg" thumbnails?
|
||||||
|
# thumbnail = item_contents.dig("thumbnail", "sources", 0, "url").as_s
|
||||||
|
# Gives: https://i.ytimg.com/vi/{video_id}/oardefault.jpg?...
|
||||||
|
|
||||||
|
video_id = item_contents.dig(
|
||||||
|
"onTap", "innertubeCommand", "reelWatchEndpoint", "videoId"
|
||||||
|
).as_s
|
||||||
|
|
||||||
|
title = item_contents.dig("overlayMetadata", "primaryText", "content").as_s
|
||||||
|
|
||||||
|
view_count = short_text_to_number(
|
||||||
|
item_contents.dig("overlayMetadata", "secondaryText", "content").as_s
|
||||||
|
)
|
||||||
|
|
||||||
|
# Approximate to one minute, as "shorts" generally don't exceed that.
|
||||||
|
# NOTE: The actual duration is not provided by YouTube anymore.
|
||||||
|
# TODO: Maybe use -1 as an error value and handle that on the frontend?
|
||||||
|
duration = 60_i32
|
||||||
|
|
||||||
|
SearchVideo.new({
|
||||||
|
title: title,
|
||||||
|
id: video_id,
|
||||||
|
author: author_fallback.name,
|
||||||
|
ucid: author_fallback.id,
|
||||||
|
published: Time.unix(0),
|
||||||
|
views: view_count,
|
||||||
|
description_html: "",
|
||||||
|
length_seconds: duration,
|
||||||
|
premiere_timestamp: Time.unix(0),
|
||||||
|
author_verified: false,
|
||||||
|
badges: VideoBadges::None,
|
||||||
})
|
})
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user