Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Fijxu
2024-11-09 23:42:52 -03:00
15 changed files with 347 additions and 152 deletions

View File

@@ -1,78 +1,3 @@
def produce_channel_content_continuation(ucid, content_type, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
object_inner_2 = {
"2:0:embedded" => {
"1:0:varint" => 0_i64,
},
"5:varint" => 50_i64,
"6:varint" => 1_i64,
"7:varint" => (page * 30).to_i64,
"9:varint" => 1_i64,
"10:varint" => 0_i64,
}
object_inner_2_encoded = object_inner_2
.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
content_type_numerical =
case content_type
when "videos" then 15
when "livestreams" then 14
else 15 # Fallback to "videos"
end
sort_by_numerical =
case sort_by
when "newest" then 1_i64
when "popular" then 2_i64
when "oldest" then 4_i64
else 1_i64 # Fallback to "newest"
end
object_inner_1 = {
"110:embedded" => {
"3:embedded" => {
"#{content_type_numerical}:embedded" => {
"1:embedded" => {
"1:string" => object_inner_2_encoded,
},
"2:embedded" => {
"1:string" => "00000000-0000-0000-0000-000000000000",
},
"3:varint" => sort_by_numerical,
},
},
},
}
object_inner_1_encoded = object_inner_1
.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:string" => object_inner_1_encoded,
"35:string" => "browse-feed#{ucid}videos102",
},
}
continuation = object.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return continuation
end
def make_initial_content_ctoken(ucid, content_type, sort_by) : String
return produce_channel_content_continuation(ucid, content_type, sort_by: sort_by)
end
module Invidious::Channel::Tabs
extend self
@@ -101,7 +26,7 @@ module Invidious::Channel::Tabs
end
def get_videos(author : String, ucid : String, *, continuation : String? = nil, sort_by = "newest")
continuation ||= make_initial_content_ctoken(ucid, "videos", sort_by)
continuation ||= make_initial_videos_ctoken(ucid, sort_by)
initial_data = YoutubeAPI.browse(continuation: continuation)
return extract_items(initial_data, author, ucid)
@@ -130,14 +55,10 @@ module Invidious::Channel::Tabs
# Shorts
# -------------------
def get_shorts(channel : AboutChannel, continuation : String? = nil)
if continuation.nil?
# EgZzaG9ydHPyBgUKA5oBAA%3D%3D is the protobuf object to load "shorts"
# TODO: try to extract the continuation tokens that allows other sorting options
initial_data = YoutubeAPI.browse(channel.ucid, params: "EgZzaG9ydHPyBgUKA5oBAA%3D%3D")
else
initial_data = YoutubeAPI.browse(continuation: continuation)
end
def get_shorts(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
continuation ||= make_initial_shorts_ctoken(channel.ucid, sort_by)
initial_data = YoutubeAPI.browse(continuation: continuation)
return extract_items(initial_data, channel.author, channel.ucid)
end
@@ -145,9 +66,8 @@ module Invidious::Channel::Tabs
# Livestreams
# -------------------
def get_livestreams(channel : AboutChannel, continuation : String? = nil, sort_by = "newest")
continuation ||= make_initial_content_ctoken(channel.ucid, "livestreams", sort_by)
def get_livestreams(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
continuation ||= make_initial_livestreams_ctoken(channel.ucid, sort_by)
initial_data = YoutubeAPI.browse(continuation: continuation)
return extract_items(initial_data, channel.author, channel.ucid)
@@ -171,4 +91,102 @@ module Invidious::Channel::Tabs
return items, next_continuation
end
# -------------------
# C-tokens
# -------------------
private def sort_options_videos_short(sort_by : String)
case sort_by
when "newest" then return 4_i64
when "popular" then return 2_i64
when "oldest" then return 5_i64
else return 4_i64 # Fallback to "newest"
end
end
# Generate the initial "continuation token" to get the first page of the
# "videos" tab. The following page requires the ctoken provided in that
# first page, and so on.
private def make_initial_videos_ctoken(ucid : String, sort_by = "newest")
object = {
"15:embedded" => {
"2:embedded" => {
"1:string" => "00000000-0000-0000-0000-000000000000",
},
"4:varint" => sort_options_videos_short(sort_by),
},
}
return channel_ctoken_wrap(ucid, object)
end
# Generate the initial "continuation token" to get the first page of the
# "shorts" tab. The following page requires the ctoken provided in that
# first page, and so on.
private def make_initial_shorts_ctoken(ucid : String, sort_by = "newest")
object = {
"10:embedded" => {
"2:embedded" => {
"1:string" => "00000000-0000-0000-0000-000000000000",
},
"4:varint" => sort_options_videos_short(sort_by),
},
}
return channel_ctoken_wrap(ucid, object)
end
# Generate the initial "continuation token" to get the first page of the
# "livestreams" tab. The following page requires the ctoken provided in that
# first page, and so on.
private def make_initial_livestreams_ctoken(ucid : String, sort_by = "newest")
sort_by_numerical =
case sort_by
when "newest" then 12_i64
when "popular" then 14_i64
when "oldest" then 13_i64
else 12_i64 # Fallback to "newest"
end
object = {
"14:embedded" => {
"2:embedded" => {
"1:string" => "00000000-0000-0000-0000-000000000000",
},
"5:varint" => sort_by_numerical,
},
}
return channel_ctoken_wrap(ucid, object)
end
# The protobuf structure common between videos/shorts/livestreams
private def channel_ctoken_wrap(ucid : String, object)
object_inner = {
"110:embedded" => {
"3:embedded" => object,
},
}
object_inner_encoded = object_inner
.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:string" => object_inner_encoded,
},
}
continuation = object.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return continuation
end
end

View File

@@ -1,8 +1,22 @@
# Languages requiring a better level of translation (at least 20%)
# to be added to the list below:
#
# "af" => "", # Afrikaans
# "az" => "", # Azerbaijani
# "be" => "", # Belarusian
# "bn_BD" => "", # Bengali (Bangladesh)
# "ia" => "", # Interlingua
# "or" => "", # Odia
# "tk" => "", # Turkmen
# "tok => "", # Toki Pona
#
LOCALES_LIST = {
"ar" => "العربية", # Arabic
"bg" => "български", # Bulgarian
"bn" => "বাংলা", # Bengali
"ca" => "Català", # Catalan
"cs" => "Čeština", # Czech
"cy" => "Cymraeg", # Welsh
"da" => "Dansk", # Danish
"de" => "Deutsch", # German
"el" => "Ελληνικά", # Greek
@@ -23,6 +37,7 @@ LOCALES_LIST = {
"it" => "Italiano", # Italian
"ja" => "日本語", # Japanese
"ko" => "한국어", # Korean
"lmo" => "Lombard", # Lombard
"lt" => "Lietuvių", # Lithuanian
"nb-NO" => "Norsk bokmål", # Norwegian Bokmål
"nl" => "Nederlands", # Dutch

View File

@@ -12,7 +12,9 @@ enum LogLevel
end
class Invidious::LogHandler < Kemal::BaseLogHandler
def initialize(@io : IO = STDOUT, @level = LogLevel::Debug, @color : Bool = true)
def initialize(@io : IO = STDOUT, @level = LogLevel::Debug, use_color : Bool = true)
Colorize.enabled = use_color
Colorize.on_tty_only!
end
def call(context : HTTP::Server::Context)
@@ -56,8 +58,7 @@ class Invidious::LogHandler < Kemal::BaseLogHandler
{% for level in %w(trace debug info warn error fatal) %}
def {{level.id}}(message : String)
if LogLevel::{{level.id.capitalize}} >= @level
puts("#{Time.utc} [{{level.id}}] #{message}".colorize(color(LogLevel::{{level.id.capitalize}})).toggle(@color))
puts("#{Time.utc} [{{level.id}}] #{message}".colorize(color(LogLevel::{{level.id.capitalize}})))
end
end
{% end %}

View File

@@ -31,9 +31,7 @@ module Invidious::Routes::API::V1::Search
query = env.params.query["q"]? || ""
begin
client = HTTP::Client.new("suggestqueries-clients6.youtube.com")
client.before_request { |r| add_yt_headers(r) }
client = make_client(URI.parse("https://suggestqueries-clients6.youtube.com"), force_youtube_headers: true)
url = "/complete/search?client=youtube&hl=en&gl=#{region}&q=#{URI.encode_www_form(query)}&gs_ri=youtube&ds=yt"
response = client.get(url).body

View File

@@ -20,10 +20,11 @@ module Invidious::Routes::Channels
sort_by = env.params.query["sort_by"]?.try &.downcase
if channel.auto_generated
sort_by ||= "last"
sort_options = {"last", "oldest", "newest"}
items, next_continuation = fetch_channel_playlists(
channel.ucid, channel.author, continuation, (sort_by || "last")
channel.ucid, channel.author, continuation, sort_by
)
items.uniq! do |item|
@@ -49,9 +50,11 @@ module Invidious::Routes::Channels
end
next_continuation = nil
else
sort_by ||= "newest"
sort_options = {"newest", "oldest", "popular"}
items, next_continuation = Channel::Tabs.get_videos(
channel, continuation: continuation, sort_by: (sort_by || "newest")
items, next_continuation = Channel::Tabs.get_60_videos(
channel, continuation: continuation, sort_by: sort_by
)
end
end
@@ -82,13 +85,12 @@ module Invidious::Routes::Channels
end
next_continuation = nil
else
# TODO: support sort option for shorts
sort_by = ""
sort_options = [] of String
sort_by = env.params.query["sort_by"]?.try &.downcase || "newest"
sort_options = {"newest", "oldest", "popular"}
# Fetch items and continuation token
items, next_continuation = Channel::Tabs.get_shorts(
channel, continuation: continuation
channel, continuation: continuation, sort_by: sort_by
)
end

View File

@@ -47,11 +47,7 @@ module Invidious::Routes::VideoPlayback
headers["Range"] = "bytes=#{range_for_head}"
end
headers["Origin"] = "https://www.youtube.com"
headers["Referer"] = "https://www.youtube.com/"
headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
client = make_client(URI.parse(host), region, force_resolve = true)
client = make_client(URI.parse(host), region, force_resolve: true)
response = HTTP::Client::Response.new(500)
error = ""
5.times do
@@ -66,7 +62,7 @@ module Invidious::Routes::VideoPlayback
if new_host != host
host = new_host
client.close
client = make_client(URI.parse(new_host), region, force_resolve = true)
client = make_client(URI.parse(new_host), region, force_resolve: true)
end
url = "#{location.request_target}&host=#{location.host}#{region ? "&region=#{region}" : ""}"
@@ -80,7 +76,7 @@ module Invidious::Routes::VideoPlayback
fvip = "3"
host = "https://r#{fvip}---#{mn}.googlevideo.com"
client = make_client(URI.parse(host), region, force_resolve = true)
client = make_client(URI.parse(host), region, force_resolve: true)
rescue ex
error = ex.message
end
@@ -205,7 +201,7 @@ module Invidious::Routes::VideoPlayback
break
else
client.close
client = make_client(URI.parse(host), region, force_resolve = true)
client = make_client(URI.parse(host), region, force_resolve: true)
end
end

View File

@@ -22,12 +22,8 @@ struct YoutubeConnectionPool
response = yield conn
rescue ex
conn.close
conn = make_client(url, force_resolve: true)
conn = HTTP::Client.new(url)
conn.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy
conn.family = CONFIG.force_resolve
conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC
conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com"
response = yield conn
ensure
pool.release(conn)
@@ -37,12 +33,15 @@ struct YoutubeConnectionPool
end
private def build_pool
DB::Pool(HTTP::Client).new(initial_pool_size: 0, max_pool_size: capacity, max_idle_pool_size: capacity, checkout_timeout: timeout) do
conn = HTTP::Client.new(url)
conn.family = CONFIG.force_resolve
conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC
conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com"
conn
options = DB::Pool::Options.new(
initial_pool_size: 0,
max_pool_size: capacity,
max_idle_pool_size: capacity,
checkout_timeout: timeout
)
DB::Pool(HTTP::Client).new(options) do
next make_client(url, force_resolve: true)
end
end
end
@@ -62,15 +61,17 @@ def add_yt_headers(request)
end
end
def make_client(url : URI, region = nil, force_resolve : Bool = false)
def make_client(url : URI, region = nil, force_resolve : Bool = false, force_youtube_headers : Bool = false)
client = HTTP::Client.new(url)
client.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy
# Force the usage of a specific configured IP Family
if force_resolve
client.family = CONFIG.force_resolve
client.family = Socket::Family::INET if client.family == Socket::Family::UNSPEC
end
client.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com"
client.before_request { |r| add_yt_headers(r) } if url.host.try &.ends_with?("youtube.com") || force_youtube_headers
client.read_timeout = 10.seconds
client.connect_timeout = 10.seconds
@@ -78,7 +79,7 @@ def make_client(url : URI, region = nil, force_resolve : Bool = false)
end
def make_client(url : URI, region = nil, force_resolve : Bool = false, &)
client = make_client(url, region, force_resolve)
client = make_client(url, region, force_resolve: force_resolve)
begin
yield client
ensure

View File

@@ -21,6 +21,7 @@ private ITEM_PARSERS = {
Parsers::ItemSectionRendererParser,
Parsers::ContinuationItemRendererParser,
Parsers::HashtagRendererParser,
Parsers::LockupViewModelParser,
}
private alias InitialData = Hash(String, JSON::Any)
@@ -467,9 +468,9 @@ private module Parsers
# Parses an InnerTube richItemRenderer into a SearchVideo.
# Returns nil when the given object isn't a RichItemRenderer
#
# A richItemRenderer seems to be a simple wrapper for a videoRenderer, used
# by the result page for hashtags and for the podcast tab on channels.
# It is located inside a continuationItems container for hashtags.
# A richItemRenderer seems to be a simple wrapper for a various other types,
# used on the hashtags result page and the channel podcast tab. It is located
# itself inside a richGridRenderer container.
#
module RichItemRendererParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
@@ -482,6 +483,8 @@ private module Parsers
child = VideoRendererParser.process(item_contents, author_fallback)
child ||= ReelItemRendererParser.process(item_contents, author_fallback)
child ||= PlaylistRendererParser.process(item_contents, author_fallback)
child ||= LockupViewModelParser.process(item_contents, author_fallback)
child ||= ShortsLockupViewModelParser.process(item_contents, author_fallback)
return child
end
@@ -496,6 +499,9 @@ private module Parsers
# reelItemRenderer items are used in the new (2022) channel layout,
# in the "shorts" tab.
#
# NOTE: As of 10/2024, it might have been fully replaced by shortsLockupViewModel
# TODO: Confirm that hypothesis
#
module ReelItemRendererParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
if item_contents = item["reelItemRenderer"]?
@@ -582,6 +588,135 @@ private module Parsers
end
end
# Parses an InnerTube lockupViewModel into a SearchPlaylist.
# Returns nil when the given object is not a lockupViewModel.
#
# This structure is present since November 2024 on the "podcasts" and
# "playlists" tabs of the channel page. It is usually encapsulated in either
# a richItemRenderer or a richGridRenderer.
#
module LockupViewModelParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
if item_contents = item["lockupViewModel"]?
return self.parse(item_contents, author_fallback)
end
end
private def self.parse(item_contents, author_fallback)
playlist_id = item_contents["contentId"].as_s
thumbnail_view_model = item_contents.dig(
"contentImage", "collectionThumbnailViewModel",
"primaryThumbnail", "thumbnailViewModel"
)
thumbnail = thumbnail_view_model.dig("image", "sources", 0, "url").as_s
# This complicated sequences tries to extract the following data structure:
# "overlays": [{
# "thumbnailOverlayBadgeViewModel": {
# "thumbnailBadges": [{
# "thumbnailBadgeViewModel": {
# "text": "430 episodes",
# "badgeStyle": "THUMBNAIL_OVERLAY_BADGE_STYLE_DEFAULT"
# }
# }]
# }
# }]
#
# NOTE: this simplistic `.to_i` conversion might not work on larger
# playlists and hasn't been tested.
video_count = thumbnail_view_model.dig("overlays").as_a
.compact_map(&.dig?("thumbnailOverlayBadgeViewModel", "thumbnailBadges").try &.as_a)
.flatten
.find(nil, &.dig?("thumbnailBadgeViewModel", "text").try { |node|
{"episodes", "videos"}.any? { |str| node.as_s.ends_with?(str) }
})
.try &.dig("thumbnailBadgeViewModel", "text").as_s.to_i(strict: false)
metadata = item_contents.dig("metadata", "lockupMetadataViewModel")
title = metadata.dig("title", "content").as_s
# TODO: Retrieve "updated" info from metadata parts
# rows = metadata.dig("metadata", "contentMetadataViewModel", "metadataRows").as_a
# parts_text = rows.map(&.dig?("metadataParts", "text", "content").try &.as_s)
# One of these parts should contain a string like: "Updated 2 days ago"
# TODO: Maybe add a button to access the first video of the playlist?
# item_contents.dig("rendererContext", "commandContext", "onTap", "innertubeCommand", "watchEndpoint")
# Available fields: "videoId", "playlistId", "params"
return SearchPlaylist.new({
title: title,
id: playlist_id,
author: author_fallback.name,
ucid: author_fallback.id,
video_count: video_count || -1,
videos: [] of SearchPlaylistVideo,
thumbnail: thumbnail,
author_verified: false,
})
end
def self.parser_name
return {{@type.name}}
end
end
# Parses an InnerTube shortsLockupViewModel into a SearchVideo.
# Returns nil when the given object is not a shortsLockupViewModel.
#
# This structure is present since around October 2024 on the "shorts" tab of
# the channel page and likely replaces the reelItemRenderer structure. It is
# usually (always?) encapsulated in a richItemRenderer.
#
module ShortsLockupViewModelParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
if item_contents = item["shortsLockupViewModel"]?
return self.parse(item_contents, author_fallback)
end
end
private def self.parse(item_contents, author_fallback)
# TODO: Maybe add support for "oardefault.jpg" thumbnails?
# thumbnail = item_contents.dig("thumbnail", "sources", 0, "url").as_s
# Gives: https://i.ytimg.com/vi/{video_id}/oardefault.jpg?...
video_id = item_contents.dig(
"onTap", "innertubeCommand", "reelWatchEndpoint", "videoId"
).as_s
title = item_contents.dig("overlayMetadata", "primaryText", "content").as_s
view_count = short_text_to_number(
item_contents.dig("overlayMetadata", "secondaryText", "content").as_s
)
# Approximate to one minute, as "shorts" generally don't exceed that.
# NOTE: The actual duration is not provided by Youtube anymore.
# TODO: Maybe use -1 as an error value and handle that on the frontend?
duration = 60_i32
SearchVideo.new({
title: title,
id: video_id,
author: author_fallback.name,
ucid: author_fallback.id,
published: Time.unix(0),
views: view_count,
description_html: "",
length_seconds: duration,
premiere_timestamp: Time.unix(0),
author_verified: false,
badges: VideoBadges::None,
})
end
def self.parser_name
return {{@type.name}}
end
end
# Parses an InnerTube continuationItemRenderer into a Continuation.
# Returns nil when the given object isn't a continuationItemRenderer.
#

View File

@@ -639,6 +639,11 @@ module YoutubeAPI
# Send the POST request
body = YT_POOL.client() do |client|
client.post(url, headers: headers, body: data.to_json) do |response|
if response.status_code != 200
raise InfoException.new("Error: non 200 status code. Youtube API returned \
status code #{response.status_code}. See <a href=\"https://docs.invidious.io/youtube-errors-explained/\"> \
https://docs.invidious.io/youtube-errors-explained/</a> for troubleshooting.")
end
self._decompress(response.body_io, response.headers["Content-Encoding"]?)
end
end