Merge remote-tracking branch 'upstream/master' into master

This commit is contained in:
Fijxu
2024-10-31 20:22:59 -03:00
17 changed files with 202 additions and 124 deletions

View File

@@ -23,6 +23,7 @@ require "kilt"
require "./ext/kemal_content_for.cr"
require "./ext/kemal_static_file_handler.cr"
require "http_proxy"
require "athena-negotiation"
require "openssl/hmac"
require "option_parser"
@@ -114,6 +115,10 @@ SOFTWARE = {
YT_POOL = YoutubeConnectionPool.new(YT_URL, capacity: CONFIG.pool_size)
# Image request pool
GGPHT_POOL = YoutubeConnectionPool.new(URI.parse("https://yt3.ggpht.com"), capacity: CONFIG.pool_size)
# CLI
Kemal.config.extra_options do |parser|
parser.banner = "Usage: invidious [arguments]"

View File

@@ -57,6 +57,15 @@ struct ConfigPreferences
end
end
struct HTTPProxyConfig
include YAML::Serializable
property user : String
property password : String
property host : String
property port : Int32
end
class Config
include YAML::Serializable
@@ -157,6 +166,8 @@ class Config
property host_binding : String = "0.0.0.0"
# Pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool of `pool_size`)
property pool_size : Int32 = 100
# HTTP Proxy configuration
property http_proxy : HTTPProxyConfig? = nil
# Use Innertube's transcripts API instead of timedtext for closed captions
property use_innertube_for_captions : Bool = false

View File

@@ -18,6 +18,40 @@ end
class HTTP::Client
property family : Socket::Family = Socket::Family::UNSPEC
# Override stdlib to automatically initialize proxy if configured
#
# Accurate as of crystal 1.12.1
def initialize(@host : String, port = nil, tls : TLSContext = nil)
check_host_only(@host)
{% if flag?(:without_openssl) %}
if tls
raise "HTTP::Client TLS is disabled because `-D without_openssl` was passed at compile time"
end
@tls = nil
{% else %}
@tls = case tls
when true
OpenSSL::SSL::Context::Client.new
when OpenSSL::SSL::Context::Client
tls
when false, nil
nil
end
{% end %}
@port = (port || (@tls ? 443 : 80)).to_i
self.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy
end
def initialize(@io : IO, @host = "", @port = 80)
@reconnect = false
self.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy
end
private def io
io = @io
return io if io

View File

@@ -175,7 +175,6 @@ module Invidious::SigHelper
@queue = {} of TransactionID => Transaction
@conn : Connection
@uri_or_path : String
def initialize(@uri_or_path)
@@ -201,7 +200,7 @@ module Invidious::SigHelper
@conn = Connection.new(@uri_or_path)
LOGGER.info("SigHelper: Reconnected to SigHelper!")
rescue ex
LOGGER.debug("SigHelper: Reconnection to helper unsuccessful with error '#{ex.message}' retrying")
LOGGER.debug("SigHelper: Reconnection to helper unsuccessful with error '#{ex.message}'. Retrying")
sleep 500.milliseconds
next
end

View File

@@ -11,29 +11,9 @@ module Invidious::Routes::Images
end
end
# We're encapsulating this into a proc in order to easily reuse this
# portion of the code for each request block below.
request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end
env.response.headers["Access-Control-Allow-Origin"] = "*"
if response.status_code >= 300
env.response.headers.delete("Transfer-Encoding")
return
end
proxy_file(response, env)
}
begin
HTTP::Client.get("https://yt3.ggpht.com#{url}") do |resp|
return request_proc.call(resp)
GGPHT_POOL.client &.get(url, headers) do |resp|
return self.proxy_image(env, resp)
end
rescue ex
end
@@ -61,27 +41,10 @@ module Invidious::Routes::Images
end
end
request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end
env.response.headers["Connection"] = "close"
env.response.headers["Access-Control-Allow-Origin"] = "*"
if response.status_code >= 300
return env.response.headers.delete("Transfer-Encoding")
end
proxy_file(response, env)
}
begin
HTTP::Client.get("https://#{authority}.ytimg.com#{url}") do |resp|
return request_proc.call(resp)
get_ytimg_pool(authority).client &.get(url, headers) do |resp|
env.response.headers["Connection"] = "close"
return self.proxy_image(env, resp)
end
rescue ex
end
@@ -101,26 +64,9 @@ module Invidious::Routes::Images
end
end
request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end
env.response.headers["Access-Control-Allow-Origin"] = "*"
if response.status_code >= 300 && response.status_code != 404
return env.response.headers.delete("Transfer-Encoding")
end
proxy_file(response, env)
}
begin
HTTP::Client.get("https://i9.ytimg.com#{url}") do |resp|
return request_proc.call(resp)
get_ytimg_pool("i9").client &.get(url, headers) do |resp|
return self.proxy_image(env, resp)
end
rescue ex
end
@@ -165,8 +111,7 @@ module Invidious::Routes::Images
if name == "maxres.jpg"
build_thumbnails(id).each do |thumb|
thumbnail_resource_path = "/vi/#{id}/#{thumb[:url]}.jpg"
# This can likely be optimized into a (small) pool sometime in the future.
if HTTP::Client.head("https://i.ytimg.com#{thumbnail_resource_path}").status_code == 200
if get_ytimg_pool("i9").client &.head(thumbnail_resource_path, headers).status_code == 200
name = thumb[:url] + ".jpg"
break
end
@@ -181,29 +126,28 @@ module Invidious::Routes::Images
end
end
request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end
env.response.headers["Access-Control-Allow-Origin"] = "*"
if response.status_code >= 300 && response.status_code != 404
return env.response.headers.delete("Transfer-Encoding")
end
proxy_file(response, env)
}
begin
# This can likely be optimized into a (small) pool sometime in the future.
HTTP::Client.get("https://i.ytimg.com#{url}") do |resp|
return request_proc.call(resp)
get_ytimg_pool("i").client &.get(url, headers) do |resp|
return self.proxy_image(env, resp)
end
rescue ex
end
end
private def self.proxy_image(env, response)
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end
env.response.headers["Access-Control-Allow-Origin"] = "*"
if response.status_code >= 300
return env.response.headers.delete("Transfer-Encoding")
end
return proxy_file(response, env)
end
end

View File

@@ -123,6 +123,7 @@ module Invidious::Videos
"Esperanto",
"Estonian",
"Filipino",
"Filipino (auto-generated)",
"Finnish",
"French",
"French (auto-generated)",

View File

@@ -1,17 +1,6 @@
def add_yt_headers(request)
request.headers.delete("User-Agent") if request.headers["User-Agent"] == "Crystal"
request.headers["User-Agent"] ||= "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
request.headers["Accept-Charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
request.headers["Accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
request.headers["Accept-Language"] ||= "en-US,en;q=0.9"
# Preserve original cookies and add new YT consent cookie for EU servers
request.headers["Cookie"] = "#{request.headers["cookie"]?}; CONSENT=PENDING+#{Random.rand(100..999)}"
if !CONFIG.cookies.empty?
request.headers["Cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}"
end
end
# Mapping of subdomain => YoutubeConnectionPool
# This is needed as we may need to access arbitrary subdomains of ytimg
private YTIMG_POOLS = {} of String => YoutubeConnectionPool
struct YoutubeConnectionPool
property! url : URI
@@ -26,12 +15,16 @@ struct YoutubeConnectionPool
def client(&)
conn = pool.checkout
# Proxy needs to be reinstated every time we get a client from the pool
conn.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy
begin
response = yield conn
rescue ex
conn.close
conn = HTTP::Client.new(url)
conn = HTTP::Client.new(url)
conn.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy
conn.family = CONFIG.force_resolve
conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC
conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com"
@@ -54,6 +47,21 @@ struct YoutubeConnectionPool
end
end
def add_yt_headers(request)
request.headers.delete("User-Agent") if request.headers["User-Agent"] == "Crystal"
request.headers["User-Agent"] ||= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
request.headers["Accept-Charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
request.headers["Accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
request.headers["Accept-Language"] ||= "en-us,en;q=0.5"
# Preserve original cookies and add new YT consent cookie for EU servers
request.headers["Cookie"] = "#{request.headers["cookie"]?}; CONSENT=PENDING+#{Random.rand(100..999)}"
if !CONFIG.cookies.empty?
request.headers["Cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}"
end
end
def make_client(url : URI, region = nil, force_resolve : Bool = false)
client = HTTP::Client.new(url)
@@ -77,3 +85,31 @@ def make_client(url : URI, region = nil, force_resolve : Bool = false, &)
client.close
end
end
def make_configured_http_proxy_client
# This method is only called when configuration for an HTTP proxy are set
config_proxy = CONFIG.http_proxy.not_nil!
return HTTP::Proxy::Client.new(
config_proxy.host,
config_proxy.port,
username: config_proxy.user,
password: config_proxy.password,
)
end
# Fetches a HTTP pool for the specified subdomain of ytimg.com
#
# Creates a new one when the specified pool for the subdomain does not exist
def get_ytimg_pool(subdomain)
if pool = YTIMG_POOLS[subdomain]?
return pool
else
LOGGER.info("ytimg_pool: Creating a new HTTP pool for \"https://#{subdomain}.ytimg.com\"")
pool = YoutubeConnectionPool.new(URI.parse("https://#{subdomain}.ytimg.com"), capacity: CONFIG.pool_size)
YTIMG_POOLS[subdomain] = pool
return pool
end
end