From 19f1b2a9b70cae1bd79a28116120550397dbbfae Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 07:50:21 -0700 Subject: [PATCH 1/9] Replace `Kemal::StaticFileHandler` on Crystal < 1.17.0 Kemal's subclass of the stdlib `HTTP::StaticFileHandler` is not as maintained as its parent, and so misses out on many enhancements and bug fixes from upstream, which unfortunately also includes the patches for security vulnerabilities... Though this isn't necessarily Kemal's fault since the bulk of the stdlib handler's logic was done in a single big method, making any changes hard to maintain. This was fixed in Crystal 1.17.0 where the handler was refactored into many private methods, making it easier for an inheriting type to implement custom behaviors while still leveraging much of the pre-existing code. Since we don't actually use any of the Kemal specific features added by `Kemal::StaticFileHandler`, there really isn't a reason to not just create a new handler based upon the stdlib implementation instead which will address the problems mentioned above. This PR implements a new handler which inherits from the stdlib variant and overrides the helper methods added in Crystal 1.17.0 to add the caching behavior with minimal code changes. Since this new handler depends on the code in Crystal 1.17.0, it will only be applied on versions greater than or equal to 1.17.0. On older versions we'll fallback to the current monkey patched `Kemal::StaticFileHandler` --- src/ext/kemal_static_file_handler.cr | 21 +++ src/invidious.cr | 18 ++- .../http_server/static_assets_handler.cr | 138 ++++++++++++++++++ 3 files changed, 171 insertions(+), 6 deletions(-) create mode 100644 src/invidious/http_server/static_assets_handler.cr diff --git a/src/ext/kemal_static_file_handler.cr b/src/ext/kemal_static_file_handler.cr index a5f422617..c6b9a27df 100644 --- a/src/ext/kemal_static_file_handler.cr +++ b/src/ext/kemal_static_file_handler.cr @@ -1,3 +1,24 @@ +{% if compare_versions(Crystal::VERSION, "1.17.0") >= 0 %} + # Strip StaticFileHandler from the binary + # + # This allows us to compile on 1.17.0 as the compiler won't try to + # semantically check the outdated upstream code. + class Kemal::Config + private def setup_static_file_handler + end + end + + # Nullify `Kemal::StaticFileHandler` + # + # Needed until the next release of Kemal after 1.7 + class Kemal::StaticFileHandler < HTTP::StaticFileHandler + def call(context : HTTP::Server::Context) + end + end + + {% skip_file %} +{% end %} + # Since systems have a limit on number of open files (`ulimit -a`), # we serve them from memory to avoid 'Too many open files' without needing # to modify ulimit. diff --git a/src/invidious.cr b/src/invidious.cr index 197b150ca..179e67896 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -231,19 +231,25 @@ error 500 do |env, exception| error_template(500, exception) end -static_headers do |env| - env.response.headers.add("Cache-Control", "max-age=2629800") -end - # Init Kemal -public_folder "assets" - Kemal.config.powered_by_header = false add_handler FilteredCompressHandler.new add_handler APIHandler.new add_handler AuthHandler.new add_handler DenyFrame.new + +{% if compare_versions(Crystal::VERSION, "1.17.0") >= 0 %} + Kemal.config.serve_static = false + add_handler Invidious::HttpServer::StaticAssetsHandler.new("assets", directory_listing: false) +{% else %} + public_folder "assets" + + static_headers do |env| + env.response.headers.add("Cache-Control", "max-age=2629800") + end +{% end %} + add_context_storage_type(Array(String)) add_context_storage_type(Preferences) add_context_storage_type(Invidious::User) diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr new file mode 100644 index 000000000..c6137775b --- /dev/null +++ b/src/invidious/http_server/static_assets_handler.cr @@ -0,0 +1,138 @@ +{% skip_file if compare_versions(Crystal::VERSION, "1.17.0") < 0 %} + +module Invidious::HttpServer + class StaticAssetsHandler < HTTP::StaticFileHandler + # In addition to storing the actual data of a file, it also implements the required + # getters needed for the object to imitate a `File::Stat` within `StaticFileHandler`. + # + # Since the `File::Stat` is created once in `#call` and then passed around to the + # rest of the class's methods, imitating the object allows us to only lookup + # the cache hash once for every request. + # + private record CachedFile, data : Bytes, size : Int64, modification_time : Time + + CACHE_LIMIT = 5_000_000 # 5MB + @@cached_files = {} of Path => CachedFile + + # A simplified version of `#call` for Invidious to improve performance. + # + # This is basically the same as what we inherited but just with the directory listing + # features stripped out. This removes some conditional checks and calls which improves + # performance slightly but otherwise is entirely unneeded. + # + # Really, all the cache feature actually needs is to override the much simplifier `file_info` + # method to return a `CachedFile` or `File::Stat` depending on whether the file is cached. + def call(context) : Nil + check_request_method!(context) || return + + request_path = request_path(context) + + check_request_path!(context, request_path) || return + + request_path = Path.posix(request_path) + expanded_path = request_path.expand("/") + + # The path normalization can be simplified to just this since + # we don't need to care about normalizing directory urls. + if request_path != expanded_path + redirect_to context, expanded_path + end + + file_path = @public_dir.join(expanded_path.to_kind(Path::Kind.native)) + + if cached_info = @@cached_files[file_path]? + return serve_file_with_cache(context, cached_info, file_path) + end + + file_info = File.info?(file_path) + + return call_next(context) unless file_info + + if file_info.file? + # Actually means to serve file *with cache headers* + # The actual logic for serving the file is done in `#serve_file` + serve_file_with_cache(context, file_info, file_path) + else # Not a normal file (FIFO/device/socket) + call_next(context) + end + end + + # Add "Cache-Control" header to the response + private def add_cache_headers(response_headers : HTTP::Headers, last_modified : Time) : Nil + super; response_headers["Cache-Control"] = "max-age=2629800" + end + + # Serves and caches the file at the given path. + # + # This is an override of `serve_file` to allow serving a file from memory, and to cache it + # it as needed. + private def serve_file(context : HTTP::Server::Context, file_info, file_path : Path, original_file_path : Path, last_modified : Time) + context.response.content_type = MIME.from_filename(original_file_path.to_s, "application/octet-stream") + + range_header = context.request.headers["Range"]? + + if !file_info.is_a? CachedFile + retrieve_bytes_from = IO::Memory.new + + File.open(file_path) do |file| + # We cannot cache partial data so we'll rewind and read from the start + if range_header + dispatch_serve(context, file, file_info, range_header) + IO.copy(file.rewind, retrieve_bytes_from) + else + context.response.output = IO::MultiWriter.new(context.response.output, retrieve_bytes_from, sync_close: true) + dispatch_serve(context, file, file_info, range_header) + end + end + + return flush_io_to_cache(retrieve_bytes_from, file_path, file_info) + else + return dispatch_serve(context, file_info.data, file_info, range_header) + end + end + + # Writes file data to the cache + private def flush_io_to_cache(io, file_path, file_info) + if @@cached_files.sum(&.[1].size) + (size = file_info.size) < CACHE_LIMIT + data_slice = io.to_slice + @@cached_files[file_path] = CachedFile.new(data_slice, file_info.size, file_info.modification_time) + end + end + + # Either send the file in full, or just fragments of it depending on the request + private def dispatch_serve(context, file, file_info, range_header) + if range_header + # an IO is needed for `serve_file_range` + file = file.is_a?(Bytes) ? IO::Memory.new(file, writeable: false) : file + serve_file_range(context, file, range_header, file_info) + else + context.response.headers["Accept-Ranges"] = "bytes" + serve_file_full(context, file, file_info) + end + end + + # Skips the stdlib logic for serving pre-gzipped files + private def serve_file_compressed(context : HTTP::Server::Context, file_info, file_path : Path, last_modified : Time) + serve_file(context, file_info, file_path, file_path, last_modified) + end + + # If we're serving the full file right away then there's no need for an IO at all. + private def serve_file_full(context : HTTP::Server::Context, file : Bytes, file_info) + context.response.status = :ok + context.response.content_length = file_info.size + context.response.write file + end + + # Serves segments of a file based on the `Range header` + # + # An override of `serve_file_range` to allow using a generic IO rather than a `File`. + # Literally the same code as what we inherited but just with the `file` argument's type + # being set to `IO` rather than `File` + # + # Can be removed once https://github.com/crystal-lang/crystal/issues/15817 is fixed. + private def serve_file_range(context : HTTP::Server::Context, file : IO, range_header : String, file_info) + # Paste in the body of inherited serve_file_range + {{@type.superclass.methods.select(&.name.==("serve_file_range"))[0].body}} + end + end +end From 46a6993b15273290ff75307eb45e7421a0cc3bce Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 09:04:33 -0700 Subject: [PATCH 2/9] Simplify `StaticAssetsHandler` implementation Overriding `#call` or patching out `serve_file_compressed` provides only minimal benefits over the ease of maintenance granted by only overriding what we need to for the caching behavior. --- .../http_server/static_assets_handler.cr | 61 ++++++------------- 1 file changed, 17 insertions(+), 44 deletions(-) diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index c6137775b..7ea26dad9 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -9,52 +9,30 @@ module Invidious::HttpServer # rest of the class's methods, imitating the object allows us to only lookup # the cache hash once for every request. # - private record CachedFile, data : Bytes, size : Int64, modification_time : Time + private record CachedFile, data : Bytes, size : Int64, modification_time : Time do + def directory? + false + end + + def file? + true + end + end CACHE_LIMIT = 5_000_000 # 5MB @@cached_files = {} of Path => CachedFile - # A simplified version of `#call` for Invidious to improve performance. + # Returns metadata for the requested file # - # This is basically the same as what we inherited but just with the directory listing - # features stripped out. This removes some conditional checks and calls which improves - # performance slightly but otherwise is entirely unneeded. + # If the requested file is cached, a `CachedFile` is returned instead of a `File::Stat`. + # This represents the metadata info of a cached file and implements all the methods of `File::Stat` that + # is used by the `StaticAssetsHandler`. # - # Really, all the cache feature actually needs is to override the much simplifier `file_info` - # method to return a `CachedFile` or `File::Stat` depending on whether the file is cached. - def call(context) : Nil - check_request_method!(context) || return - - request_path = request_path(context) - - check_request_path!(context, request_path) || return - - request_path = Path.posix(request_path) - expanded_path = request_path.expand("/") - - # The path normalization can be simplified to just this since - # we don't need to care about normalizing directory urls. - if request_path != expanded_path - redirect_to context, expanded_path - end - + # The `CachedFile` also stores the raw bytes of the cached file, and this method serves as the place where + # the cached file is retrieved if it exists. Though the data will only be read in `#serve_file` + private def file_info(expanded_path : Path) file_path = @public_dir.join(expanded_path.to_kind(Path::Kind.native)) - - if cached_info = @@cached_files[file_path]? - return serve_file_with_cache(context, cached_info, file_path) - end - - file_info = File.info?(file_path) - - return call_next(context) unless file_info - - if file_info.file? - # Actually means to serve file *with cache headers* - # The actual logic for serving the file is done in `#serve_file` - serve_file_with_cache(context, file_info, file_path) - else # Not a normal file (FIFO/device/socket) - call_next(context) - end + {@@cached_files[file_path]? || File.info?(file_path), file_path} end # Add "Cache-Control" header to the response @@ -111,11 +89,6 @@ module Invidious::HttpServer end end - # Skips the stdlib logic for serving pre-gzipped files - private def serve_file_compressed(context : HTTP::Server::Context, file_info, file_path : Path, last_modified : Time) - serve_file(context, file_info, file_path, file_path, last_modified) - end - # If we're serving the full file right away then there's no need for an IO at all. private def serve_file_full(context : HTTP::Server::Context, file : Bytes, file_info) context.response.status = :ok From 40e10a346852bde2dd800aad46441253b8482ecb Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 09:23:28 -0700 Subject: [PATCH 3/9] Compare against 1.17.0-dev until full release --- src/ext/kemal_static_file_handler.cr | 2 +- src/invidious.cr | 2 +- src/invidious/http_server/static_assets_handler.cr | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ext/kemal_static_file_handler.cr b/src/ext/kemal_static_file_handler.cr index c6b9a27df..16cb84fbf 100644 --- a/src/ext/kemal_static_file_handler.cr +++ b/src/ext/kemal_static_file_handler.cr @@ -1,4 +1,4 @@ -{% if compare_versions(Crystal::VERSION, "1.17.0") >= 0 %} +{% if compare_versions(Crystal::VERSION, "1.17.0-dev") >= 0 %} # Strip StaticFileHandler from the binary # # This allows us to compile on 1.17.0 as the compiler won't try to diff --git a/src/invidious.cr b/src/invidious.cr index 179e67896..092c372ce 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -239,7 +239,7 @@ add_handler APIHandler.new add_handler AuthHandler.new add_handler DenyFrame.new -{% if compare_versions(Crystal::VERSION, "1.17.0") >= 0 %} +{% if compare_versions(Crystal::VERSION, "1.17.0-dev") >= 0 %} Kemal.config.serve_static = false add_handler Invidious::HttpServer::StaticAssetsHandler.new("assets", directory_listing: false) {% else %} diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index 7ea26dad9..243d6a8d5 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -1,4 +1,4 @@ -{% skip_file if compare_versions(Crystal::VERSION, "1.17.0") < 0 %} +{% skip_file if compare_versions(Crystal::VERSION, "1.17.0-dev") < 0 %} module Invidious::HttpServer class StaticAssetsHandler < HTTP::StaticFileHandler From f2c06728d454e9e71a98d69fc3c97b47f70608fa Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 16:35:40 -0700 Subject: [PATCH 4/9] Add specs for the new StaticAssetsHandler --- .../handlers/static_assets_handler/test.txt | 1 + .../handlers/static_assets_handler_spec.cr | 205 ++++++++++++++++++ .../http_server/static_assets_handler.cr | 7 + 3 files changed, 213 insertions(+) create mode 100644 spec/http_server/handlers/static_assets_handler/test.txt create mode 100644 spec/http_server/handlers/static_assets_handler_spec.cr diff --git a/spec/http_server/handlers/static_assets_handler/test.txt b/spec/http_server/handlers/static_assets_handler/test.txt new file mode 100644 index 000000000..70c379b63 --- /dev/null +++ b/spec/http_server/handlers/static_assets_handler/test.txt @@ -0,0 +1 @@ +Hello world \ No newline at end of file diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr new file mode 100644 index 000000000..89c530147 --- /dev/null +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -0,0 +1,205 @@ +{% skip_file if compare_versions(Crystal::VERSION, "1.17.0-dev") < 0 %} + +require "http" +require "spectator" +require "../../../src/invidious/http_server/static_assets_handler.cr" + +private def get_static_assets_handler + return Invidious::HttpServer::StaticAssetsHandler.new "spec/http_server/handlers/static_assets_handler", directory_listing: false +end + +# Slightly modified version of `handle` function from +# +# https://github.com/crystal-lang/crystal/blob/3f369d2c721e9462d9f6126cb0bcd4c6992f0225/spec/std/http/server/handlers/static_file_handler_spec.cr#L5 + +private def handle(request, handler : HTTP::Handler? = nil, decompress : Bool = false) + io = IO::Memory.new + response = HTTP::Server::Response.new(io) + context = HTTP::Server::Context.new(request, response) + + if !handler + handler = get_static_assets_handler + get_static_assets_handler.call context + else + handler.call(context) + end + + response.close + io.rewind + + HTTP::Client::Response.from_io(io, decompress: decompress) +end + +# Makes and yields a temporary file with the given prefix +private def make_temporary_file(prefix, contents = nil, &) + tempfile = File.tempfile(prefix, "static_assets_handler_spec", dir: "spec/http_server/handlers/static_assets_handler") + yield tempfile +ensure + tempfile.try &.delete +end + +# Get relative file path to a file within the static_assets_handler folder +macro get_file_path(basename) + "spec/http_server/handlers/static_assets_handler/#{ {{basename}} }" +end + +Spectator.describe StaticAssetsHandler do + it "Can serve a file" do + response = handle HTTP::Request.new("GET", "/test.txt") + expect(response.status_code).to eq(200) + expect(response.body).to eq(File.read(get_file_path("test.txt"))) + end + + it "Can serve cached file" do + make_temporary_file("cache_test") do |temporary_file| + temporary_file.rewind << "foo" + temporary_file.flush + expect(temporary_file.rewind.gets_to_end).to eq("foo") + + file_link = "/#{File.basename(temporary_file.path)}" + + # Should get cached by the first run + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("foo") + + # Update temporary file to "bar" + temporary_file.rewind << "bar" + temporary_file.flush + expect(temporary_file.rewind.gets_to_end).to eq("bar") + + # Second request should still return "foo" + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("foo") + end + end + + it "Adds cache headers" do + response = handle HTTP::Request.new("GET", "/test.txt") + expect(response.headers["cache_control"]).to eq("max-age=2629800") + end + + context "Can handle range requests" do + it "Can serve range request" do + headers = HTTP::Headers{"Range" => "bytes=0-2"} + response = handle HTTP::Request.new("GET", "/test.txt", headers) + + expect(response.status_code).to eq(206) + expect(response.headers["Content-Range"]?).to eq "bytes 0-2/11" + expect(response.body).to eq "Hel" + end + + it "Will cache entire file even if doing partial requests" do + make_temporary_file("range_cache") do |temporary_file| + temporary_file << "Hello world" + temporary_file.flush.rewind + file_link = "/#{File.basename(temporary_file.path)}" + + # Make request + headers = HTTP::Headers{"Range" => "bytes=0-2"} + response = handle HTTP::Request.new("GET", file_link, headers) + + # Mutate file on disk + temporary_file << "Something else" + temporary_file.flush.rewind + + # Second request shouldn't have changed + headers = HTTP::Headers{"Range" => "bytes=3-8"} + response = handle HTTP::Request.new("GET", file_link, headers) + expect(response.status_code).to eq(206) + expect(response.body).to eq "lo wor" + end + end + end + + context "Is able to support compression" do + def decompressed(string : String) + decompressed = Compress::Gzip::Reader.open(IO::Memory.new(string)) do |gzip| + gzip.gets_to_end + end + + return expect(decompressed) + end + + it "For full file requests" do + handler = HTTP::CompressHandler.new + handler.next = get_static_assets_handler() + + make_temporary_file("check decompression handler") do |temporary_file| + temporary_file << "Hello world" + temporary_file.flush.rewind + file_link = "/#{File.basename(temporary_file.path)}" + + # Can send from disk? + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + decompressed(response.body).to eq("Hello world") + + temporary_file << "Hello world" + temporary_file.flush.rewind + file_link = "/#{File.basename(temporary_file.path)}" + + # Are cached requests working? + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + decompressed(response.body).to eq("Hello world") + + # Able to retrieve non gzipped file? + response = handle HTTP::Request.new("GET", file_link), handler: handler + expect(response.body).to eq("Hello world") + expect(response.headers).to_not have_key("Content-Encoding") + end + end + + # Inspired by the equivalent tests from upstream + it "For partial file requests" do + handler = HTTP::CompressHandler.new + handler.next = get_static_assets_handler() + + make_temporary_file("check_decompression_handler_on_partial_requests") do |temporary_file| + temporary_file << "Hello world this is a very long string" + temporary_file.flush.rewind + file_link = "/#{File.basename(temporary_file.path)}" + + range_response_results = { + "10-20/38" => "d this is a", + "0-0/38" => "H", + "5-9/38" => " worl", + } + + range_request_header_value = {"10-20", "5-9", "0-0"}.join(',') + range_response_header_value = range_response_results.keys + + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Range" => "bytes=#{range_request_header_value}", "Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + + # Decompress response + response = HTTP::Client::Response.new( + status: response.status, + headers: response.headers, + body_io: Compress::Gzip::Reader.new(IO::Memory.new(response.body)), + ) + + count = 0 + MIME::Multipart.parse(response) do |headers, part| + part_range = headers["Content-Range"][6..] + expect(part_range).to be_within(range_response_header_value) + expect(part.gets_to_end).to eq(range_response_results[part_range]) + count += 1 + end + + expect(count).to eq(3) + + # Is the file cached? + temporary_file << "Something else" + temporary_file.flush.rewind + + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + decompressed(response.body).to eq("Hello world this is a very long string") + end + end + end + + after_each { Invidious::HttpServer::StaticAssetsHandler.clear_cache } +end diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index 243d6a8d5..8f2c1b7e6 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -107,5 +107,12 @@ module Invidious::HttpServer # Paste in the body of inherited serve_file_range {{@type.superclass.methods.select(&.name.==("serve_file_range"))[0].body}} end + + # Clear cached files. + # + # This is only used in the specs to clear the cache before each handler test + def self.clear_cache + return @@cached_files.clear + end end end From 9aab1b470da6dada371b707650b454704e60ee7c Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 16:39:59 -0700 Subject: [PATCH 5/9] Isolate static assets handler spec from others Running `crystal spec` without a file argument essentially produces one big program that combines every single spec file, their imports, and the files that those imports themselves depend on. Most of the types within this combined program will get ignored by the compiler due to a lack of any calls to them from the spec files. But for some types, partially the HTTP module ones, using them within the spec files will suddenly make the compiler enable a bunch of previously ignored code. And those code will suddenly require the presence of additional types, constants, etc. This not only make it annoying for getting the specs working but also makes it difficult to isolate behaviors for testing. The `static_assets_handler_spec.cr` causes this issue and so will be marked as an isolated spec for now. In the future all of the tests should be organized into independent groupings similar to how the Crystal compiler splits their tests into std, compiler, primitives and interpreter. --- .../handlers/static_assets_handler_spec.cr | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr index 89c530147..9b7a363e1 100644 --- a/spec/http_server/handlers/static_assets_handler_spec.cr +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -1,4 +1,13 @@ -{% skip_file if compare_versions(Crystal::VERSION, "1.17.0-dev") < 0 %} +# Due to the way that specs are handled this file cannot be run together with +# everything else without causing a compile time error that'll be incredibly +# annoying to resolve. +# +# TODO: Create different spec categories that can then be ran through make. +# An implementation of this can be seen with the tests for the Crystal compiler itself. +# +# For now run this with `crystal spec spec/http_server/handlers/static_assets_handler_spec.cr -Drunning_by_self` + +{% skip_file if compare_versions(Crystal::VERSION, "1.17.0-dev") < 0 || !flag?(:running_by_self) %} require "http" require "spectator" From d0dcb52e1db8adebf2e3d574c1ce036b7fbdd631 Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 16:40:35 -0700 Subject: [PATCH 6/9] Fix Ameba Lint/UselessAssign --- spec/http_server/handlers/static_assets_handler_spec.cr | 3 +-- src/invidious/http_server/static_assets_handler.cr | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr index 9b7a363e1..373d59fd2 100644 --- a/spec/http_server/handlers/static_assets_handler_spec.cr +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -106,8 +106,7 @@ Spectator.describe StaticAssetsHandler do file_link = "/#{File.basename(temporary_file.path)}" # Make request - headers = HTTP::Headers{"Range" => "bytes=0-2"} - response = handle HTTP::Request.new("GET", file_link, headers) + handle HTTP::Request.new("GET", file_link, HTTP::Headers{"Range" => "bytes=0-2"}) # Mutate file on disk temporary_file << "Something else" diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index 8f2c1b7e6..94add5a80 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -71,7 +71,7 @@ module Invidious::HttpServer # Writes file data to the cache private def flush_io_to_cache(io, file_path, file_info) - if @@cached_files.sum(&.[1].size) + (size = file_info.size) < CACHE_LIMIT + if @@cached_files.sum(&.[1].size) + file_info.size < CACHE_LIMIT data_slice = io.to_slice @@cached_files[file_path] = CachedFile.new(data_slice, file_info.size, file_info.modification_time) end From d002b08f4f1a53a26dc02073d8a2cb0b7e0359aa Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 17:07:51 -0700 Subject: [PATCH 7/9] Refactor logic for updating temp files in tests --- .../handlers/static_assets_handler_spec.cr | 125 ++++++++---------- 1 file changed, 57 insertions(+), 68 deletions(-) diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr index 373d59fd2..4b50171ae 100644 --- a/spec/http_server/handlers/static_assets_handler_spec.cr +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -42,11 +42,21 @@ end # Makes and yields a temporary file with the given prefix private def make_temporary_file(prefix, contents = nil, &) tempfile = File.tempfile(prefix, "static_assets_handler_spec", dir: "spec/http_server/handlers/static_assets_handler") - yield tempfile + file_link = "/#{File.basename(tempfile.path)}" + yield tempfile, file_link ensure tempfile.try &.delete end +# Changes the contents of the temporary file after yield +private def cycle_temporary_file_contents(temporary_file, initial, &) + temporary_file.rewind << initial + temporary_file.rewind.flush + yield + temporary_file.rewind << "something else" + temporary_file.rewind.flush +end + # Get relative file path to a file within the static_assets_handler folder macro get_file_path(basename) "spec/http_server/handlers/static_assets_handler/#{ {{basename}} }" @@ -60,24 +70,19 @@ Spectator.describe StaticAssetsHandler do end it "Can serve cached file" do - make_temporary_file("cache_test") do |temporary_file| - temporary_file.rewind << "foo" - temporary_file.flush - expect(temporary_file.rewind.gets_to_end).to eq("foo") + make_temporary_file("cache_test") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "foo") do + expect(temporary_file.rewind.gets_to_end).to eq("foo") - file_link = "/#{File.basename(temporary_file.path)}" + # Should get cached by the first run + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("foo") + end - # Should get cached by the first run - response = handle HTTP::Request.new("GET", file_link) - expect(response.status_code).to eq(200) - expect(response.body).to eq("foo") - - # Update temporary file to "bar" - temporary_file.rewind << "bar" - temporary_file.flush - expect(temporary_file.rewind.gets_to_end).to eq("bar") - - # Second request should still return "foo" + # Temporary file is updated after `cycle_temporary_file_contents` is called + # but if the file is successfully cached then we'll only get the original + # contents. response = handle HTTP::Request.new("GET", file_link) expect(response.status_code).to eq(200) expect(response.body).to eq("foo") @@ -100,17 +105,10 @@ Spectator.describe StaticAssetsHandler do end it "Will cache entire file even if doing partial requests" do - make_temporary_file("range_cache") do |temporary_file| - temporary_file << "Hello world" - temporary_file.flush.rewind - file_link = "/#{File.basename(temporary_file.path)}" - - # Make request - handle HTTP::Request.new("GET", file_link, HTTP::Headers{"Range" => "bytes=0-2"}) - - # Mutate file on disk - temporary_file << "Something else" - temporary_file.flush.rewind + make_temporary_file("range_cache") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "Hello world") do + handle HTTP::Request.new("GET", file_link, HTTP::Headers{"Range" => "bytes=0-2"}) + end # Second request shouldn't have changed headers = HTTP::Headers{"Range" => "bytes=3-8"} @@ -134,19 +132,12 @@ Spectator.describe StaticAssetsHandler do handler = HTTP::CompressHandler.new handler.next = get_static_assets_handler() - make_temporary_file("check decompression handler") do |temporary_file| - temporary_file << "Hello world" - temporary_file.flush.rewind - file_link = "/#{File.basename(temporary_file.path)}" - - # Can send from disk? - response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler - expect(response.headers["Content-Encoding"]).to eq("gzip") - decompressed(response.body).to eq("Hello world") - - temporary_file << "Hello world" - temporary_file.flush.rewind - file_link = "/#{File.basename(temporary_file.path)}" + make_temporary_file("check decompression handler") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "Hello world") do + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + decompressed(response.body).to eq("Hello world") + end # Are cached requests working? response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler @@ -165,40 +156,38 @@ Spectator.describe StaticAssetsHandler do handler = HTTP::CompressHandler.new handler.next = get_static_assets_handler() - make_temporary_file("check_decompression_handler_on_partial_requests") do |temporary_file| - temporary_file << "Hello world this is a very long string" - temporary_file.flush.rewind - file_link = "/#{File.basename(temporary_file.path)}" + make_temporary_file("check_decompression_handler_on_partial_requests") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "Hello world this is a very long string") do + range_response_results = { + "10-20/38" => "d this is a", + "0-0/38" => "H", + "5-9/38" => " worl", + } - range_response_results = { - "10-20/38" => "d this is a", - "0-0/38" => "H", - "5-9/38" => " worl", - } + range_request_header_value = {"10-20", "5-9", "0-0"}.join(',') + range_response_header_value = range_response_results.keys - range_request_header_value = {"10-20", "5-9", "0-0"}.join(',') - range_response_header_value = range_response_results.keys + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Range" => "bytes=#{range_request_header_value}", "Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") - response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Range" => "bytes=#{range_request_header_value}", "Accept-Encoding" => "gzip"}), handler: handler - expect(response.headers["Content-Encoding"]).to eq("gzip") + # Decompress response + response = HTTP::Client::Response.new( + status: response.status, + headers: response.headers, + body_io: Compress::Gzip::Reader.new(IO::Memory.new(response.body)), + ) - # Decompress response - response = HTTP::Client::Response.new( - status: response.status, - headers: response.headers, - body_io: Compress::Gzip::Reader.new(IO::Memory.new(response.body)), - ) + count = 0 + MIME::Multipart.parse(response) do |headers, part| + part_range = headers["Content-Range"][6..] + expect(part_range).to be_within(range_response_header_value) + expect(part.gets_to_end).to eq(range_response_results[part_range]) + count += 1 + end - count = 0 - MIME::Multipart.parse(response) do |headers, part| - part_range = headers["Content-Range"][6..] - expect(part_range).to be_within(range_response_header_value) - expect(part.gets_to_end).to eq(range_response_results[part_range]) - count += 1 + expect(count).to eq(3) end - expect(count).to eq(3) - # Is the file cached? temporary_file << "Something else" temporary_file.flush.rewind From 15086f4b6287bdc034aedae62101c3ae4a3b7f0b Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 17:10:10 -0700 Subject: [PATCH 8/9] Improve cache size check to be more performant Summing the sizes of each cached file every time is very inefficient. Instead we can simply store the cache size in an constant and increase it everytime a file is added into the cache. --- .../handlers/static_assets_handler_spec.cr | 31 +++++++++++++++++++ .../http_server/static_assets_handler.cr | 7 +++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr index 4b50171ae..76dc7be77 100644 --- a/spec/http_server/handlers/static_assets_handler_spec.cr +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -198,5 +198,36 @@ Spectator.describe StaticAssetsHandler do end end + it "Will not cache additional files if the cache limit is reached" do + 5.times do |times| + data = "a" * 1_000_000 + + make_temporary_file("test cache size limit #{times}") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, data) do + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq(data) + end + + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq(data) + end + end + + # Cache should be 5 mb so no more files will be cached. + make_temporary_file("test cache size limit uncached") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "a") do + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("a") + end + + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to_not eq("a") + end + end + after_each { Invidious::HttpServer::StaticAssetsHandler.clear_cache } end diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index 94add5a80..e086ac3b0 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -20,6 +20,7 @@ module Invidious::HttpServer end CACHE_LIMIT = 5_000_000 # 5MB + @@current_cache_size = 0 @@cached_files = {} of Path => CachedFile # Returns metadata for the requested file @@ -71,9 +72,8 @@ module Invidious::HttpServer # Writes file data to the cache private def flush_io_to_cache(io, file_path, file_info) - if @@cached_files.sum(&.[1].size) + file_info.size < CACHE_LIMIT - data_slice = io.to_slice - @@cached_files[file_path] = CachedFile.new(data_slice, file_info.size, file_info.modification_time) + if (@@current_cache_size += file_info.size) <= CACHE_LIMIT + @@cached_files[file_path] = CachedFile.new(io.to_slice, file_info.size, file_info.modification_time) end end @@ -112,6 +112,7 @@ module Invidious::HttpServer # # This is only used in the specs to clear the cache before each handler test def self.clear_cache + @@current_cache_size = 0 return @@cached_files.clear end end From 92888bdf1326f1ef8c638dc82d32127d59b128eb Mon Sep 17 00:00:00 2001 From: syeopite Date: Sat, 23 Aug 2025 20:51:30 -0700 Subject: [PATCH 9/9] Reduce indent in StaticAssetsHandler#serve_file --- .../http_server/static_assets_handler.cr | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index e086ac3b0..7902c95bf 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -50,24 +50,25 @@ module Invidious::HttpServer range_header = context.request.headers["Range"]? - if !file_info.is_a? CachedFile - retrieve_bytes_from = IO::Memory.new - - File.open(file_path) do |file| - # We cannot cache partial data so we'll rewind and read from the start - if range_header - dispatch_serve(context, file, file_info, range_header) - IO.copy(file.rewind, retrieve_bytes_from) - else - context.response.output = IO::MultiWriter.new(context.response.output, retrieve_bytes_from, sync_close: true) - dispatch_serve(context, file, file_info, range_header) - end - end - - return flush_io_to_cache(retrieve_bytes_from, file_path, file_info) - else + # If the file is cached we can just directly serve it + if file_info.is_a? CachedFile return dispatch_serve(context, file_info.data, file_info, range_header) end + + # Otherwise we'll need to read from disk and cache it + retrieve_bytes_from = IO::Memory.new + File.open(file_path) do |file| + # We cannot cache partial data so we'll rewind and read from the start + if range_header + dispatch_serve(context, file, file_info, range_header) + IO.copy(file.rewind, retrieve_bytes_from) + else + context.response.output = IO::MultiWriter.new(context.response.output, retrieve_bytes_from, sync_close: true) + dispatch_serve(context, file, file_info, range_header) + end + end + + return flush_io_to_cache(retrieve_bytes_from, file_path, file_info) end # Writes file data to the cache