From 748ef2332635210983e46aab3d3ae7e6e5b487f5 Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 07:50:21 -0700 Subject: [PATCH 1/8] Replace `Kemal::StaticFileHandler` on Crystal < 1.17.0 Kemal's subclass of the stdlib `HTTP::StaticFileHandler` is not as maintained as its parent, and so misses out on many enhancements and bug fixes from upstream, which unfortunately also includes the patches for security vulnerabilities... Though this isn't necessarily Kemal's fault since the bulk of the stdlib handler's logic was done in a single big method, making any changes hard to maintain. This was fixed in Crystal 1.17.0 where the handler was refactored into many private methods, making it easier for an inheriting type to implement custom behaviors while still leveraging much of the pre-existing code. Since we don't actually use any of the Kemal specific features added by `Kemal::StaticFileHandler`, there really isn't a reason to not just create a new handler based upon the stdlib implementation instead which will address the problems mentioned above. This PR implements a new handler which inherits from the stdlib variant and overrides the helper methods added in Crystal 1.17.0 to add the caching behavior with minimal code changes. Since this new handler depends on the code in Crystal 1.17.0, it will only be applied on versions greater than or equal to 1.17.0. On older versions we'll fallback to the current monkey patched `Kemal::StaticFileHandler` --- src/ext/kemal_static_file_handler.cr | 21 +++ src/invidious.cr | 18 ++- .../http_server/static_assets_handler.cr | 138 ++++++++++++++++++ 3 files changed, 171 insertions(+), 6 deletions(-) create mode 100644 src/invidious/http_server/static_assets_handler.cr diff --git a/src/ext/kemal_static_file_handler.cr b/src/ext/kemal_static_file_handler.cr index a5f42261..c6b9a27d 100644 --- a/src/ext/kemal_static_file_handler.cr +++ b/src/ext/kemal_static_file_handler.cr @@ -1,3 +1,24 @@ +{% if compare_versions(Crystal::VERSION, "1.17.0") >= 0 %} + # Strip StaticFileHandler from the binary + # + # This allows us to compile on 1.17.0 as the compiler won't try to + # semantically check the outdated upstream code. + class Kemal::Config + private def setup_static_file_handler + end + end + + # Nullify `Kemal::StaticFileHandler` + # + # Needed until the next release of Kemal after 1.7 + class Kemal::StaticFileHandler < HTTP::StaticFileHandler + def call(context : HTTP::Server::Context) + end + end + + {% skip_file %} +{% end %} + # Since systems have a limit on number of open files (`ulimit -a`), # we serve them from memory to avoid 'Too many open files' without needing # to modify ulimit. diff --git a/src/invidious.cr b/src/invidious.cr index 69f8a26c..358dc084 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -225,19 +225,25 @@ error 500 do |env, ex| error_template(500, ex) end -static_headers do |env| - env.response.headers.add("Cache-Control", "max-age=2629800") -end - # Init Kemal -public_folder "assets" - Kemal.config.powered_by_header = false add_handler FilteredCompressHandler.new add_handler APIHandler.new add_handler AuthHandler.new add_handler DenyFrame.new + +{% if compare_versions(Crystal::VERSION, "1.17.0") >= 0 %} + Kemal.config.serve_static = false + add_handler Invidious::HttpServer::StaticAssetsHandler.new("assets", directory_listing: false) +{% else %} + public_folder "assets" + + static_headers do |env| + env.response.headers.add("Cache-Control", "max-age=2629800") + end +{% end %} + add_context_storage_type(Array(String)) add_context_storage_type(Preferences) add_context_storage_type(Invidious::User) diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr new file mode 100644 index 00000000..c6137775 --- /dev/null +++ b/src/invidious/http_server/static_assets_handler.cr @@ -0,0 +1,138 @@ +{% skip_file if compare_versions(Crystal::VERSION, "1.17.0") < 0 %} + +module Invidious::HttpServer + class StaticAssetsHandler < HTTP::StaticFileHandler + # In addition to storing the actual data of a file, it also implements the required + # getters needed for the object to imitate a `File::Stat` within `StaticFileHandler`. + # + # Since the `File::Stat` is created once in `#call` and then passed around to the + # rest of the class's methods, imitating the object allows us to only lookup + # the cache hash once for every request. + # + private record CachedFile, data : Bytes, size : Int64, modification_time : Time + + CACHE_LIMIT = 5_000_000 # 5MB + @@cached_files = {} of Path => CachedFile + + # A simplified version of `#call` for Invidious to improve performance. + # + # This is basically the same as what we inherited but just with the directory listing + # features stripped out. This removes some conditional checks and calls which improves + # performance slightly but otherwise is entirely unneeded. + # + # Really, all the cache feature actually needs is to override the much simplifier `file_info` + # method to return a `CachedFile` or `File::Stat` depending on whether the file is cached. + def call(context) : Nil + check_request_method!(context) || return + + request_path = request_path(context) + + check_request_path!(context, request_path) || return + + request_path = Path.posix(request_path) + expanded_path = request_path.expand("/") + + # The path normalization can be simplified to just this since + # we don't need to care about normalizing directory urls. + if request_path != expanded_path + redirect_to context, expanded_path + end + + file_path = @public_dir.join(expanded_path.to_kind(Path::Kind.native)) + + if cached_info = @@cached_files[file_path]? + return serve_file_with_cache(context, cached_info, file_path) + end + + file_info = File.info?(file_path) + + return call_next(context) unless file_info + + if file_info.file? + # Actually means to serve file *with cache headers* + # The actual logic for serving the file is done in `#serve_file` + serve_file_with_cache(context, file_info, file_path) + else # Not a normal file (FIFO/device/socket) + call_next(context) + end + end + + # Add "Cache-Control" header to the response + private def add_cache_headers(response_headers : HTTP::Headers, last_modified : Time) : Nil + super; response_headers["Cache-Control"] = "max-age=2629800" + end + + # Serves and caches the file at the given path. + # + # This is an override of `serve_file` to allow serving a file from memory, and to cache it + # it as needed. + private def serve_file(context : HTTP::Server::Context, file_info, file_path : Path, original_file_path : Path, last_modified : Time) + context.response.content_type = MIME.from_filename(original_file_path.to_s, "application/octet-stream") + + range_header = context.request.headers["Range"]? + + if !file_info.is_a? CachedFile + retrieve_bytes_from = IO::Memory.new + + File.open(file_path) do |file| + # We cannot cache partial data so we'll rewind and read from the start + if range_header + dispatch_serve(context, file, file_info, range_header) + IO.copy(file.rewind, retrieve_bytes_from) + else + context.response.output = IO::MultiWriter.new(context.response.output, retrieve_bytes_from, sync_close: true) + dispatch_serve(context, file, file_info, range_header) + end + end + + return flush_io_to_cache(retrieve_bytes_from, file_path, file_info) + else + return dispatch_serve(context, file_info.data, file_info, range_header) + end + end + + # Writes file data to the cache + private def flush_io_to_cache(io, file_path, file_info) + if @@cached_files.sum(&.[1].size) + (size = file_info.size) < CACHE_LIMIT + data_slice = io.to_slice + @@cached_files[file_path] = CachedFile.new(data_slice, file_info.size, file_info.modification_time) + end + end + + # Either send the file in full, or just fragments of it depending on the request + private def dispatch_serve(context, file, file_info, range_header) + if range_header + # an IO is needed for `serve_file_range` + file = file.is_a?(Bytes) ? IO::Memory.new(file, writeable: false) : file + serve_file_range(context, file, range_header, file_info) + else + context.response.headers["Accept-Ranges"] = "bytes" + serve_file_full(context, file, file_info) + end + end + + # Skips the stdlib logic for serving pre-gzipped files + private def serve_file_compressed(context : HTTP::Server::Context, file_info, file_path : Path, last_modified : Time) + serve_file(context, file_info, file_path, file_path, last_modified) + end + + # If we're serving the full file right away then there's no need for an IO at all. + private def serve_file_full(context : HTTP::Server::Context, file : Bytes, file_info) + context.response.status = :ok + context.response.content_length = file_info.size + context.response.write file + end + + # Serves segments of a file based on the `Range header` + # + # An override of `serve_file_range` to allow using a generic IO rather than a `File`. + # Literally the same code as what we inherited but just with the `file` argument's type + # being set to `IO` rather than `File` + # + # Can be removed once https://github.com/crystal-lang/crystal/issues/15817 is fixed. + private def serve_file_range(context : HTTP::Server::Context, file : IO, range_header : String, file_info) + # Paste in the body of inherited serve_file_range + {{@type.superclass.methods.select(&.name.==("serve_file_range"))[0].body}} + end + end +end From 628f45de01cb7610ad7000ca2c37bddfee72639e Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 09:04:33 -0700 Subject: [PATCH 2/8] Simplify `StaticAssetsHandler` implementation Overriding `#call` or patching out `serve_file_compressed` provides only minimal benefits over the ease of maintenance granted by only overriding what we need to for the caching behavior. --- .../http_server/static_assets_handler.cr | 61 ++++++------------- 1 file changed, 17 insertions(+), 44 deletions(-) diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index c6137775..7ea26dad 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -9,52 +9,30 @@ module Invidious::HttpServer # rest of the class's methods, imitating the object allows us to only lookup # the cache hash once for every request. # - private record CachedFile, data : Bytes, size : Int64, modification_time : Time + private record CachedFile, data : Bytes, size : Int64, modification_time : Time do + def directory? + false + end + + def file? + true + end + end CACHE_LIMIT = 5_000_000 # 5MB @@cached_files = {} of Path => CachedFile - # A simplified version of `#call` for Invidious to improve performance. + # Returns metadata for the requested file # - # This is basically the same as what we inherited but just with the directory listing - # features stripped out. This removes some conditional checks and calls which improves - # performance slightly but otherwise is entirely unneeded. + # If the requested file is cached, a `CachedFile` is returned instead of a `File::Stat`. + # This represents the metadata info of a cached file and implements all the methods of `File::Stat` that + # is used by the `StaticAssetsHandler`. # - # Really, all the cache feature actually needs is to override the much simplifier `file_info` - # method to return a `CachedFile` or `File::Stat` depending on whether the file is cached. - def call(context) : Nil - check_request_method!(context) || return - - request_path = request_path(context) - - check_request_path!(context, request_path) || return - - request_path = Path.posix(request_path) - expanded_path = request_path.expand("/") - - # The path normalization can be simplified to just this since - # we don't need to care about normalizing directory urls. - if request_path != expanded_path - redirect_to context, expanded_path - end - + # The `CachedFile` also stores the raw bytes of the cached file, and this method serves as the place where + # the cached file is retrieved if it exists. Though the data will only be read in `#serve_file` + private def file_info(expanded_path : Path) file_path = @public_dir.join(expanded_path.to_kind(Path::Kind.native)) - - if cached_info = @@cached_files[file_path]? - return serve_file_with_cache(context, cached_info, file_path) - end - - file_info = File.info?(file_path) - - return call_next(context) unless file_info - - if file_info.file? - # Actually means to serve file *with cache headers* - # The actual logic for serving the file is done in `#serve_file` - serve_file_with_cache(context, file_info, file_path) - else # Not a normal file (FIFO/device/socket) - call_next(context) - end + {@@cached_files[file_path]? || File.info?(file_path), file_path} end # Add "Cache-Control" header to the response @@ -111,11 +89,6 @@ module Invidious::HttpServer end end - # Skips the stdlib logic for serving pre-gzipped files - private def serve_file_compressed(context : HTTP::Server::Context, file_info, file_path : Path, last_modified : Time) - serve_file(context, file_info, file_path, file_path, last_modified) - end - # If we're serving the full file right away then there's no need for an IO at all. private def serve_file_full(context : HTTP::Server::Context, file : Bytes, file_info) context.response.status = :ok From 60172a2e6ae32673d3deaa76f58bda8ee43a5f9f Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 09:23:28 -0700 Subject: [PATCH 3/8] Compare against 1.17.0-dev until full release --- src/ext/kemal_static_file_handler.cr | 2 +- src/invidious.cr | 2 +- src/invidious/http_server/static_assets_handler.cr | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ext/kemal_static_file_handler.cr b/src/ext/kemal_static_file_handler.cr index c6b9a27d..16cb84fb 100644 --- a/src/ext/kemal_static_file_handler.cr +++ b/src/ext/kemal_static_file_handler.cr @@ -1,4 +1,4 @@ -{% if compare_versions(Crystal::VERSION, "1.17.0") >= 0 %} +{% if compare_versions(Crystal::VERSION, "1.17.0-dev") >= 0 %} # Strip StaticFileHandler from the binary # # This allows us to compile on 1.17.0 as the compiler won't try to diff --git a/src/invidious.cr b/src/invidious.cr index 358dc084..cc0a5d57 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -233,7 +233,7 @@ add_handler APIHandler.new add_handler AuthHandler.new add_handler DenyFrame.new -{% if compare_versions(Crystal::VERSION, "1.17.0") >= 0 %} +{% if compare_versions(Crystal::VERSION, "1.17.0-dev") >= 0 %} Kemal.config.serve_static = false add_handler Invidious::HttpServer::StaticAssetsHandler.new("assets", directory_listing: false) {% else %} diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index 7ea26dad..243d6a8d 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -1,4 +1,4 @@ -{% skip_file if compare_versions(Crystal::VERSION, "1.17.0") < 0 %} +{% skip_file if compare_versions(Crystal::VERSION, "1.17.0-dev") < 0 %} module Invidious::HttpServer class StaticAssetsHandler < HTTP::StaticFileHandler From 6c927c89a671a4bd4cec983842be76a347a91778 Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 16:35:40 -0700 Subject: [PATCH 4/8] Add specs for the new StaticAssetsHandler --- .../handlers/static_assets_handler/test.txt | 1 + .../handlers/static_assets_handler_spec.cr | 205 ++++++++++++++++++ .../http_server/static_assets_handler.cr | 7 + 3 files changed, 213 insertions(+) create mode 100644 spec/http_server/handlers/static_assets_handler/test.txt create mode 100644 spec/http_server/handlers/static_assets_handler_spec.cr diff --git a/spec/http_server/handlers/static_assets_handler/test.txt b/spec/http_server/handlers/static_assets_handler/test.txt new file mode 100644 index 00000000..70c379b6 --- /dev/null +++ b/spec/http_server/handlers/static_assets_handler/test.txt @@ -0,0 +1 @@ +Hello world \ No newline at end of file diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr new file mode 100644 index 00000000..89c53014 --- /dev/null +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -0,0 +1,205 @@ +{% skip_file if compare_versions(Crystal::VERSION, "1.17.0-dev") < 0 %} + +require "http" +require "spectator" +require "../../../src/invidious/http_server/static_assets_handler.cr" + +private def get_static_assets_handler + return Invidious::HttpServer::StaticAssetsHandler.new "spec/http_server/handlers/static_assets_handler", directory_listing: false +end + +# Slightly modified version of `handle` function from +# +# https://github.com/crystal-lang/crystal/blob/3f369d2c721e9462d9f6126cb0bcd4c6992f0225/spec/std/http/server/handlers/static_file_handler_spec.cr#L5 + +private def handle(request, handler : HTTP::Handler? = nil, decompress : Bool = false) + io = IO::Memory.new + response = HTTP::Server::Response.new(io) + context = HTTP::Server::Context.new(request, response) + + if !handler + handler = get_static_assets_handler + get_static_assets_handler.call context + else + handler.call(context) + end + + response.close + io.rewind + + HTTP::Client::Response.from_io(io, decompress: decompress) +end + +# Makes and yields a temporary file with the given prefix +private def make_temporary_file(prefix, contents = nil, &) + tempfile = File.tempfile(prefix, "static_assets_handler_spec", dir: "spec/http_server/handlers/static_assets_handler") + yield tempfile +ensure + tempfile.try &.delete +end + +# Get relative file path to a file within the static_assets_handler folder +macro get_file_path(basename) + "spec/http_server/handlers/static_assets_handler/#{ {{basename}} }" +end + +Spectator.describe StaticAssetsHandler do + it "Can serve a file" do + response = handle HTTP::Request.new("GET", "/test.txt") + expect(response.status_code).to eq(200) + expect(response.body).to eq(File.read(get_file_path("test.txt"))) + end + + it "Can serve cached file" do + make_temporary_file("cache_test") do |temporary_file| + temporary_file.rewind << "foo" + temporary_file.flush + expect(temporary_file.rewind.gets_to_end).to eq("foo") + + file_link = "/#{File.basename(temporary_file.path)}" + + # Should get cached by the first run + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("foo") + + # Update temporary file to "bar" + temporary_file.rewind << "bar" + temporary_file.flush + expect(temporary_file.rewind.gets_to_end).to eq("bar") + + # Second request should still return "foo" + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("foo") + end + end + + it "Adds cache headers" do + response = handle HTTP::Request.new("GET", "/test.txt") + expect(response.headers["cache_control"]).to eq("max-age=2629800") + end + + context "Can handle range requests" do + it "Can serve range request" do + headers = HTTP::Headers{"Range" => "bytes=0-2"} + response = handle HTTP::Request.new("GET", "/test.txt", headers) + + expect(response.status_code).to eq(206) + expect(response.headers["Content-Range"]?).to eq "bytes 0-2/11" + expect(response.body).to eq "Hel" + end + + it "Will cache entire file even if doing partial requests" do + make_temporary_file("range_cache") do |temporary_file| + temporary_file << "Hello world" + temporary_file.flush.rewind + file_link = "/#{File.basename(temporary_file.path)}" + + # Make request + headers = HTTP::Headers{"Range" => "bytes=0-2"} + response = handle HTTP::Request.new("GET", file_link, headers) + + # Mutate file on disk + temporary_file << "Something else" + temporary_file.flush.rewind + + # Second request shouldn't have changed + headers = HTTP::Headers{"Range" => "bytes=3-8"} + response = handle HTTP::Request.new("GET", file_link, headers) + expect(response.status_code).to eq(206) + expect(response.body).to eq "lo wor" + end + end + end + + context "Is able to support compression" do + def decompressed(string : String) + decompressed = Compress::Gzip::Reader.open(IO::Memory.new(string)) do |gzip| + gzip.gets_to_end + end + + return expect(decompressed) + end + + it "For full file requests" do + handler = HTTP::CompressHandler.new + handler.next = get_static_assets_handler() + + make_temporary_file("check decompression handler") do |temporary_file| + temporary_file << "Hello world" + temporary_file.flush.rewind + file_link = "/#{File.basename(temporary_file.path)}" + + # Can send from disk? + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + decompressed(response.body).to eq("Hello world") + + temporary_file << "Hello world" + temporary_file.flush.rewind + file_link = "/#{File.basename(temporary_file.path)}" + + # Are cached requests working? + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + decompressed(response.body).to eq("Hello world") + + # Able to retrieve non gzipped file? + response = handle HTTP::Request.new("GET", file_link), handler: handler + expect(response.body).to eq("Hello world") + expect(response.headers).to_not have_key("Content-Encoding") + end + end + + # Inspired by the equivalent tests from upstream + it "For partial file requests" do + handler = HTTP::CompressHandler.new + handler.next = get_static_assets_handler() + + make_temporary_file("check_decompression_handler_on_partial_requests") do |temporary_file| + temporary_file << "Hello world this is a very long string" + temporary_file.flush.rewind + file_link = "/#{File.basename(temporary_file.path)}" + + range_response_results = { + "10-20/38" => "d this is a", + "0-0/38" => "H", + "5-9/38" => " worl", + } + + range_request_header_value = {"10-20", "5-9", "0-0"}.join(',') + range_response_header_value = range_response_results.keys + + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Range" => "bytes=#{range_request_header_value}", "Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + + # Decompress response + response = HTTP::Client::Response.new( + status: response.status, + headers: response.headers, + body_io: Compress::Gzip::Reader.new(IO::Memory.new(response.body)), + ) + + count = 0 + MIME::Multipart.parse(response) do |headers, part| + part_range = headers["Content-Range"][6..] + expect(part_range).to be_within(range_response_header_value) + expect(part.gets_to_end).to eq(range_response_results[part_range]) + count += 1 + end + + expect(count).to eq(3) + + # Is the file cached? + temporary_file << "Something else" + temporary_file.flush.rewind + + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + decompressed(response.body).to eq("Hello world this is a very long string") + end + end + end + + after_each { Invidious::HttpServer::StaticAssetsHandler.clear_cache } +end diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index 243d6a8d..8f2c1b7e 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -107,5 +107,12 @@ module Invidious::HttpServer # Paste in the body of inherited serve_file_range {{@type.superclass.methods.select(&.name.==("serve_file_range"))[0].body}} end + + # Clear cached files. + # + # This is only used in the specs to clear the cache before each handler test + def self.clear_cache + return @@cached_files.clear + end end end From 7226a728244b4581c945fbe236d85298891bf4ab Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 16:39:59 -0700 Subject: [PATCH 5/8] Isolate static assets handler spec from others Running `crystal spec` without a file argument essentially produces one big program that combines every single spec file, their imports, and the files that those imports themselves depend on. Most of the types within this combined program will get ignored by the compiler due to a lack of any calls to them from the spec files. But for some types, partially the HTTP module ones, using them within the spec files will suddenly make the compiler enable a bunch of previously ignored code. And those code will suddenly require the presence of additional types, constants, etc. This not only make it annoying for getting the specs working but also makes it difficult to isolate behaviors for testing. The `static_assets_handler_spec.cr` causes this issue and so will be marked as an isolated spec for now. In the future all of the tests should be organized into independent groupings similar to how the Crystal compiler splits their tests into std, compiler, primitives and interpreter. --- .../handlers/static_assets_handler_spec.cr | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr index 89c53014..9b7a363e 100644 --- a/spec/http_server/handlers/static_assets_handler_spec.cr +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -1,4 +1,13 @@ -{% skip_file if compare_versions(Crystal::VERSION, "1.17.0-dev") < 0 %} +# Due to the way that specs are handled this file cannot be run together with +# everything else without causing a compile time error that'll be incredibly +# annoying to resolve. +# +# TODO: Create different spec categories that can then be ran through make. +# An implementation of this can be seen with the tests for the Crystal compiler itself. +# +# For now run this with `crystal spec spec/http_server/handlers/static_assets_handler_spec.cr -Drunning_by_self` + +{% skip_file if compare_versions(Crystal::VERSION, "1.17.0-dev") < 0 || !flag?(:running_by_self) %} require "http" require "spectator" From dc198546a0e863a5cb8a8d5f5a1f80cce34740b5 Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 16:40:35 -0700 Subject: [PATCH 6/8] Fix Ameba Lint/UselessAssign --- spec/http_server/handlers/static_assets_handler_spec.cr | 3 +-- src/invidious/http_server/static_assets_handler.cr | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr index 9b7a363e..373d59fd 100644 --- a/spec/http_server/handlers/static_assets_handler_spec.cr +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -106,8 +106,7 @@ Spectator.describe StaticAssetsHandler do file_link = "/#{File.basename(temporary_file.path)}" # Make request - headers = HTTP::Headers{"Range" => "bytes=0-2"} - response = handle HTTP::Request.new("GET", file_link, headers) + handle HTTP::Request.new("GET", file_link, HTTP::Headers{"Range" => "bytes=0-2"}) # Mutate file on disk temporary_file << "Something else" diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index 8f2c1b7e..94add5a8 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -71,7 +71,7 @@ module Invidious::HttpServer # Writes file data to the cache private def flush_io_to_cache(io, file_path, file_info) - if @@cached_files.sum(&.[1].size) + (size = file_info.size) < CACHE_LIMIT + if @@cached_files.sum(&.[1].size) + file_info.size < CACHE_LIMIT data_slice = io.to_slice @@cached_files[file_path] = CachedFile.new(data_slice, file_info.size, file_info.modification_time) end From 52865ff0a2097b5bfa5fab5202d951885871e86b Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 17:07:51 -0700 Subject: [PATCH 7/8] Refactor logic for updating temp files in tests --- .../handlers/static_assets_handler_spec.cr | 125 ++++++++---------- 1 file changed, 57 insertions(+), 68 deletions(-) diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr index 373d59fd..4b50171a 100644 --- a/spec/http_server/handlers/static_assets_handler_spec.cr +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -42,11 +42,21 @@ end # Makes and yields a temporary file with the given prefix private def make_temporary_file(prefix, contents = nil, &) tempfile = File.tempfile(prefix, "static_assets_handler_spec", dir: "spec/http_server/handlers/static_assets_handler") - yield tempfile + file_link = "/#{File.basename(tempfile.path)}" + yield tempfile, file_link ensure tempfile.try &.delete end +# Changes the contents of the temporary file after yield +private def cycle_temporary_file_contents(temporary_file, initial, &) + temporary_file.rewind << initial + temporary_file.rewind.flush + yield + temporary_file.rewind << "something else" + temporary_file.rewind.flush +end + # Get relative file path to a file within the static_assets_handler folder macro get_file_path(basename) "spec/http_server/handlers/static_assets_handler/#{ {{basename}} }" @@ -60,24 +70,19 @@ Spectator.describe StaticAssetsHandler do end it "Can serve cached file" do - make_temporary_file("cache_test") do |temporary_file| - temporary_file.rewind << "foo" - temporary_file.flush - expect(temporary_file.rewind.gets_to_end).to eq("foo") + make_temporary_file("cache_test") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "foo") do + expect(temporary_file.rewind.gets_to_end).to eq("foo") - file_link = "/#{File.basename(temporary_file.path)}" + # Should get cached by the first run + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("foo") + end - # Should get cached by the first run - response = handle HTTP::Request.new("GET", file_link) - expect(response.status_code).to eq(200) - expect(response.body).to eq("foo") - - # Update temporary file to "bar" - temporary_file.rewind << "bar" - temporary_file.flush - expect(temporary_file.rewind.gets_to_end).to eq("bar") - - # Second request should still return "foo" + # Temporary file is updated after `cycle_temporary_file_contents` is called + # but if the file is successfully cached then we'll only get the original + # contents. response = handle HTTP::Request.new("GET", file_link) expect(response.status_code).to eq(200) expect(response.body).to eq("foo") @@ -100,17 +105,10 @@ Spectator.describe StaticAssetsHandler do end it "Will cache entire file even if doing partial requests" do - make_temporary_file("range_cache") do |temporary_file| - temporary_file << "Hello world" - temporary_file.flush.rewind - file_link = "/#{File.basename(temporary_file.path)}" - - # Make request - handle HTTP::Request.new("GET", file_link, HTTP::Headers{"Range" => "bytes=0-2"}) - - # Mutate file on disk - temporary_file << "Something else" - temporary_file.flush.rewind + make_temporary_file("range_cache") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "Hello world") do + handle HTTP::Request.new("GET", file_link, HTTP::Headers{"Range" => "bytes=0-2"}) + end # Second request shouldn't have changed headers = HTTP::Headers{"Range" => "bytes=3-8"} @@ -134,19 +132,12 @@ Spectator.describe StaticAssetsHandler do handler = HTTP::CompressHandler.new handler.next = get_static_assets_handler() - make_temporary_file("check decompression handler") do |temporary_file| - temporary_file << "Hello world" - temporary_file.flush.rewind - file_link = "/#{File.basename(temporary_file.path)}" - - # Can send from disk? - response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler - expect(response.headers["Content-Encoding"]).to eq("gzip") - decompressed(response.body).to eq("Hello world") - - temporary_file << "Hello world" - temporary_file.flush.rewind - file_link = "/#{File.basename(temporary_file.path)}" + make_temporary_file("check decompression handler") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "Hello world") do + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + decompressed(response.body).to eq("Hello world") + end # Are cached requests working? response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler @@ -165,40 +156,38 @@ Spectator.describe StaticAssetsHandler do handler = HTTP::CompressHandler.new handler.next = get_static_assets_handler() - make_temporary_file("check_decompression_handler_on_partial_requests") do |temporary_file| - temporary_file << "Hello world this is a very long string" - temporary_file.flush.rewind - file_link = "/#{File.basename(temporary_file.path)}" + make_temporary_file("check_decompression_handler_on_partial_requests") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "Hello world this is a very long string") do + range_response_results = { + "10-20/38" => "d this is a", + "0-0/38" => "H", + "5-9/38" => " worl", + } - range_response_results = { - "10-20/38" => "d this is a", - "0-0/38" => "H", - "5-9/38" => " worl", - } + range_request_header_value = {"10-20", "5-9", "0-0"}.join(',') + range_response_header_value = range_response_results.keys - range_request_header_value = {"10-20", "5-9", "0-0"}.join(',') - range_response_header_value = range_response_results.keys + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Range" => "bytes=#{range_request_header_value}", "Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") - response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Range" => "bytes=#{range_request_header_value}", "Accept-Encoding" => "gzip"}), handler: handler - expect(response.headers["Content-Encoding"]).to eq("gzip") + # Decompress response + response = HTTP::Client::Response.new( + status: response.status, + headers: response.headers, + body_io: Compress::Gzip::Reader.new(IO::Memory.new(response.body)), + ) - # Decompress response - response = HTTP::Client::Response.new( - status: response.status, - headers: response.headers, - body_io: Compress::Gzip::Reader.new(IO::Memory.new(response.body)), - ) + count = 0 + MIME::Multipart.parse(response) do |headers, part| + part_range = headers["Content-Range"][6..] + expect(part_range).to be_within(range_response_header_value) + expect(part.gets_to_end).to eq(range_response_results[part_range]) + count += 1 + end - count = 0 - MIME::Multipart.parse(response) do |headers, part| - part_range = headers["Content-Range"][6..] - expect(part_range).to be_within(range_response_header_value) - expect(part.gets_to_end).to eq(range_response_results[part_range]) - count += 1 + expect(count).to eq(3) end - expect(count).to eq(3) - # Is the file cached? temporary_file << "Something else" temporary_file.flush.rewind From c012206f802074e5a27a3454db4f263305397b62 Mon Sep 17 00:00:00 2001 From: syeopite Date: Tue, 3 Jun 2025 17:10:10 -0700 Subject: [PATCH 8/8] Improve cache size check to be more performant Summing the sizes of each cached file every time is very inefficient. Instead we can simply store the cache size in an constant and increase it everytime a file is added into the cache. --- .../handlers/static_assets_handler_spec.cr | 31 +++++++++++++++++++ .../http_server/static_assets_handler.cr | 7 +++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr index 4b50171a..76dc7be7 100644 --- a/spec/http_server/handlers/static_assets_handler_spec.cr +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -198,5 +198,36 @@ Spectator.describe StaticAssetsHandler do end end + it "Will not cache additional files if the cache limit is reached" do + 5.times do |times| + data = "a" * 1_000_000 + + make_temporary_file("test cache size limit #{times}") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, data) do + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq(data) + end + + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq(data) + end + end + + # Cache should be 5 mb so no more files will be cached. + make_temporary_file("test cache size limit uncached") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "a") do + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("a") + end + + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to_not eq("a") + end + end + after_each { Invidious::HttpServer::StaticAssetsHandler.clear_cache } end diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr index 94add5a8..e086ac3b 100644 --- a/src/invidious/http_server/static_assets_handler.cr +++ b/src/invidious/http_server/static_assets_handler.cr @@ -20,6 +20,7 @@ module Invidious::HttpServer end CACHE_LIMIT = 5_000_000 # 5MB + @@current_cache_size = 0 @@cached_files = {} of Path => CachedFile # Returns metadata for the requested file @@ -71,9 +72,8 @@ module Invidious::HttpServer # Writes file data to the cache private def flush_io_to_cache(io, file_path, file_info) - if @@cached_files.sum(&.[1].size) + file_info.size < CACHE_LIMIT - data_slice = io.to_slice - @@cached_files[file_path] = CachedFile.new(data_slice, file_info.size, file_info.modification_time) + if (@@current_cache_size += file_info.size) <= CACHE_LIMIT + @@cached_files[file_path] = CachedFile.new(io.to_slice, file_info.size, file_info.modification_time) end end @@ -112,6 +112,7 @@ module Invidious::HttpServer # # This is only used in the specs to clear the cache before each handler test def self.clear_cache + @@current_cache_size = 0 return @@cached_files.clear end end