diff --git a/spec/http_server/handlers/static_assets_handler/test.txt b/spec/http_server/handlers/static_assets_handler/test.txt new file mode 100644 index 00000000..70c379b6 --- /dev/null +++ b/spec/http_server/handlers/static_assets_handler/test.txt @@ -0,0 +1 @@ +Hello world \ No newline at end of file diff --git a/spec/http_server/handlers/static_assets_handler_spec.cr b/spec/http_server/handlers/static_assets_handler_spec.cr new file mode 100644 index 00000000..76dc7be7 --- /dev/null +++ b/spec/http_server/handlers/static_assets_handler_spec.cr @@ -0,0 +1,233 @@ +# Due to the way that specs are handled this file cannot be run together with +# everything else without causing a compile time error that'll be incredibly +# annoying to resolve. +# +# TODO: Create different spec categories that can then be ran through make. +# An implementation of this can be seen with the tests for the Crystal compiler itself. +# +# For now run this with `crystal spec spec/http_server/handlers/static_assets_handler_spec.cr -Drunning_by_self` + +{% skip_file if compare_versions(Crystal::VERSION, "1.17.0-dev") < 0 || !flag?(:running_by_self) %} + +require "http" +require "spectator" +require "../../../src/invidious/http_server/static_assets_handler.cr" + +private def get_static_assets_handler + return Invidious::HttpServer::StaticAssetsHandler.new "spec/http_server/handlers/static_assets_handler", directory_listing: false +end + +# Slightly modified version of `handle` function from +# +# https://github.com/crystal-lang/crystal/blob/3f369d2c721e9462d9f6126cb0bcd4c6992f0225/spec/std/http/server/handlers/static_file_handler_spec.cr#L5 + +private def handle(request, handler : HTTP::Handler? = nil, decompress : Bool = false) + io = IO::Memory.new + response = HTTP::Server::Response.new(io) + context = HTTP::Server::Context.new(request, response) + + if !handler + handler = get_static_assets_handler + get_static_assets_handler.call context + else + handler.call(context) + end + + response.close + io.rewind + + HTTP::Client::Response.from_io(io, decompress: decompress) +end + +# Makes and yields a temporary file with the given prefix +private def make_temporary_file(prefix, contents = nil, &) + tempfile = File.tempfile(prefix, "static_assets_handler_spec", dir: "spec/http_server/handlers/static_assets_handler") + file_link = "/#{File.basename(tempfile.path)}" + yield tempfile, file_link +ensure + tempfile.try &.delete +end + +# Changes the contents of the temporary file after yield +private def cycle_temporary_file_contents(temporary_file, initial, &) + temporary_file.rewind << initial + temporary_file.rewind.flush + yield + temporary_file.rewind << "something else" + temporary_file.rewind.flush +end + +# Get relative file path to a file within the static_assets_handler folder +macro get_file_path(basename) + "spec/http_server/handlers/static_assets_handler/#{ {{basename}} }" +end + +Spectator.describe StaticAssetsHandler do + it "Can serve a file" do + response = handle HTTP::Request.new("GET", "/test.txt") + expect(response.status_code).to eq(200) + expect(response.body).to eq(File.read(get_file_path("test.txt"))) + end + + it "Can serve cached file" do + make_temporary_file("cache_test") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "foo") do + expect(temporary_file.rewind.gets_to_end).to eq("foo") + + # Should get cached by the first run + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("foo") + end + + # Temporary file is updated after `cycle_temporary_file_contents` is called + # but if the file is successfully cached then we'll only get the original + # contents. + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("foo") + end + end + + it "Adds cache headers" do + response = handle HTTP::Request.new("GET", "/test.txt") + expect(response.headers["cache_control"]).to eq("max-age=2629800") + end + + context "Can handle range requests" do + it "Can serve range request" do + headers = HTTP::Headers{"Range" => "bytes=0-2"} + response = handle HTTP::Request.new("GET", "/test.txt", headers) + + expect(response.status_code).to eq(206) + expect(response.headers["Content-Range"]?).to eq "bytes 0-2/11" + expect(response.body).to eq "Hel" + end + + it "Will cache entire file even if doing partial requests" do + make_temporary_file("range_cache") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "Hello world") do + handle HTTP::Request.new("GET", file_link, HTTP::Headers{"Range" => "bytes=0-2"}) + end + + # Second request shouldn't have changed + headers = HTTP::Headers{"Range" => "bytes=3-8"} + response = handle HTTP::Request.new("GET", file_link, headers) + expect(response.status_code).to eq(206) + expect(response.body).to eq "lo wor" + end + end + end + + context "Is able to support compression" do + def decompressed(string : String) + decompressed = Compress::Gzip::Reader.open(IO::Memory.new(string)) do |gzip| + gzip.gets_to_end + end + + return expect(decompressed) + end + + it "For full file requests" do + handler = HTTP::CompressHandler.new + handler.next = get_static_assets_handler() + + make_temporary_file("check decompression handler") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "Hello world") do + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + decompressed(response.body).to eq("Hello world") + end + + # Are cached requests working? + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + decompressed(response.body).to eq("Hello world") + + # Able to retrieve non gzipped file? + response = handle HTTP::Request.new("GET", file_link), handler: handler + expect(response.body).to eq("Hello world") + expect(response.headers).to_not have_key("Content-Encoding") + end + end + + # Inspired by the equivalent tests from upstream + it "For partial file requests" do + handler = HTTP::CompressHandler.new + handler.next = get_static_assets_handler() + + make_temporary_file("check_decompression_handler_on_partial_requests") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "Hello world this is a very long string") do + range_response_results = { + "10-20/38" => "d this is a", + "0-0/38" => "H", + "5-9/38" => " worl", + } + + range_request_header_value = {"10-20", "5-9", "0-0"}.join(',') + range_response_header_value = range_response_results.keys + + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Range" => "bytes=#{range_request_header_value}", "Accept-Encoding" => "gzip"}), handler: handler + expect(response.headers["Content-Encoding"]).to eq("gzip") + + # Decompress response + response = HTTP::Client::Response.new( + status: response.status, + headers: response.headers, + body_io: Compress::Gzip::Reader.new(IO::Memory.new(response.body)), + ) + + count = 0 + MIME::Multipart.parse(response) do |headers, part| + part_range = headers["Content-Range"][6..] + expect(part_range).to be_within(range_response_header_value) + expect(part.gets_to_end).to eq(range_response_results[part_range]) + count += 1 + end + + expect(count).to eq(3) + end + + # Is the file cached? + temporary_file << "Something else" + temporary_file.flush.rewind + + response = handle HTTP::Request.new("GET", file_link, headers: HTTP::Headers{"Accept-Encoding" => "gzip"}), handler: handler + decompressed(response.body).to eq("Hello world this is a very long string") + end + end + end + + it "Will not cache additional files if the cache limit is reached" do + 5.times do |times| + data = "a" * 1_000_000 + + make_temporary_file("test cache size limit #{times}") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, data) do + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq(data) + end + + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq(data) + end + end + + # Cache should be 5 mb so no more files will be cached. + make_temporary_file("test cache size limit uncached") do |temporary_file, file_link| + cycle_temporary_file_contents(temporary_file, "a") do + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to eq("a") + end + + response = handle HTTP::Request.new("GET", file_link) + expect(response.status_code).to eq(200) + expect(response.body).to_not eq("a") + end + end + + after_each { Invidious::HttpServer::StaticAssetsHandler.clear_cache } +end diff --git a/src/ext/kemal_static_file_handler.cr b/src/ext/kemal_static_file_handler.cr index a5f42261..16cb84fb 100644 --- a/src/ext/kemal_static_file_handler.cr +++ b/src/ext/kemal_static_file_handler.cr @@ -1,3 +1,24 @@ +{% if compare_versions(Crystal::VERSION, "1.17.0-dev") >= 0 %} + # Strip StaticFileHandler from the binary + # + # This allows us to compile on 1.17.0 as the compiler won't try to + # semantically check the outdated upstream code. + class Kemal::Config + private def setup_static_file_handler + end + end + + # Nullify `Kemal::StaticFileHandler` + # + # Needed until the next release of Kemal after 1.7 + class Kemal::StaticFileHandler < HTTP::StaticFileHandler + def call(context : HTTP::Server::Context) + end + end + + {% skip_file %} +{% end %} + # Since systems have a limit on number of open files (`ulimit -a`), # we serve them from memory to avoid 'Too many open files' without needing # to modify ulimit. diff --git a/src/invidious.cr b/src/invidious.cr index 69f8a26c..cc0a5d57 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -225,19 +225,25 @@ error 500 do |env, ex| error_template(500, ex) end -static_headers do |env| - env.response.headers.add("Cache-Control", "max-age=2629800") -end - # Init Kemal -public_folder "assets" - Kemal.config.powered_by_header = false add_handler FilteredCompressHandler.new add_handler APIHandler.new add_handler AuthHandler.new add_handler DenyFrame.new + +{% if compare_versions(Crystal::VERSION, "1.17.0-dev") >= 0 %} + Kemal.config.serve_static = false + add_handler Invidious::HttpServer::StaticAssetsHandler.new("assets", directory_listing: false) +{% else %} + public_folder "assets" + + static_headers do |env| + env.response.headers.add("Cache-Control", "max-age=2629800") + end +{% end %} + add_context_storage_type(Array(String)) add_context_storage_type(Preferences) add_context_storage_type(Invidious::User) diff --git a/src/invidious/http_server/static_assets_handler.cr b/src/invidious/http_server/static_assets_handler.cr new file mode 100644 index 00000000..e086ac3b --- /dev/null +++ b/src/invidious/http_server/static_assets_handler.cr @@ -0,0 +1,119 @@ +{% skip_file if compare_versions(Crystal::VERSION, "1.17.0-dev") < 0 %} + +module Invidious::HttpServer + class StaticAssetsHandler < HTTP::StaticFileHandler + # In addition to storing the actual data of a file, it also implements the required + # getters needed for the object to imitate a `File::Stat` within `StaticFileHandler`. + # + # Since the `File::Stat` is created once in `#call` and then passed around to the + # rest of the class's methods, imitating the object allows us to only lookup + # the cache hash once for every request. + # + private record CachedFile, data : Bytes, size : Int64, modification_time : Time do + def directory? + false + end + + def file? + true + end + end + + CACHE_LIMIT = 5_000_000 # 5MB + @@current_cache_size = 0 + @@cached_files = {} of Path => CachedFile + + # Returns metadata for the requested file + # + # If the requested file is cached, a `CachedFile` is returned instead of a `File::Stat`. + # This represents the metadata info of a cached file and implements all the methods of `File::Stat` that + # is used by the `StaticAssetsHandler`. + # + # The `CachedFile` also stores the raw bytes of the cached file, and this method serves as the place where + # the cached file is retrieved if it exists. Though the data will only be read in `#serve_file` + private def file_info(expanded_path : Path) + file_path = @public_dir.join(expanded_path.to_kind(Path::Kind.native)) + {@@cached_files[file_path]? || File.info?(file_path), file_path} + end + + # Add "Cache-Control" header to the response + private def add_cache_headers(response_headers : HTTP::Headers, last_modified : Time) : Nil + super; response_headers["Cache-Control"] = "max-age=2629800" + end + + # Serves and caches the file at the given path. + # + # This is an override of `serve_file` to allow serving a file from memory, and to cache it + # it as needed. + private def serve_file(context : HTTP::Server::Context, file_info, file_path : Path, original_file_path : Path, last_modified : Time) + context.response.content_type = MIME.from_filename(original_file_path.to_s, "application/octet-stream") + + range_header = context.request.headers["Range"]? + + if !file_info.is_a? CachedFile + retrieve_bytes_from = IO::Memory.new + + File.open(file_path) do |file| + # We cannot cache partial data so we'll rewind and read from the start + if range_header + dispatch_serve(context, file, file_info, range_header) + IO.copy(file.rewind, retrieve_bytes_from) + else + context.response.output = IO::MultiWriter.new(context.response.output, retrieve_bytes_from, sync_close: true) + dispatch_serve(context, file, file_info, range_header) + end + end + + return flush_io_to_cache(retrieve_bytes_from, file_path, file_info) + else + return dispatch_serve(context, file_info.data, file_info, range_header) + end + end + + # Writes file data to the cache + private def flush_io_to_cache(io, file_path, file_info) + if (@@current_cache_size += file_info.size) <= CACHE_LIMIT + @@cached_files[file_path] = CachedFile.new(io.to_slice, file_info.size, file_info.modification_time) + end + end + + # Either send the file in full, or just fragments of it depending on the request + private def dispatch_serve(context, file, file_info, range_header) + if range_header + # an IO is needed for `serve_file_range` + file = file.is_a?(Bytes) ? IO::Memory.new(file, writeable: false) : file + serve_file_range(context, file, range_header, file_info) + else + context.response.headers["Accept-Ranges"] = "bytes" + serve_file_full(context, file, file_info) + end + end + + # If we're serving the full file right away then there's no need for an IO at all. + private def serve_file_full(context : HTTP::Server::Context, file : Bytes, file_info) + context.response.status = :ok + context.response.content_length = file_info.size + context.response.write file + end + + # Serves segments of a file based on the `Range header` + # + # An override of `serve_file_range` to allow using a generic IO rather than a `File`. + # Literally the same code as what we inherited but just with the `file` argument's type + # being set to `IO` rather than `File` + # + # Can be removed once https://github.com/crystal-lang/crystal/issues/15817 is fixed. + private def serve_file_range(context : HTTP::Server::Context, file : IO, range_header : String, file_info) + # Paste in the body of inherited serve_file_range + {{@type.superclass.methods.select(&.name.==("serve_file_range"))[0].body}} + end + + # Clear cached files. + # + # This is only used in the specs to clear the cache before each handler test + def self.clear_cache + @@current_cache_size = 0 + return @@cached_files.clear + end + end +end