diff --git a/spec/invidious/hashtag_spec.cr b/spec/invidious/hashtag_spec.cr
index abc81225..9578929d 100644
--- a/spec/invidious/hashtag_spec.cr
+++ b/spec/invidious/hashtag_spec.cr
@@ -3,7 +3,7 @@ require "../parsers_helper.cr"
 Spectator.describe Invidious::Hashtag do
   it "parses richItemRenderer containers (test 1)" do
     # Enable mock
-    test_content = load_mock("hashtag/martingarrix_page1")
+    test_content = load_mock("hashtag/martingarrix_page1").as_h
     videos, _ = extract_items(test_content)
 
     expect(typeof(videos)).to eq(Array(SearchItem))
@@ -56,7 +56,7 @@ Spectator.describe Invidious::Hashtag do
 
   it "parses richItemRenderer containers (test 2)" do
     # Enable mock
-    test_content = load_mock("hashtag/martingarrix_page2")
+    test_content = load_mock("hashtag/martingarrix_page2").as_h
     videos, _ = extract_items(test_content)
 
     expect(typeof(videos)).to eq(Array(SearchItem))
diff --git a/spec/invidious/videos/adaptative_audio_stream_spec.cr b/spec/invidious/videos/adaptative_audio_stream_spec.cr
new file mode 100644
index 00000000..e7e0c8de
--- /dev/null
+++ b/spec/invidious/videos/adaptative_audio_stream_spec.cr
@@ -0,0 +1,143 @@
+require "../../parsers_helper.cr"
+
+Spectator.describe Invidious::Videos do
+  subject(streams) {
+    described_class.parse_adaptative_formats(load_mock(mock_name))
+  }
+
+  describe ".parse_adaptative_formats" do
+    provided mock_name: "streams/audio_stereo" do
+      expect(streams.size).to eq(2)
+
+      sample_one = streams[0]
+      sample_two = streams[1]
+
+      #
+      # Test 1 / itag 139
+      #
+
+      expect(sample_one).to be_a(Invidious::Videos::AdaptativeAudioStream)
+      sample_one = sample_one.as(Invidious::Videos::AdaptativeAudioStream)
+
+      expect(sample_one.itag).to eq(139)
+      expect(sample_one.label).to eq("low")
+      expect(sample_one.url).to eq("https://rr5---sn-4g5edndl.googlevideo.com/videoplayback")
+
+      expect(sample_one.last_modified).to eq(Time.utc(seconds: 1_677_975_303_i64, nanoseconds: 572_731_000))
+      expect(sample_one.projection_type).to eq(Invidious::Videos::ProjType::Rectangular)
+
+      expect(sample_one).to have_attributes(
+        raw_mime_type: "audio/mp4; codecs=\"mp4a.40.5\"",
+        mime_type: "audio/mp4",
+        codecs: "mp4a.40.5",
+        # Adaptative properties
+        init_range: Invidious::Videos::ByteRange.new(0_u32, 640_u32),
+        index_range: Invidious::Videos::ByteRange.new(641_u32, 2148_u32),
+        # Common properties
+        bitrate: 50_854,
+        bitrate_avg: 48_788,
+        content_length: 7_454_256,
+        # Audio properties
+        audio_quality: Invidious::Videos::AudioQuality::Low,
+        audio_sample_rate: 22_050,
+        audio_channels: 2,
+        audio_loudness_db: -5.01,
+        audio_spatial_type: Invidious::Videos::SpatialType::None
+      )
+
+      #
+      # Test 2 / itag 251 (Note: most properties aren't checked)
+      #
+
+      expect(sample_two).to be_a(Invidious::Videos::AdaptativeAudioStream)
+      sample_two = sample_two.as(Invidious::Videos::AdaptativeAudioStream)
+
+      expect(sample_two.itag).to eq(251)
+      expect(sample_two.label).to eq("medium")
+
+      expect(sample_two).to have_attributes(
+        raw_mime_type: "audio/webm; codecs=\"opus\"",
+        mime_type: "audio/webm",
+        codecs: "opus",
+        # Audio properties
+        audio_quality: Invidious::Videos::AudioQuality::Medium,
+        audio_sample_rate: 48_000,
+        audio_channels: 2,
+        audio_loudness_db: -5.01,
+        audio_spatial_type: Invidious::Videos::SpatialType::None
+      )
+    end
+
+    provided mock_name: "streams/audio_spatial" do
+      expect(streams.size).to eq(2)
+
+      sample_one = streams[0] # 5.1
+      sample_two = streams[1] # Quad
+
+      # Test 1
+
+      expect(sample_one).to be_a(Invidious::Videos::AdaptativeAudioStream)
+      sample_one = sample_one.as(Invidious::Videos::AdaptativeAudioStream)
+
+      expect(sample_one.itag).to eq(327)
+
+      expect(sample_one).to have_attributes(
+        audio_quality: Invidious::Videos::AudioQuality::Medium,
+        audio_sample_rate: 44_100,
+        audio_channels: 6,
+        audio_loudness_db: 0.0,
+        audio_spatial_type: Invidious::Videos::SpatialType::Ambisonics_5_1
+      )
+
+      # Test 2
+
+      expect(sample_two).to be_a(Invidious::Videos::AdaptativeAudioStream)
+      sample_two = sample_two.as(Invidious::Videos::AdaptativeAudioStream)
+
+      expect(sample_two.itag).to eq(338)
+
+      expect(sample_two).to have_attributes(
+        audio_quality: Invidious::Videos::AudioQuality::Medium,
+        audio_sample_rate: 48_000,
+        audio_channels: 4,
+        audio_loudness_db: 0.0,
+        audio_spatial_type: Invidious::Videos::SpatialType::AmbisonicsQuad
+      )
+    end
+
+    provided mock_name: "streams/audio_multi_lang" do
+      expect(streams.size).to eq(8)
+
+      sample_one = streams[1] # English
+      sample_two = streams[4] # Hindi
+
+      # Test 1
+
+      expect(sample_one).to be_a(Invidious::Videos::AdaptativeAudioTrackStream)
+      sample_one = sample_one.as(Invidious::Videos::AdaptativeAudioTrackStream)
+
+      expect(sample_one.itag).to eq(249)
+
+      expect(sample_one).to have_attributes(
+        track_id: "en.0",
+        track_name: "English",
+        iso_code: "en",
+        default: true
+      )
+
+      # Test 2
+
+      expect(sample_two).to be_a(Invidious::Videos::AdaptativeAudioTrackStream)
+      sample_two = sample_two.as(Invidious::Videos::AdaptativeAudioTrackStream)
+
+      expect(sample_two.itag).to eq(249)
+
+      expect(sample_two).to have_attributes(
+        track_id: "hi.0",
+        track_name: "Hindi",
+        iso_code: "hi",
+        default: false
+      )
+    end
+  end
+end
diff --git a/spec/invidious/videos/regular_videos_extract_spec.cr b/spec/invidious/videos/regular_videos_extract_spec.cr
index f96703f6..f7f6ba13 100644
--- a/spec/invidious/videos/regular_videos_extract_spec.cr
+++ b/spec/invidious/videos/regular_videos_extract_spec.cr
@@ -3,8 +3,8 @@ require "../../parsers_helper.cr"
 Spectator.describe "parse_video_info" do
   it "parses a regular video" do
     # Enable mock
-    _player = load_mock("video/regular_mrbeast.player")
-    _next = load_mock("video/regular_mrbeast.next")
+    _player = load_mock("video/regular_mrbeast.player").as_h
+    _next = load_mock("video/regular_mrbeast.next").as_h
 
     raw_data = _player.merge!(_next)
     info = parse_video_info("2isYuQZMbdU", raw_data)
@@ -85,8 +85,8 @@ Spectator.describe "parse_video_info" do
 
   it "parses a regular video with no descrition/comments" do
     # Enable mock
-    _player = load_mock("video/regular_no-description.player")
-    _next = load_mock("video/regular_no-description.next")
+    _player = load_mock("video/regular_no-description.player").as_h
+    _next = load_mock("video/regular_no-description.next").as_h
 
     raw_data = _player.merge!(_next)
     info = parse_video_info("iuevw6218F0", raw_data)
diff --git a/spec/invidious/videos/scheduled_live_extract_spec.cr b/spec/invidious/videos/scheduled_live_extract_spec.cr
index c3a9b228..344883b6 100644
--- a/spec/invidious/videos/scheduled_live_extract_spec.cr
+++ b/spec/invidious/videos/scheduled_live_extract_spec.cr
@@ -3,8 +3,8 @@ require "../../parsers_helper.cr"
 Spectator.describe "parse_video_info" do
   it "parses scheduled livestreams data" do
     # Enable mock
-    _player = load_mock("video/scheduled_live_PBD-Podcast.player")
-    _next = load_mock("video/scheduled_live_PBD-Podcast.next")
+    _player = load_mock("video/scheduled_live_PBD-Podcast.player").as_h
+    _next = load_mock("video/scheduled_live_PBD-Podcast.next").as_h
 
     raw_data = _player.merge!(_next)
     info = parse_video_info("N-yVic7BbY0", raw_data)
diff --git a/spec/parsers_helper.cr b/spec/parsers_helper.cr
index 6589acad..7646cfde 100644
--- a/spec/parsers_helper.cr
+++ b/spec/parsers_helper.cr
@@ -22,11 +22,11 @@ require "../src/invidious/yt_backend/extractors_utils"
 OUTPUT = File.open(File::NULL, "w")
 LOGGER = Invidious::LogHandler.new(OUTPUT, LogLevel::Off)
 
-def load_mock(file) : Hash(String, JSON::Any)
+def load_mock(file) : JSON::Any
   file = File.join(__DIR__, "..", "mocks", file + ".json")
   content = File.read(file)
 
-  return JSON.parse(content).as_h
+  return JSON.parse(content)
 end
 
 Spectator.configure do |config|
diff --git a/src/invidious/frontend/watch_page.cr b/src/invidious/frontend/watch_page.cr
index c8cb7110..21f2197e 100644
--- a/src/invidious/frontend/watch_page.cr
+++ b/src/invidious/frontend/watch_page.cr
@@ -4,9 +4,9 @@ module Invidious::Frontend::WatchPage
   # A handy structure to pass many elements at
   # once to the download widget function
   struct VideoAssets
-    getter full_videos : Array(Hash(String, JSON::Any))
-    getter video_streams : Array(Hash(String, JSON::Any))
-    getter audio_streams : Array(Hash(String, JSON::Any))
+    getter full_videos : Array(Videos::ProgressiveHttpStream)
+    getter video_streams : Array(Videos::AdaptativeVideoStream)
+    getter audio_streams : Array(Videos::AdaptativeAudioStream)
     getter captions : Array(Invidious::Videos::Captions::Metadata)
 
     def initialize(
@@ -47,38 +47,33 @@ module Invidious::Frontend::WatchPage
       # Non-DASH videos (audio+video)
 
       video_assets.full_videos.each do |option|
-        mimetype = option["mimeType"].as_s.split(";")[0]
+        height = Invidious::Videos::Formats.itag_to_metadata?(option.itag).try &.["height"]?
 
-        height = Invidious::Videos::Formats.itag_to_metadata?(option["itag"]).try &.["height"]?
-
-        value = {"itag": option["itag"], "ext": mimetype.split("/")[1]}.to_json
+        value = {"itag": option.itag, "ext": option.mime_type.split("/")[1]}.to_json
 
         str << "\t\t\t<option value='" << value << "'>"
-        str << (height || "~240") << "p - " << mimetype
+        str << (height || option.video_height) << "p - " << option.mime_type
         str << "</option>\n"
       end
 
       # DASH video streams
 
       video_assets.video_streams.each do |option|
-        mimetype = option["mimeType"].as_s.split(";")[0]
-
-        value = {"itag": option["itag"], "ext": mimetype.split("/")[1]}.to_json
+        value = {"itag": option.itag, "ext": option.mime_type.split("/")[1]}.to_json
 
         str << "\t\t\t<option value='" << value << "'>"
-        str << option["qualityLabel"] << " - " << mimetype << " @ " << option["fps"] << "fps - video only"
+        str << option.label << " - " << option.mime_type
+        str << " @ " << option.video_fps << "fps - video only"
         str << "</option>\n"
       end
 
       # DASH audio streams
 
       video_assets.audio_streams.each do |option|
-        mimetype = option["mimeType"].as_s.split(";")[0]
-
-        value = {"itag": option["itag"], "ext": mimetype.split("/")[1]}.to_json
+        value = {"itag": option.itag, "ext": option.mime_type.split("/")[1]}.to_json
 
         str << "\t\t\t<option value='" << value << "'>"
-        str << mimetype << " @ " << (option["bitrate"]?.try &.as_i./ 1000) << "k - audio only"
+        str << option.mime_type << " @ " << (option.bitrate // 1000) << "kbps - audio only"
         str << "</option>\n"
       end
 
diff --git a/src/invidious/jsonify/api_v1/video_json.cr b/src/invidious/jsonify/api_v1/video_json.cr
index 08cd533f..4ac95c57 100644
--- a/src/invidious/jsonify/api_v1/video_json.cr
+++ b/src/invidious/jsonify/api_v1/video_json.cr
@@ -81,78 +81,62 @@ module Invidious::JSONify::APIv1
           video.adaptive_fmts.each do |fmt|
             json.object do
               # Only available on regular videos, not livestreams/OTF streams
-              if init_range = fmt["initRange"]?
-                json.field "init", "#{init_range["start"]}-#{init_range["end"]}"
-              end
-              if index_range = fmt["indexRange"]?
-                json.field "index", "#{index_range["start"]}-#{index_range["end"]}"
-              end
+              json.field "init", fmt.init_range.to_s if fmt.init_range
+              json.field "index", fmt.index_range.to_s if fmt.index_range
 
               # Not available on MPEG-4 Timed Text (`text/mp4`) streams (livestreams only)
-              json.field "bitrate", fmt["bitrate"].as_i.to_s if fmt["bitrate"]?
+              json.field "bitrate", fmt.bitrate.to_s if fmt.responds_to?(:bitrate)
 
               if proxy
-                json.field "url", Invidious::HttpServer::Utils.proxy_video_url(
-                  fmt["url"].to_s, absolute: true
-                )
+                json.field "url", HttpServer::Utils.proxy_video_url(fmt.url, absolute: true)
               else
-                json.field "url", fmt["url"]
+                json.field "url", fmt.url
               end
 
-              json.field "itag", fmt["itag"].as_i.to_s
-              json.field "type", fmt["mimeType"]
-              json.field "clen", fmt["contentLength"]? || "-1"
+              json.field "itag", fmt.itag.to_s
+              json.field "type", fmt.raw_mime_type
+              json.field "clen", fmt.content_length if fmt.responds_to?(:content_length)
+
 
               # Last modified is a unix timestamp with µS, with the dot omitted.
               # E.g: 1638056732(.)141582
               #
               # On livestreams, it's not present, so always fall back to the
               # current unix timestamp (up to mS precision) for compatibility.
-              last_modified = fmt["lastModified"]?
-              last_modified ||= "#{Time.utc.to_unix_ms}000"
-              json.field "lmt", last_modified
+              last_modified = fmt.last_modified || Time.utc
+              json.field "lmt", "#{last_modified.to_unix_ms}000"
 
-              json.field "projectionType", fmt["projectionType"]
+              json.field "projectionType", fmt.projection_type.to_s.underscore.upcase # e.g. EQUIRECTANGULAR_THREED_TOP_BOTTOM
 
-              height = fmt["height"]?.try &.as_i
-              width = fmt["width"]?.try &.as_i
-
-              fps = fmt["fps"]?.try &.as_i
-
-              if fps
-                json.field "fps", fps
+              # Video-related data
+              if fmt.is_a?(Videos::AdaptativeVideoStream)
+                json.field "size", "#{fmt.video_width}x#{fmt.video_height}"
+                json.field "resolution", "#{fmt.video_height}p"
+                json.field "fps", fmt.video_fps
               end
 
-              if height && width
-                json.field "size", "#{width}x#{height}"
-                json.field "resolution", "#{height}p"
+              json.field "qualityLabel", fmt.label
 
-                quality_label = "#{width > height ? height : width}p"
-
-                if fps && fps > 30
-                  quality_label += fps.to_s
-                end
-
-                json.field "qualityLabel", quality_label
-              end
-
-              if fmt_info = Invidious::Videos::Formats.itag_to_metadata?(fmt["itag"])
+              if fmt_info = Invidious::Videos::Formats.itag_to_metadata?(fmt.itag)
                 json.field "container", fmt_info["ext"]
-                json.field "encoding", fmt_info["vcodec"]? || fmt_info["acodec"]
               end
 
-              # Livestream chunk infos
-              json.field "targetDurationSec", fmt["targetDurationSec"].as_i if fmt.has_key?("targetDurationSec")
-              json.field "maxDvrDurationSec", fmt["maxDvrDurationSec"].as_i if fmt.has_key?("maxDvrDurationSec")
+              json.field "encoding", fmt.codecs
+
+              # Livestream chunk info. Expected to be present when `init` and `index` are absent.
+              json.field "targetDurationSec", fmt.target_duration if fmt.target_duration
+              json.field "maxDvrDurationSec", fmt.max_dvr_duration if fmt.max_dvr_duration
 
               # Audio-related data
-              json.field "audioQuality", fmt["audioQuality"] if fmt.has_key?("audioQuality")
-              json.field "audioSampleRate", fmt["audioSampleRate"].as_s.to_i if fmt.has_key?("audioSampleRate")
-              json.field "audioChannels", fmt["audioChannels"] if fmt.has_key?("audioChannels")
+              if fmt.is_a?(Videos::AdaptativeAudioStream)
+                json.field "audioQuality", fmt.audio_quality
+                json.field "audioSampleRate", fmt.audio_sample_rate
+                json.field "audioChannels", fmt.audio_channels
+              end
 
               # Extra misc stuff
-              json.field "colorInfo", fmt["colorInfo"] if fmt.has_key?("colorInfo")
-              json.field "captionTrack", fmt["captionTrack"] if fmt.has_key?("captionTrack")
+              # json.field "colorInfo", fmt["colorInfo"] if fmt.has_key?("colorInfo")
+              # json.field "captionTrack", fmt["captionTrack"] if fmt.has_key?("captionTrack")
             end
           end
         end
@@ -163,44 +147,27 @@ module Invidious::JSONify::APIv1
           video.fmt_stream.each do |fmt|
             json.object do
               if proxy
-                json.field "url", Invidious::HttpServer::Utils.proxy_video_url(
-                  fmt["url"].to_s, absolute: true
-                )
+                json.field "url", HttpServer::Utils.proxy_video_url(fmt.url, absolute: true)
               else
-                json.field "url", fmt["url"]
-              end
-              json.field "itag", fmt["itag"].as_i.to_s
-              json.field "type", fmt["mimeType"]
-              json.field "quality", fmt["quality"]
-
-              json.field "bitrate", fmt["bitrate"].as_i.to_s if fmt["bitrate"]?
-
-              height = fmt["height"]?.try &.as_i
-              width = fmt["width"]?.try &.as_i
-
-              fps = fmt["fps"]?.try &.as_i
-
-              if fps
-                json.field "fps", fps
+                json.field "url", fmt.url
               end
 
-              if height && width
-                json.field "size", "#{width}x#{height}"
-                json.field "resolution", "#{height}p"
+              json.field "itag", fmt.itag.to_s
+              json.field "type", fmt.raw_mime_type
+              json.field "quality", fmt.label
 
-                quality_label = "#{width > height ? height : width}p"
+              json.field "bitrate", fmt.bitrate
 
-                if fps && fps > 30
-                  quality_label += fps.to_s
-                end
+              json.field "size", "#{fmt.video_width}x#{fmt.video_height}"
+              json.field "resolution", "#{fmt.video_height}p"
+              json.field "fps", fmt.video_fps
 
-                json.field "qualityLabel", quality_label
-              end
-
-              if fmt_info = Invidious::Videos::Formats.itag_to_metadata?(fmt["itag"])
+              if fmt_info = Videos::Formats.itag_to_metadata?(fmt.itag)
                 json.field "container", fmt_info["ext"]
-                json.field "encoding", fmt_info["vcodec"]? || fmt_info["acodec"]
               end
+
+              json.field "qualityLabel", fmt.label
+              json.field "encoding", fmt.codecs
             end
           end
         end
diff --git a/src/invidious/routes/api/manifest.cr b/src/invidious/routes/api/manifest.cr
index d89e752c..377a4888 100644
--- a/src/invidious/routes/api/manifest.cr
+++ b/src/invidious/routes/api/manifest.cr
@@ -4,7 +4,7 @@ module Invidious::Routes::API::Manifest
     env.response.headers.add("Access-Control-Allow-Origin", "*")
     env.response.content_type = "application/dash+xml"
 
-    local = env.params.query["local"]?.try &.== "true"
+    local = (env.params.query["local"]? == "true")
     id = env.params.url["id"]
     region = env.params.query["region"]?
 
@@ -44,18 +44,18 @@ module Invidious::Routes::API::Manifest
       return manifest
     end
 
-    adaptive_fmts = video.adaptive_fmts
-
+    # Transform URLs for proxying
     if local
-      adaptive_fmts.each do |fmt|
-        fmt["url"] = JSON::Any.new("#{HOST_URL}#{URI.parse(fmt["url"].as_s).request_target}")
+      video.adaptive_fmts.each do |fmt|
+        fmt.url = "#{HOST_URL}#{URI.parse(fmt.url).request_target}"
       end
     end
 
-    audio_streams = video.audio_streams.sort_by { |stream| {stream["bitrate"].as_i} }.reverse!
-    video_streams = video.video_streams.sort_by { |stream| {stream["width"].as_i, stream["fps"].as_i} }.reverse!
+    audio_streams = video.audio_streams.sort_by(&.bitrate).reverse!
+    video_streams = video.video_streams.sort_by { |fmt| {fmt.video_width, fmt.video_fps} }.reverse!
 
-    manifest = XML.build(indent: "  ", encoding: "UTF-8") do |xml|
+    # Build the manifest
+    return XML.build(indent: "  ", encoding: "UTF-8") do |xml|
       xml.element("MPD", "xmlns": "urn:mpeg:dash:schema:mpd:2011",
         "profiles": "urn:mpeg:dash:profile:full:2011", minBufferTime: "PT1.5S", type: "static",
         mediaPresentationDuration: "PT#{video.length_seconds}S") do
@@ -63,34 +63,28 @@ module Invidious::Routes::API::Manifest
           i = 0
 
           {"audio/mp4"}.each do |mime_type|
-            mime_streams = audio_streams.select { |stream| stream["mimeType"].as_s.starts_with? mime_type }
-            next if mime_streams.empty?
+            formats = audio_streams.select(&.mime_type.== mime_type)
+            next if formats.empty?
 
-            mime_streams.each do |fmt|
+            formats.each do |fmt|
               # OTF streams aren't supported yet (See https://github.com/TeamNewPipe/NewPipe/issues/2415)
-              next if !(fmt.has_key?("indexRange") && fmt.has_key?("initRange"))
+              next if (fmt.index_range.nil? || fmt.init_range.nil?)
 
               # Different representations of the same audio should be groupped into one AdaptationSet.
               # However, most players don't support auto quality switching, so we have to trick them
               # into providing a quality selector.
               # See https://github.com/iv-org/invidious/issues/3074 for more details.
-              xml.element("AdaptationSet", id: i, mimeType: mime_type, startWithSAP: 1, subsegmentAlignment: true, label: fmt["bitrate"].to_s + "k") do
-                codecs = fmt["mimeType"].as_s.split("codecs=")[1].strip('"')
-                bandwidth = fmt["bitrate"].as_i
-                itag = fmt["itag"].as_i
-                url = fmt["url"].as_s
-
+              xml.element("AdaptationSet", id: i, mimeType: mime_type, startWithSAP: 1, subsegmentAlignment: true, label: "#{(fmt.bitrate // 1000)} kbps") do
                 xml.element("Role", schemeIdUri: "urn:mpeg:dash:role:2011", value: i == 0 ? "main" : "alternate")
-
-                xml.element("Representation", id: fmt["itag"], codecs: codecs, bandwidth: bandwidth) do
-                  xml.element("AudioChannelConfiguration", schemeIdUri: "urn:mpeg:dash:23003:3:audio_channel_configuration:2011",
-                    value: "2")
-                  xml.element("BaseURL") { xml.text url }
-                  xml.element("SegmentBase", indexRange: "#{fmt["indexRange"]["start"]}-#{fmt["indexRange"]["end"]}") do
-                    xml.element("Initialization", range: "#{fmt["initRange"]["start"]}-#{fmt["initRange"]["end"]}")
+                xml.element("Representation", id: fmt.itag, codecs: fmt.codecs, bandwidth: fmt.bitrate) do
+                  xml.element("AudioChannelConfiguration", schemeIdUri: "urn:mpeg:dash:23003:3:audio_channel_configuration:2011", value: fmt.audio_channels)
+                  xml.element("BaseURL") { xml.text fmt.url }
+                  xml.element("SegmentBase", indexRange: fmt.index_range.to_s) do
+                    xml.element("Initialization", range: fmt.init_range.to_s)
                   end
                 end
               end
+
               i += 1
             end
           end
@@ -98,33 +92,26 @@ module Invidious::Routes::API::Manifest
           potential_heights = {4320, 2160, 1440, 1080, 720, 480, 360, 240, 144}
 
           {"video/mp4"}.each do |mime_type|
-            mime_streams = video_streams.select { |stream| stream["mimeType"].as_s.starts_with? mime_type }
+            mime_streams = video_streams.select(&.mime_type.== mime_type)
             next if mime_streams.empty?
 
             heights = [] of Int32
+
             xml.element("AdaptationSet", id: i, mimeType: mime_type, startWithSAP: 1, subsegmentAlignment: true, scanType: "progressive") do
               mime_streams.each do |fmt|
                 # OTF streams aren't supported yet (See https://github.com/TeamNewPipe/NewPipe/issues/2415)
-                next if !(fmt.has_key?("indexRange") && fmt.has_key?("initRange"))
-
-                codecs = fmt["mimeType"].as_s.split("codecs=")[1].strip('"')
-                bandwidth = fmt["bitrate"].as_i
-                itag = fmt["itag"].as_i
-                url = fmt["url"].as_s
-                width = fmt["width"].as_i
-                height = fmt["height"].as_i
+                next if (fmt.index_range.nil? || fmt.init_range.nil?)
 
                 # Resolutions reported by YouTube player (may not accurately reflect source)
-                height = potential_heights.min_by { |x| (height - x).abs }
+                height = potential_heights.min_by { |x| (fmt.video_height.to_i32 - x).abs }
                 next if unique_res && heights.includes? height
                 heights << height
 
-                xml.element("Representation", id: itag, codecs: codecs, width: width, height: height,
-                  startWithSAP: "1", maxPlayoutRate: "1",
-                  bandwidth: bandwidth, frameRate: fmt["fps"]) do
-                  xml.element("BaseURL") { xml.text url }
-                  xml.element("SegmentBase", indexRange: "#{fmt["indexRange"]["start"]}-#{fmt["indexRange"]["end"]}") do
-                    xml.element("Initialization", range: "#{fmt["initRange"]["start"]}-#{fmt["initRange"]["end"]}")
+                xml.element("Representation", id: fmt.itag, codecs: fmt.codecs, width: fmt.video_width, height: height,
+                  startWithSAP: "1", maxPlayoutRate: "1", bandwidth: fmt.bitrate, frameRate: fmt.video_fps) do
+                  xml.element("BaseURL") { xml.text fmt.url }
+                  xml.element("SegmentBase", indexRange: fmt.index_range.to_s) do
+                    xml.element("Initialization", range: fmt.init_range.to_s)
                   end
                 end
               end
@@ -135,8 +122,6 @@ module Invidious::Routes::API::Manifest
         end
       end
     end
-
-    return manifest
   end
 
   # /api/manifest/dash/id/videoplayback
diff --git a/src/invidious/routes/embed.cr b/src/invidious/routes/embed.cr
index 266f7ba4..f8617d7f 100644
--- a/src/invidious/routes/embed.cr
+++ b/src/invidious/routes/embed.cr
@@ -157,8 +157,8 @@ module Invidious::Routes::Embed
     adaptive_fmts = video.adaptive_fmts
 
     if params.local
-      fmt_stream.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).request_target) }
-      adaptive_fmts.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).request_target) }
+      fmt_stream.each { |fmt| fmt.url = HttpServer::Utils.proxy_video_url(fmt.url) }
+      adaptive_fmts.each { |fmt| fmt.url = HttpServer::Utils.proxy_video_url(fmt.url) }
     end
 
     video_streams = video.video_streams
@@ -192,10 +192,10 @@ module Invidious::Routes::Embed
     thumbnail = "/vi/#{video.id}/maxres.jpg"
 
     if params.raw
-      url = fmt_stream[0]["url"].as_s
+      url = fmt_stream[0].url
 
       fmt_stream.each do |fmt|
-        url = fmt["url"].as_s if fmt["quality"].as_s == params.quality
+        url = fmt.url if fmt.label == params.quality
       end
 
       return env.redirect url
diff --git a/src/invidious/routes/video_playback.cr b/src/invidious/routes/video_playback.cr
index 26852d06..1aa688b2 100644
--- a/src/invidious/routes/video_playback.cr
+++ b/src/invidious/routes/video_playback.cr
@@ -285,14 +285,15 @@ module Invidious::Routes::VideoPlayback
     if itag.nil?
       fmt = video.fmt_stream[-1]?
     else
-      fmt = video.fmt_stream.find(nil) { |f| f["itag"].as_i == itag } || video.adaptive_fmts.find(nil) { |f| f["itag"].as_i == itag }
+      fmt = video.fmt_stream.find(nil, &.itag.== itag) || video.adaptive_fmts.find(nil, &.itag.== itag)
     end
-    url = fmt.try &.["url"]?.try &.as_s
 
-    if !url
+    if !fmt
       haltf env, status_code: 404
     end
 
+    url = fmt.url
+
     if local
       url = URI.parse(url).request_target.not_nil!
       url += "&title=#{URI.encode_www_form(title, space_to_plus: false)}" if title
diff --git a/src/invidious/routes/watch.cr b/src/invidious/routes/watch.cr
index aabe8dfc..4a1982a9 100644
--- a/src/invidious/routes/watch.cr
+++ b/src/invidious/routes/watch.cr
@@ -121,8 +121,8 @@ module Invidious::Routes::Watch
     adaptive_fmts = video.adaptive_fmts
 
     if params.local
-      fmt_stream.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).request_target) }
-      adaptive_fmts.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).request_target) }
+      fmt_stream.each { |fmt| fmt.url = HttpServer::Utils.proxy_video_url(fmt.url) }
+      adaptive_fmts.each { |fmt| fmt.url = HttpServer::Utils.proxy_video_url(fmt.url) }
     end
 
     video_streams = video.video_streams
@@ -160,21 +160,21 @@ module Invidious::Routes::Watch
 
     if params.raw
       if params.listen
-        url = audio_streams[0]["url"].as_s
+        url = audio_streams[0].url
 
         if params.quality.ends_with? "k"
           audio_streams.each do |fmt|
-            if fmt["bitrate"].as_i == params.quality.rchop("k").to_i
-              url = fmt["url"].as_s
+            if fmt.bitrate == params.quality.rchop("k").to_i
+              url = fmt.url
             end
           end
         end
       else
-        url = fmt_stream[0]["url"].as_s
+        url = fmt_stream[0].url
 
         fmt_stream.each do |fmt|
-          if fmt["quality"].as_s == params.quality
-            url = fmt["url"].as_s
+          if fmt.label == params.quality
+            url = fmt.url
           end
         end
       end
diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr
index ae09e736..de432297 100644
--- a/src/invidious/videos.cr
+++ b/src/invidious/videos.cr
@@ -26,6 +26,12 @@ struct Video
   @[DB::Field(ignore: true)]
   @captions = [] of Invidious::Videos::Captions::Metadata
 
+  @[DB::Field(ignore: true)]
+  @adaptive_fmts = [] of Invidious::Videos::AdaptativeStream
+
+  @[DB::Field(ignore: true)]
+  @fmt_stream = [] of Invidious::Videos::ProgressiveHttpStream
+
   @[DB::Field(ignore: true)]
   property description : String?
 
@@ -92,32 +98,32 @@ struct Video
 
   # Methods for parsing streaming data
 
-  def fmt_stream : Array(Hash(String, JSON::Any))
-    if formats = info.dig?("streamingData", "formats")
-      return formats
-        .as_a.map(&.as_h)
-        .sort_by! { |f| f["width"]?.try &.as_i || 0 }
-    else
-      return [] of Hash(String, JSON::Any)
+  def fmt_stream : Array(Invidious::Videos::ProgressiveHttpStream)
+    if @fmt_stream.empty?
+      if formats = info.dig?("streamingData", "formats")
+        @fmt_stream = Invidious::Videos.parse_progressive_formats(formats)
+      end
     end
+
+    return @fmt_stream
   end
 
-  def adaptive_fmts : Array(Hash(String, JSON::Any))
-    if formats = info.dig?("streamingData", "adaptiveFormats")
-      return formats
-        .as_a.map(&.as_h)
-        .sort_by! { |f| f["width"]?.try &.as_i || 0 }
-    else
-      return [] of Hash(String, JSON::Any)
+  def adaptive_fmts : Array(Invidious::Videos::AdaptativeStream)
+    if @adaptive_fmts.empty?
+      if formats = info.dig?("streamingData", "adaptiveFormats")
+        @adaptive_fmts = Invidious::Videos.parse_adaptative_formats(formats)
+      end
     end
+
+    return @adaptive_fmts
   end
 
-  def video_streams
-    adaptive_fmts.select &.["mimeType"]?.try &.as_s.starts_with?("video")
+  def video_streams : Array(Invidious::Videos::AdaptativeVideoStream)
+    self.adaptive_fmts.select(Invidious::Videos::AdaptativeVideoStream)
   end
 
-  def audio_streams
-    adaptive_fmts.select &.["mimeType"]?.try &.as_s.starts_with?("audio")
+  def audio_streams : Array(Invidious::Videos::AdaptativeAudioStream)
+    self.adaptive_fmts.select(Invidious::Videos::AdaptativeAudioStream)
   end
 
   # Misc. methods
diff --git a/src/invidious/videos/formats.cr b/src/invidious/videos/formats.cr
index e98e7257..c49c7411 100644
--- a/src/invidious/videos/formats.cr
+++ b/src/invidious/videos/formats.cr
@@ -1,5 +1,5 @@
 module Invidious::Videos::Formats
-  def self.itag_to_metadata?(itag : JSON::Any)
+  def self.itag_to_metadata?(itag : Int)
     return FORMATS[itag.to_s]?
   end
 
diff --git a/src/invidious/videos/streams.cr b/src/invidious/videos/streams.cr
new file mode 100644
index 00000000..f2f9d9ce
--- /dev/null
+++ b/src/invidious/videos/streams.cr
@@ -0,0 +1,336 @@
+module Invidious::Videos
+  # ------------------
+  #  Structs & Enums
+  # ------------------
+
+  # Audio quality levels; the "audioQuality" JSON value is parsed into these once its "AUDIO_QUALITY_" prefix is stripped
+  enum AudioQuality
+    UltraLow
+    Low
+    Medium
+  end
+
+  # Spatial audio types; parsed from "spatialAudioType" once its "SPATIAL_AUDIO_TYPE_" prefix is stripped (a missing field maps to None)
+  enum SpatialType
+    None
+    AmbisonicsQuad
+    Ambisonics_5_1
+    FoaWithNonDiegetic
+  end
+
+  enum ProjType
+    Unknown
+    Rectangular
+    Equirectangular
+    EquirectangularThreedTopBottom
+    Mesh
+  end
+
+  struct ByteRange # pair of byte offsets, printed as "start-end"
+    getter start : UInt32
+    getter end : UInt32
+
+    def initialize(@start, @end)
+    end
+
+    def to_s(io : IO) : Nil # IO overload: plain #to_s, string interpolation and `io <<` all go through it
+      io << @start << '-' << @end
+    end
+  end
+
+  # ------------------
+  #  Traits
+  # ------------------
+
+  # Properties common to all streams containing audio (quality, sample rate, channel count, loudness, spatial type)
+  module AudioProperties
+    macro included
+      property audio_quality : AudioQuality
+      property audio_sample_rate : UInt32
+      property audio_channels : UInt8
+      property audio_loudness_db : Float64 = 0.0
+      property audio_spatial_type : SpatialType
+
+      private macro init_audio_properties(format)
+        @audio_quality = AudioQuality.parse(format["audioQuality"].as_s.lchop("AUDIO_QUALITY_"))
+        @audio_sample_rate = format["audioSampleRate"].as_s.to_u32
+        @audio_channels = format["audioChannels"].as_i.to_u8
+
+        if _loudness = format["loudnessDb"]? # optional; accepted as either a float or an integer JSON number
+          @audio_loudness_db = _loudness.as_f? || _loudness.as_i64?.try &.to_f || 0.0
+        end
+
+        _spatial_type = format["spatialAudioType"]?.try &.as_s || "SPATIAL_AUDIO_TYPE_NONE"
+        @audio_spatial_type = SpatialType.parse(_spatial_type.lchop("SPATIAL_AUDIO_TYPE_"))
+      end
+    end
+  end
+
+  # Properties common to all streams containing video, read from the required "width", "height" and "fps" JSON fields
+  module VideoProperties
+    macro included
+      property video_width : UInt32
+      property video_height : UInt32
+      property video_fps : UInt16
+
+      private macro init_video_properties(format)
+        @video_width = format["width"].as_i.to_u32
+        @video_height = format["height"].as_i.to_u32
+        @video_fps = format["fps"].as_i.to_u16
+      end
+    end
+  end
+
+  # Properties common to all audio & video streams ("bitrate" is required, the other two fields are optional)
+  module AVCommonProperties
+    macro included
+      property bitrate : UInt64
+      property bitrate_avg : UInt64?
+
+      # Nilable because itag 22 sometimes doesn't have a contentLength,
+      # and the field is not present on livestreams
+      property content_length : UInt64?
+
+      private macro init_av_common_properties(format)
+        @bitrate = format["bitrate"].as_i.to_u64
+        @bitrate_avg = format["averageBitrate"]?.try &.as_i.to_u64
+        @content_length = format["contentLength"]?.try &.as_s.to_u64 # read as a JSON string, unlike the bitrates
+      end
+    end
+  end
+
+  # Properties that only apply to multi-lingual adaptative streams.
+  # They apply to audio and text streams (notably text/mp4).
+  #
+  # Sample JSON for an audio track:
+  #   "audioTrack": {
+  #     "displayName": "Arabic",
+  #     "id": "ar.0",
+  #     "audioIsDefault": false
+  #   },
+  #
+  # Sample JSON for a caption track:
+  #   "captionTrack": {
+  #     "displayName": "English",
+  #     "vssId": ".en.eEY6OEpapPo",
+  #     "languageCode": "en"
+  #   }
+  module TrackProperties
+    macro included
+      property track_id : String?
+      property track_name : String = "default"
+      property iso_code : String?
+      property default : Bool = false
+
+      private macro init_track_properties(format)
+        if audio_track = format["audioTrack"]?
+          id = audio_track["id"].as_s
+
+          @track_id = id
+          @track_name = audio_track["displayName"].as_s
+          @default = audio_track["audioIsDefault"].as_bool
+
+          _xtags = format["xtags"]?
+            .try { |i| Base64.decode(URI.decode_www_form(i.as_s)) }
+            .try { |i| Protodec::Any.parse(IO::Memory.new(i)) }
+
+          @iso_code = _xtags
+            .try &.dig?("1:1:embedded", "2:2:string")
+            .try &.as_s || id.rchop(".0")
+          # ^ language code taken from the decoded "xtags" when available, otherwise the track id minus its ".0" suffix
+        elsif caption_track = format["captionTrack"]?
+          @track_name = caption_track["displayName"].as_s
+          @track_id = caption_track["vssId"].as_s
+          @iso_code = caption_track["languageCode"].as_s
+        end
+      end
+    end
+  end
+
+  # Properties that only apply to adaptative streams of regular videos: the optional "initRange" and "indexRange" (nil when absent)
+  module AdaptativeProperties
+    macro included
+      property init_range : ByteRange?
+      property index_range : ByteRange?
+
+      private macro init_adaptative_properties(format)
+        if init_range = format["initRange"]?
+          @init_range = ByteRange.new(
+            init_range["start"].as_s.to_u32,
+            init_range["end"].as_s.to_u32
+          )
+        end
+
+        if index_range = format["indexRange"]?
+          @index_range = ByteRange.new(
+            index_range["start"].as_s.to_u32,
+            index_range["end"].as_s.to_u32
+          )
+        end
+      end
+    end
+  end
+
+  # Properties that only apply to adaptative streams from livestreams
+  # (either in progress, or recently ended); both fields are optional
+  module LiveProperties
+    macro included
+      property target_duration : UInt32?
+      property max_dvr_duration : UInt32?
+
+      private macro init_live_properties(format)
+        @target_duration = format["targetDurationSec"]?.try(&.as_i.to_u32)
+        @max_dvr_duration = format["maxDvrDurationSec"]?.try(&.as_i.to_u32)
+      end
+    end
+  end
+
+  # ------------------
+  #  Base class
+  # ------------------
+
+  # Abstract base class of all streams; its constructor reads the properties shared by every format from the raw JSON
+  abstract class Stream
+    getter itag : UInt16
+    getter label : String
+    property url : String
+
+    getter raw_mime_type : String
+    getter mime_type : String
+    getter codecs : String
+
+    getter last_modified : Time?
+
+    getter projection_type : ProjType
+
+    def initialize(format : JSON::Any, @label)
+      @itag = format["itag"].as_i.to_u16
+      @url = format["url"].as_s
+
+      @raw_mime_type = format["mimeType"].as_s
+
+      # Split the raw mimeType string at ';' into the MIME type and the codecs list (the ` codecs="..."` wrapper is stripped)
+      @mime_type, raw_codecs = @raw_mime_type.split(';')
+      @codecs = raw_codecs.lchop(" codecs=\"").rchop('"')
+
+      # Last modified is not present on livestreams
+      if last_modified = format["lastModified"]?.try &.as_s
+        # E.g "1670664306(.)849305"
+        # Note: (.) is not present in the input data, it's used here to show
+        # the demarcation between seconds and microseconds.
+        timestamp = last_modified[0...10]
+        microseconds = last_modified[10..]
+
+        @last_modified = Time.utc(
+          seconds: timestamp.to_i64,
+          nanoseconds: microseconds.to_i * 1000
+        )
+      end
+
+      @projection_type = ProjType.parse(format["projectionType"].as_s)
+
+      # Run the init_* macro of every property module listed below that the concrete class includes
+      {% begin %}
+        {%
+          properties_types = [
+            AudioProperties,
+            VideoProperties,
+            TrackProperties,
+            AVCommonProperties,
+            AdaptativeProperties,
+            LiveProperties,
+          ]
+        %}
+
+        {% for type in properties_types %}
+          # Call the appropriate initialization macro if self
+          # inherits from the given type
+          {% if @type < type %}
+            init_{{type.id.split("::").last.id.underscore}}(format)
+          {% end %}
+        {% end %}
+      {% end %}
+    end
+  end
+
+  # ------------------
+  #  Children classes
+  # ------------------
+
+  # An HTTP progressive stream (audio + video)
+  class ProgressiveHttpStream < Stream
+    include AudioProperties
+    include VideoProperties
+    include AVCommonProperties
+  end
+
+  # Base class for adaptative (DASH) streams
+  abstract class AdaptativeStream < Stream
+    include AdaptativeProperties
+    include LiveProperties
+  end
+
+  # An audio-only adaptative (DASH) stream
+  class AdaptativeAudioStream < AdaptativeStream
+    include AudioProperties
+    include AVCommonProperties
+  end
+
+  # An audio-only adaptative (DASH) stream with track information
+  class AdaptativeAudioTrackStream < AdaptativeAudioStream
+    include TrackProperties
+  end
+
+  # A video-only adaptative (DASH) stream
+  class AdaptativeVideoStream < AdaptativeStream
+    include VideoProperties
+    include AVCommonProperties
+  end
+
+  # A text-only adaptative (DASH) stream
+  class AdaptativeTextStream < AdaptativeStream
+    include TrackProperties
+  end
+
+  # ------------------------
+  #  High-level functions
+  # ------------------------
+
+  def self.parse_progressive_formats(formats : JSON::Any) : Array(ProgressiveHttpStream)
+    return formats.as_a.compact_map do |format|
+      # Ciphered entries are unsupported (see parse_adaptative_formats) and are expected to lack the plain "url" a Stream requires
+      next if (format["cipher"]? || format["signatureCipher"]?)
+      ProgressiveHttpStream.new(format, format["quality"].to_s)
+    end
+  end
+
+  def self.parse_adaptative_formats(formats : JSON::Any) : Array(AdaptativeStream)
+    return formats.as_a.compact_map do |format|
+      # "Encrypted" video URLs are not supported. They require some logic
+      # to extract the JS and run it in a dedicated JS engine like Duktape.
+      # See: https://github.com/iv-org/invidious/issues/3245
+      next if (format["cipher"]? || format["signatureCipher"]?)
+
+      # OTF streams are not supported either.
+      # See https://github.com/TeamNewPipe/NewPipe/issues/2415
+      next if format["type"]? == "FORMAT_STREAM_TYPE_OTF"
+
+      # Pick the stream class from the MIME type; any other type yields nil and is dropped by compact_map
+      case format["mimeType"].as_s
+      when .starts_with?("audio/")
+        label = format["audioQuality"].as_s.lchop("AUDIO_QUALITY_").downcase
+
+        if format["audioTrack"]?
+          AdaptativeAudioTrackStream.new(format, label)
+        else
+          AdaptativeAudioStream.new(format, label)
+        end
+      when .starts_with?("video/")
+        label = format["qualityLabel"].to_s
+        AdaptativeVideoStream.new(format, label)
+      when .starts_with?("text/")
+        label = format.dig("captionTrack", "displayName").as_s
+        AdaptativeTextStream.new(format, label)
+      end
+    end
+  end
+end
diff --git a/src/invidious/views/components/player.ecr b/src/invidious/views/components/player.ecr
index 5c28358b..395ead68 100644
--- a/src/invidious/views/components/player.ecr
+++ b/src/invidious/views/components/player.ecr
@@ -12,19 +12,19 @@
                best_m4a_stream_index = 0
                best_m4a_stream_bitrate = 0
                audio_streams.each_with_index do |fmt, i|
-                 bandwidth = fmt["bitrate"].as_i
-                 if (fmt["mimeType"].as_s.starts_with?("audio/mp4") && bandwidth > best_m4a_stream_bitrate)
+                 bandwidth = fmt.bitrate
+                 if (fmt.mime_type == "audio/mp4" && bandwidth > best_m4a_stream_bitrate)
                    best_m4a_stream_bitrate = bandwidth
                    best_m4a_stream_index = i
                  end
                end
 
                audio_streams.each_with_index do |fmt, i|
-                src_url  = "/latest_version?id=#{video.id}&itag=#{fmt["itag"]}"
+                src_url  = "/latest_version?id=#{video.id}&itag=#{fmt.itag}"
                 src_url += "&local=true" if params.local
 
-                bitrate = fmt["bitrate"]
-                mimetype = HTML.escape(fmt["mimeType"].as_s)
+                bitrate = fmt.bitrate // 1000
+                mimetype = HTML.escape(fmt.raw_mime_type)
 
                 selected = (i == best_m4a_stream_index)
             %>
@@ -39,14 +39,14 @@
             <% end %>
 
             <%
-            fmt_stream.reject! { |f| f["itag"] == 17 }
-            fmt_stream.sort_by! {|f| params.quality == f["quality"] ? 0 : 1 }
+            fmt_stream.reject!(&.itag.== 17)
+            fmt_stream.sort_by! { |f| params.quality == f.label ? 0 : 1 }
             fmt_stream.each_with_index do |fmt, i|
-                src_url  = "/latest_version?id=#{video.id}&itag=#{fmt["itag"]}"
+                src_url  = "/latest_version?id=#{video.id}&itag=#{fmt.itag}"
                 src_url += "&local=true" if params.local
 
-                quality = fmt["quality"]
-                mimetype = HTML.escape(fmt["mimeType"].as_s)
+                quality = fmt.label
+                mimetype = HTML.escape(fmt.raw_mime_type)
 
                 selected = params.quality ? (params.quality == quality) : (i == 0)
             %>