Integrate transcript captions into captions API

2026-01-26 00:31:38 +00:00 · 2023-07-23 05:02:02 -07:00
parent caac7e2166
commit e4942b188f
5 changed files with 91 additions and 54 deletions
--- a/src/invidious/videos/caption.cr
+++ b/src/invidious/videos/caption.cr
@@ -6,7 +6,9 @@ module Invidious::Videos
    property language_code : String
    property base_url : String

-    def initialize(@name, @language_code, @base_url)
+    property auto_generated : Bool
+
+    def initialize(@name, @language_code, @base_url, @auto_generated)
    end

    # Parse the JSON structure from Youtube
@@ -25,7 +27,12 @@ module Invidious::Videos
        language_code = caption["languageCode"].to_s
        base_url = caption["baseUrl"].to_s

-        captions_list << CaptionMetadata.new(name, language_code, base_url)
+        auto_generated = false
+        if caption["kind"]? && caption["kind"] == "asr"
+          auto_generated = true
+        end
+
+        captions_list << CaptionMetadata.new(name, language_code, base_url, auto_generated)
      end

      return captions_list
--- a/src/invidious/videos/transcript.cr
+++ b/src/invidious/videos/transcript.cr
@@ -85,7 +85,13 @@ module Invidious::Videos

      lines = [] of TranscriptLine
      body.each do |line|
+        # Transcript section headers. They are not a part of the captions, and as such we can safely skip them.
+        if line.as_h.has_key?("transcriptSectionHeaderRenderer")
+          next
+        end
+
        line = line["transcriptSegmentRenderer"]
+
        start_ms = line["startMs"].as_s.to_i.millisecond
        end_ms = line["endMs"].as_s.to_i.millisecond