Files
nadeko_invidious/src/invidious/videos/transcript.cr
syeopite f317e6620a Fix extraction of transcript headings
Innertube structure was changed
2025-05-10 20:04:07 -07:00

163 lines
5.2 KiB
Crystal

module Invidious::Videos
# A `Transcripts` struct encapsulates a sequence of lines that together forms the whole transcript for a given YouTube video.
# These lines can be categorized into two types: section headings and regular lines representing content from the video.
struct Transcript
# Types
record HeadingLine, start_ms : Time::Span, end_ms : Time::Span, line : String
record RegularLine, start_ms : Time::Span, end_ms : Time::Span, line : String
alias TranscriptLine = HeadingLine | RegularLine
property lines : Array(TranscriptLine)
property language_code : String
property auto_generated : Bool
# User friendly label for the current transcript.
# Example: "English (auto-generated)"
property label : String
# Initializes a new Transcript struct with the contents and associated metadata describing it
def initialize(@lines : Array(TranscriptLine), @language_code : String, @auto_generated : Bool, @label : String)
end
# Generates a protobuf string to fetch the requested transcript from YouTube
def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String
kind = auto_generated ? "asr" : ""
object = {
"1:0:string" => video_id,
"2:base64" => {
"1:string" => kind,
"2:string" => language_code,
"3:string" => "",
},
"3:varint" => 1_i64,
"5:string" => "engagement-panel-searchable-transcript-search-panel",
"6:varint" => 1_i64,
"7:varint" => 1_i64,
"8:varint" => 1_i64,
}
params = object.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return params
end
# Constructs a Transcripts struct from the initial YouTube response
def self.from_raw(initial_data : Hash(String, JSON::Any), language_code : String, auto_generated : Bool)
transcript_panel = initial_data.dig("actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer",
"content", "transcriptSearchPanelRenderer")
segment_list = transcript_panel.dig("body", "transcriptSegmentListRenderer")
if !segment_list["initialSegments"]?
raise NotFoundException.new("Requested transcript does not exist")
end
# Extract user-friendly label for the current transcript
footer_language_menu = transcript_panel.dig?(
"footer", "transcriptFooterRenderer", "languageMenu", "sortFilterSubMenuRenderer", "subMenuItems"
)
if footer_language_menu
label = footer_language_menu.as_a.select(&.["selected"].as_bool)[0]["title"].as_s
else
label = language_code
end
# Extract transcript lines
initial_segments = segment_list["initialSegments"].as_a
lines = [] of TranscriptLine
initial_segments.each do |line|
if unpacked_line = line["transcriptSectionHeaderRenderer"]?
line_type = HeadingLine
text = (unpacked_line.dig?("sectionHeader", "sectionHeaderViewModel", "headline", "content").try &.as_s) || ""
else
unpacked_line = line["transcriptSegmentRenderer"]
text = extract_text(unpacked_line["snippet"]) || ""
line_type = RegularLine
end
start_ms = unpacked_line["startMs"].as_s.to_i.millisecond
end_ms = unpacked_line["endMs"].as_s.to_i.millisecond
lines << line_type.new(start_ms, end_ms, text)
end
return Transcript.new(
lines: lines,
language_code: language_code,
auto_generated: auto_generated,
label: label
)
end
# Converts transcript lines to a WebVTT file
#
# This is used within Invidious to replace subtitles
# as to workaround YouTube's rate-limited timedtext endpoint.
def to_vtt
settings_field = {
"Kind" => "captions",
"Language" => @language_code,
}
vtt = WebVTT.build(settings_field) do |builder|
@lines.each do |line|
# Section headers are excluded from the VTT conversion as to
# match the regular captions returned from YouTube as much as possible
next if line.is_a? HeadingLine
builder.cue(line.start_ms, line.end_ms, line.line)
end
end
return vtt
end
def to_json(json : JSON::Builder)
json.field "languageCode", @language_code
json.field "autoGenerated", @auto_generated
json.field "label", @label
json.field "body" do
json.array do
@lines.each do |line|
json.object do
if line.is_a? HeadingLine
json.field "type", "heading"
else
json.field "type", "regular"
end
json.field "startMs", line.start_ms.total_milliseconds
json.field "endMs", line.end_ms.total_milliseconds
json.field "line", line.line
end
end
end
end
end
def to_json
JSON.build do |json|
json.object do
json.field "transcript" do
json.object do
to_json(json)
end
end
end
end
end
end
end