diff --git a/src/invidious.cr b/src/invidious.cr index 082c05c5..60e68fa4 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -208,35 +208,35 @@ get "/api/v1/storyboards/:id" do |env| storyboard = storyboard[0] end - webvtt = <<-END_VTT - WEBVTT + String.build do |str| + str << <<-END_VTT + WEBVTT - END_VTT + END_VTT - start_time = 0.milliseconds - end_time = storyboard[:interval].milliseconds + start_time = 0.milliseconds + end_time = storyboard[:interval].milliseconds - storyboard[:storyboard_count].times do |i| - host_url = make_host_url(config, Kemal.config) - url = storyboard[:url].gsub("$M", i).gsub("https://i9.ytimg.com", host_url) + storyboard[:storyboard_count].times do |i| + host_url = make_host_url(config, Kemal.config) + url = storyboard[:url].gsub("$M", i).gsub("https://i9.ytimg.com", host_url) - storyboard[:storyboard_height].times do |j| - storyboard[:storyboard_width].times do |k| - webvtt += <<-END_CUE - #{start_time}.000 --> #{end_time}.000 - #{url}#xywh=#{storyboard[:width] * k},#{storyboard[:height] * j},#{storyboard[:width]},#{storyboard[:height]} + storyboard[:storyboard_height].times do |j| + storyboard[:storyboard_width].times do |k| + str << <<-END_CUE + #{start_time}.000 --> #{end_time}.000 + #{url}#xywh=#{storyboard[:width] * k},#{storyboard[:height] * j},#{storyboard[:width]},#{storyboard[:height]} - END_CUE + END_CUE - start_time += storyboard[:interval].milliseconds - end_time += storyboard[:interval].milliseconds + start_time += storyboard[:interval].milliseconds + end_time += storyboard[:interval].milliseconds + end end end end - - webvtt end get "/api/v1/captions/:id" do |env| @@ -306,7 +306,7 @@ get "/api/v1/captions/:id" do |env| caption = caption[0] end - url = caption.baseUrl + "&tlang=#{tlang}" + url = "#{caption.baseUrl}&tlang=#{tlang}" # Auto-generated captions often have cues that aren't aligned properly with the video, # as well as some other markup that makes it cumbersome, so we try to fix that here @@ -314,46 +314,47 
@@ get "/api/v1/captions/:id" do |env| caption_xml = client.get(url).body caption_xml = XML.parse(caption_xml) - webvtt = <<-END_VTT - WEBVTT - Kind: captions - Language: #{tlang || caption.languageCode} + webvtt = String.build do |str| + str << <<-END_VTT + WEBVTT + Kind: captions + Language: #{tlang || caption.languageCode} - END_VTT + END_VTT - caption_nodes = caption_xml.xpath_nodes("//transcript/text") - caption_nodes.each_with_index do |node, i| - start_time = node["start"].to_f.seconds - duration = node["dur"]?.try &.to_f.seconds - duration ||= start_time + caption_nodes = caption_xml.xpath_nodes("//transcript/text") + caption_nodes.each_with_index do |node, i| + start_time = node["start"].to_f.seconds + duration = node["dur"]?.try &.to_f.seconds + duration ||= start_time - if caption_nodes.size > i + 1 - end_time = caption_nodes[i + 1]["start"].to_f.seconds - else - end_time = start_time + duration + if caption_nodes.size > i + 1 + end_time = caption_nodes[i + 1]["start"].to_f.seconds + else + end_time = start_time + duration + end + + start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}" + end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}" + + text = HTML.unescape(node.content) + text = text.gsub(/<font color="#[a-fA-F0-9]{6}">/, "") + text = text.gsub(/<\/font>/, "") + if md = text.match(/(?<name>.*) : (?<text>.*)/) + text = "<v #{md["name"]}>#{md["text"]}</v>" + end + + str << <<-END_CUE + #{start_time} --> #{end_time} + #{text} + + + END_CUE end - - start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}" - end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, 
'0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}" - - text = HTML.unescape(node.content) - text = text.gsub(/<font color="#[a-fA-F0-9]{6}">/, "") - text = text.gsub(/<\/font>/, "") - if md = text.match(/(?<name>.*) : (?<text>.*)/) - text = "<v #{md["name"]}>#{md["text"]}</v>" - end - - webvtt += <<-END_CUE - #{start_time} --> #{end_time} - #{text} - - - END_CUE end else - url += "&format=vtt" - webvtt = client.get(url).body + webvtt = client.get("#{url}&format=vtt").body end if title = env.params.query["title"]? @@ -1521,12 +1522,24 @@ get "/videoplayback" do |env| end end + client = make_client(URI.parse(host), region) + response = HTTP::Client::Response.new(403) 5.times do begin - client = make_client(URI.parse(host), region) response = client.head(url, headers) - break + + if response.headers["Location"]? + location = URI.parse(response.headers["Location"]) + env.response.headers["Access-Control-Allow-Origin"] = "*" + + host = "#{location.scheme}://#{location.host}" + client = make_client(URI.parse(host), region) + + url = "#{location.full_path}&host=#{location.host}#{region ? "&region=#{region}" : ""}" + else + break + end rescue Socket::Addrinfo::Error if !mns.empty? mn = mns.pop @@ -1534,25 +1547,12 @@ get "/videoplayback" do |env| fvip = "3" host = "https://r#{fvip}---#{mn}.googlevideo.com" + client = make_client(URI.parse(host), region) rescue ex + pp ex end end - if response.headers["Location"]? 
- url = URI.parse(response.headers["Location"]) - host = url.host - env.response.headers["Access-Control-Allow-Origin"] = "*" - - url = url.full_path - url += "&host=#{host}" - - if region - url += "&region=#{region}" - end - - next env.redirect url - end - if response.status_code >= 400 env.response.status_code = response.status_code next @@ -1609,6 +1609,8 @@ get "/videoplayback" do |env| chunk_end = chunk_start + HTTP_CHUNK_SIZE - 1 end + client = make_client(URI.parse(host), region) + # TODO: Record bytes written so we can restart after a chunk fails while true if !range_end && content_length @@ -1626,7 +1628,6 @@ get "/videoplayback" do |env| headers["Range"] = "bytes=#{chunk_start}-#{chunk_end}" begin - client = make_client(URI.parse(host), region) client.get(url, headers) do |response| if first_chunk if !env.request.headers["Range"]? && response.status_code == 206 @@ -1645,11 +1646,7 @@ get "/videoplayback" do |env| if location = response.headers["Location"]? location = URI.parse(location) - location = "#{location.full_path}&host=#{location.host}" - - if region - location += "&region=#{region}" - end + location = "#{location.full_path}&host=#{location.host}#{region ? 
"&region=#{region}" : ""}" env.redirect location break @@ -1676,6 +1673,8 @@ get "/videoplayback" do |env| rescue ex if ex.message != "Error reading socket: Connection reset by peer" break + else + client = make_client(URI.parse(host), region) end end diff --git a/src/invidious/helpers/handlers.cr b/src/invidious/helpers/handlers.cr index 95936199..af1af8fc 100644 --- a/src/invidious/helpers/handlers.cr +++ b/src/invidious/helpers/handlers.cr @@ -110,10 +110,9 @@ class APIHandler < Kemal::Handler call_next env env.response.output.rewind - response = env.response.output.gets_to_end - if env.response.headers["Content-Type"]?.try &.== "application/json" - response = JSON.parse(response) + if env.response.headers.includes_word?("Content-Type", "application/json") + response = JSON.parse(env.response.output) if fields_text = env.params.query["fields"]? begin @@ -129,6 +128,8 @@ class APIHandler < Kemal::Handler else response = response.to_json end + else + response = env.response.output.gets_to_end end rescue ex ensure diff --git a/src/invidious/search.cr b/src/invidious/search.cr index 7a36f32e..a55bb216 100644 --- a/src/invidious/search.cr +++ b/src/invidious/search.cr @@ -277,96 +277,97 @@ end def produce_search_params(sort : String = "relevance", date : String = "", content_type : String = "", duration : String = "", features : Array(String) = [] of String) - head = "\x08" - head += case sort - when "relevance" - "\x00" - when "rating" - "\x01" - when "upload_date", "date" - "\x02" - when "view_count", "views" - "\x03" - else - raise "No sort #{sort}" - end + header = IO::Memory.new + header.write Bytes[0x08] + header.write case sort + when "relevance" + Bytes[0x00] + when "rating" + Bytes[0x01] + when "upload_date", "date" + Bytes[0x02] + when "view_count", "views" + Bytes[0x03] + else + raise "No sort #{sort}" + end - body = "" - body += case date - when "hour" - "\x08\x01" - when "today" - "\x08\x02" - when "week" - "\x08\x03" - when "month" - "\x08\x04" - when 
"year" - "\x08\x05" - else - "" - end + body = IO::Memory.new + body.write case date + when "hour" + Bytes[0x08, 0x01] + when "today" + Bytes[0x08, 0x02] + when "week" + Bytes[0x08, 0x03] + when "month" + Bytes[0x08, 0x04] + when "year" + Bytes[0x08, 0x05] + else + Bytes.new(0) + end - body += case content_type - when "video" - "\x10\x01" - when "channel" - "\x10\x02" - when "playlist" - "\x10\x03" - when "movie" - "\x10\x04" - when "show" - "\x10\x05" - when "all" - "" - else - "\x10\x01" - end + body.write case content_type + when "video" + Bytes[0x10, 0x01] + when "channel" + Bytes[0x10, 0x02] + when "playlist" + Bytes[0x10, 0x03] + when "movie" + Bytes[0x10, 0x04] + when "show" + Bytes[0x10, 0x05] + when "all" + Bytes.new(0) + else + Bytes[0x10, 0x01] + end - body += case duration - when "short" - "\x18\x01" - when "long" - "\x18\x02" - else - "" - end + body.write case duration + when "short" + Bytes[0x18, 0x01] + when "long" + Bytes[0x18, 0x02] + else + Bytes.new(0) + end features.each do |feature| - body += case feature - when "hd" - "\x20\x01" - when "subtitles" - "\x28\x01" - when "creative_commons", "cc" - "\x30\x01" - when "3d" - "\x38\x01" - when "live", "livestream" - "\x40\x01" - when "purchased" - "\x48\x01" - when "4k" - "\x70\x01" - when "360" - "\x78\x01" - when "location" - "\xb8\x01\x01" - when "hdr" - "\xc8\x01\x01" - else - raise "Unknown feature #{feature}" - end + body.write case feature + when "hd" + Bytes[0x20, 0x01] + when "subtitles" + Bytes[0x28, 0x01] + when "creative_commons", "cc" + Bytes[0x30, 0x01] + when "3d" + Bytes[0x38, 0x01] + when "live", "livestream" + Bytes[0x40, 0x01] + when "purchased" + Bytes[0x48, 0x01] + when "4k" + Bytes[0x70, 0x01] + when "360" + Bytes[0x78, 0x01] + when "location" + Bytes[0xb8, 0x01, 0x01] + when "hdr" + Bytes[0xc8, 0x01, 0x01] + else + Bytes.new(0) + end end + token = header if !body.empty? 
- token = head + "\x12" + body.size.unsafe_chr + body - else - token = head + token.write Bytes[0x12, body.bytesize] + token.write body.to_slice end - token = Base64.urlsafe_encode(token) + token = Base64.urlsafe_encode(token.to_slice) token = URI.escape(token) return token diff --git a/src/invidious/users.cr b/src/invidious/users.cr index 8bd82bf1..d3da28d7 100644 --- a/src/invidious/users.cr +++ b/src/invidious/users.cr @@ -295,8 +295,7 @@ def get_subscription_feed(db, user, max_results = 40, page = 1) args = arg_array(notifications) - notifications = db.query_all("SELECT * FROM channel_videos WHERE id IN (#{args}) - ORDER BY published DESC", notifications, as: ChannelVideo) + notifications = db.query_all("SELECT * FROM channel_videos WHERE id IN (#{args}) ORDER BY published DESC", notifications, as: ChannelVideo) videos = [] of ChannelVideo notifications.sort_by! { |video| video.published }.reverse! @@ -322,14 +321,11 @@ def get_subscription_feed(db, user, max_results = 40, page = 1) else values = "VALUES #{user.watched.map { |id| %(('#{id}')) }.join(",")}" end - videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} WHERE \ - NOT id = ANY (#{values}) \ - ORDER BY ucid, published DESC", as: ChannelVideo) + videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} WHERE NOT id = ANY (#{values}) ORDER BY ucid, published DESC", as: ChannelVideo) else # Show latest video from each channel - videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} \ - ORDER BY ucid, published DESC", as: ChannelVideo) + videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} ORDER BY ucid, published DESC", as: ChannelVideo) end videos.sort_by! { |video| video.published }.reverse! 
@@ -342,14 +338,11 @@ def get_subscription_feed(db, user, max_results = 40, page = 1) else values = "VALUES #{user.watched.map { |id| %(('#{id}')) }.join(",")}" end - videos = PG_DB.query_all("SELECT * FROM #{view_name} WHERE \ - NOT id = ANY (#{values}) \ - ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo) + videos = PG_DB.query_all("SELECT * FROM #{view_name} WHERE NOT id = ANY (#{values}) ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo) else # Sort subscriptions as normal - videos = PG_DB.query_all("SELECT * FROM #{view_name} \ - ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo) + videos = PG_DB.query_all("SELECT * FROM #{view_name} ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo) end end @@ -366,16 +359,11 @@ def get_subscription_feed(db, user, max_results = 40, page = 1) videos.sort_by! { |video| video.author }.reverse! end - notifications = PG_DB.query_one("SELECT notifications FROM users WHERE email = $1", user.email, - as: Array(String)) + notifications = PG_DB.query_one("SELECT notifications FROM users WHERE email = $1", user.email, as: Array(String)) notifications = videos.select { |v| notifications.includes? v.id } videos = videos - notifications end - if !limit - videos = videos[0..max_results] - end - return videos, notifications end diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index 03fe9a26..85ccb9a2 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -247,6 +247,7 @@ end struct Video property player_json : JSON::Any? + property recommended_json : JSON::Any? module HTTPParamConverter def self.from_rs(rs) @@ -425,9 +426,29 @@ struct Video json.field "videoThumbnails" do generate_thumbnails(json, rv["id"], config, kemal_config) end + json.field "author", rv["author"] + json.field "authorUrl", rv["author_url"] if rv["author_url"]? + json.field "authorId", rv["ucid"] if rv["ucid"]? 
+ if rv["author_thumbnail"]? + json.field "authorThumbnails" do + json.array do + qualities = {32, 48, 76, 100, 176, 512} + + qualities.each do |quality| + json.object do + json.field "url", rv["author_thumbnail"].gsub(/s\d+-/, "s#{quality}-") + json.field "width", quality + json.field "height", quality + end + end + end + end + end + json.field "lengthSeconds", rv["length_seconds"].to_i json.field "viewCountText", rv["short_view_count_text"] + json.field "viewCount", rv["view_count"].to_i if rv["view_count"]? end end end @@ -685,12 +706,14 @@ struct Video return audio_streams end - def player_response - if !@player_json - @player_json = JSON.parse(@info["player_response"]) - end + def recommended_videos + @recommended_json = JSON.parse(@info["recommended_videos"]) if !@recommended_json + @recommended_json.not_nil! + end - return @player_json.not_nil! + def player_response + @player_json = JSON.parse(@info["player_response"]) if !@player_json + @player_json.not_nil! end def storyboards @@ -945,19 +968,17 @@ def extract_polymer_config(body, html) recommended_videos.try &.each do |compact_renderer| if compact_renderer["compactRadioRenderer"]? || compact_renderer["compactPlaylistRenderer"]? # TODO - elsif compact_renderer["compactVideoRenderer"]? - compact_renderer = compact_renderer["compactVideoRenderer"] - + elsif video_renderer = compact_renderer["compactVideoRenderer"]? 
recommended_video = HTTP::Params.new - recommended_video["id"] = compact_renderer["videoId"].as_s - recommended_video["title"] = compact_renderer["title"]["simpleText"].as_s - recommended_video["author"] = compact_renderer["shortBylineText"]["runs"].as_a[0]["text"].as_s - recommended_video["ucid"] = compact_renderer["shortBylineText"]["runs"].as_a[0]["navigationEndpoint"]["browseEndpoint"]["browseId"].as_s - recommended_video["author_thumbnail"] = compact_renderer["channelThumbnail"]["thumbnails"][0]["url"].as_s + recommended_video["id"] = video_renderer["videoId"].as_s + recommended_video["title"] = video_renderer["title"]["simpleText"].as_s + recommended_video["author"] = video_renderer["shortBylineText"]["runs"].as_a[0]["text"].as_s + recommended_video["ucid"] = video_renderer["shortBylineText"]["runs"].as_a[0]["navigationEndpoint"]["browseEndpoint"]["browseId"].as_s + recommended_video["author_thumbnail"] = video_renderer["channelThumbnail"]["thumbnails"][0]["url"].as_s - recommended_video["short_view_count_text"] = compact_renderer["shortViewCountText"]["simpleText"].as_s - recommended_video["view_count"] = compact_renderer["viewCountText"]?.try &.["simpleText"]?.try &.as_s.delete(", views watching").to_i64?.try &.to_s || "0" - recommended_video["length_seconds"] = decode_length_seconds(compact_renderer["lengthText"]?.try &.["simpleText"]?.try &.as_s || "0:00").to_s + recommended_video["short_view_count_text"] = video_renderer["shortViewCountText"]?.try { |field| field["simpleText"]?.try &.as_s || field["runs"].as_a.map { |text| text["text"].as_s }.join("") } || "0" + recommended_video["view_count"] = video_renderer["viewCountText"]?.try { |field| field["simpleText"]?.try &.as_s || field["runs"].as_a.map { |text| text["text"].as_s }.join("") }.try &.delete(", views watching").to_i64?.try &.to_s || "0" + recommended_video["length_seconds"] = decode_length_seconds(video_renderer["lengthText"]?.try &.["simpleText"]?.try &.as_s || "0:00").to_s rvs << 
recommended_video.to_s end @@ -1072,8 +1093,40 @@ def extract_player_config(body, html) params["session_token"] = md["session_token"] end - if md = body.match(/'RELATED_PLAYER_ARGS': (?<rvs>{"rvs":"[^"]+"})/) - params["rvs"] = JSON.parse(md["rvs"])["rvs"].as_s + if md = body.match(/'RELATED_PLAYER_ARGS': (?<json>.*?),\n/) + recommended_json = JSON.parse(md["json"]) + if watch_next_response = recommended_json["watch_next_response"]? + rvs = [] of String + watch_next_json = JSON.parse(watch_next_response.as_s) + recommended_videos = watch_next_json["contents"]? + .try &.["twoColumnWatchNextResults"]? + .try &.["secondaryResults"]? + .try &.["secondaryResults"]? + .try &.["results"]? + .try &.as_a + + recommended_videos.try &.each do |compact_renderer| + if compact_renderer["compactRadioRenderer"]? || compact_renderer["compactPlaylistRenderer"]? + # TODO + elsif video_renderer = compact_renderer["compactVideoRenderer"]? + recommended_video = HTTP::Params.new + recommended_video["id"] = video_renderer["videoId"].as_s + recommended_video["title"] = video_renderer["title"]["simpleText"].as_s + recommended_video["author"] = video_renderer["shortBylineText"]["runs"].as_a[0]["text"].as_s + recommended_video["ucid"] = video_renderer["shortBylineText"]["runs"].as_a[0]["navigationEndpoint"]["browseEndpoint"]["browseId"].as_s + recommended_video["author_thumbnail"] = video_renderer["channelThumbnail"]["thumbnails"][0]["url"].as_s + + recommended_video["short_view_count_text"] = video_renderer["shortViewCountText"]?.try { |field| field["simpleText"]?.try &.as_s || field["runs"].as_a.map { |text| text["text"].as_s }.join("") } || "0" + recommended_video["view_count"] = video_renderer["viewCountText"]?.try { |field| field["simpleText"]?.try &.as_s || field["runs"].as_a.map { |text| text["text"].as_s }.join("") }.try &.delete(", views watching").to_i64?.try &.to_s || "0" + recommended_video["length_seconds"] = decode_length_seconds(video_renderer["lengthText"]?.try &.["simpleText"]?.try 
&.as_s || "0:00").to_s + + rvs << recommended_video.to_s + end + end + params["rvs"] = rvs.join(",") + elsif recommended_json["rvs"]? + params["rvs"] = recommended_json["rvs"].as_s + end end html_info = body.match(/ytplayer\.config = (?<info>.*?);ytplayer\.load/).try &.["info"]