Merge branch 'master' into api-only

2025-07-28 16:28:29 +00:00 · 2018-11-02 08:19:10 -05:00 · 2018-11-02 08:19:10 -05:00 · 57bc259c51
commit 57bc259c51
parent 8fd03e7621 c92f6e44e7
3 changed files with 1833 additions and 217 deletions
--- a/src/invidious.cr
+++ b/src/invidious.cr
--- a/src/invidious/comments.cr
+++ b/src/invidious/comments.cr
@ -56,7 +56,221 @@ class RedditListing
  })
 end

-def get_reddit_comments(id, client, headers)
+# Fetches one page of YouTube comments (or comment replies) for a video and
+# returns it as a JSON string.
+#
+# id           - video ID, interpolated into the watch-page URL.
+# continuation - continuation token of the page to fetch; when empty, the
+#                COMMENTS_TOKEN scraped from the watch page is used instead.
+# proxies      - collection yielding `region, list` pairs whose list entries
+#                respond to `[:ip]` and `[:port]`; only consulted when the
+#                watch page carries an empty `regionsAllowed` meta tag.
+# format       - "json" selects the `{"comments": []}` shape for an empty
+#                result; any other value selects the `contentHtml` shape.
+#
+# Raises when the watch page lacks the XSRF or itct token (`not_nil!`) and when
+# the comment service response has no `continuationContents`.
+def fetch_youtube_comments(id, continuation, proxies, format)
+  client = make_client(YT_URL)
+  html = client.get("/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}&gl=US&hl=en&disable_polymer=1")
+  headers = HTTP::Headers.new
+  headers["cookie"] = html.cookies.add_request_headers(headers)["cookie"]
+  body = html.body
+
+  session_token = body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/).not_nil!["session_token"]
+  itct = body.match(/itct=(?<itct>[^"]+)"/).not_nil!["itct"]
+  ctoken = body.match(/'COMMENTS_TOKEN': "(?<ctoken>[^"]+)"/)
+
+  # An empty regionsAllowed list is treated as "blocked from here"; retry the
+  # watch page through the configured proxies, one fiber per region.
+  if body.match(/<meta itemprop="regionsAllowed" content="">/)
+    # Buffered with one slot per region: every fiber sends exactly one message,
+    # so fibers that finish after the receiver has stopped listening can still
+    # complete their send instead of blocking forever.
+    bypass_channel = Channel({String, HTTPClient, HTTP::Headers} | Nil).new(proxies.size)
+
+    proxies.each do |region, list|
+      spawn do
+        # Starts out as the "blocked" marker so a region whose proxies all fail
+        # still reports nil below.
+        proxy_html = %(<meta itemprop="regionsAllowed" content="">)
+
+        list.each do |proxy|
+          begin
+            proxy_client = HTTPClient.new(YT_URL)
+            proxy_client.read_timeout = 10.seconds
+            proxy_client.connect_timeout = 10.seconds
+
+            proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port])
+            proxy_client.set_proxy(proxy)
+
+            response = proxy_client.get("/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}&gl=US&hl=en&disable_polymer=1")
+            proxy_headers = HTTP::Headers.new
+            # Write the cookies into this fiber's own headers. Passing the outer
+            # `headers` here would mutate the object shared with the caller and
+            # with every other proxy fiber.
+            proxy_headers["cookie"] = response.cookies.add_request_headers(proxy_headers)["cookie"]
+            proxy_html = response.body
+
+            if !proxy_html.match(/<meta itemprop="regionsAllowed" content="">/)
+              bypass_channel.send({proxy_html, proxy_client, proxy_headers})
+              break
+            end
+          rescue ex
+            # Best effort: a failing proxy is skipped and the next one is tried.
+          end
+        end
+
+        # If none of the proxies we tried returned a valid response
+        if proxy_html.match(/<meta itemprop="regionsAllowed" content="">/)
+          bypass_channel.send(nil)
+        end
+      end
+    end
+
+    # Wait for at most one answer per region and adopt the first usable one:
+    # its tokens, its client and its cookie headers replace the direct ones.
+    proxies.size.times do
+      response = bypass_channel.receive
+      if response
+        session_token = response[0].match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/).not_nil!["session_token"]
+        itct = response[0].match(/itct=(?<itct>[^"]+)"/).not_nil!["itct"]
+        ctoken = response[0].match(/'COMMENTS_TOKEN': "(?<ctoken>[^"]+)"/)
+
+        client = response[1]
+        headers = response[2]
+        break
+      end
+    end
+  end
+
+  # No comments token on the page: answer with an empty result.
+  if !ctoken
+    if format == "json"
+      return {"comments" => [] of String}.to_json
+    else
+      return {"contentHtml" => "", "commentCount" => 0}.to_json
+    end
+  end
+  ctoken = ctoken["ctoken"]
+
+  # A caller-supplied continuation wins over the token scraped from the page.
+  if !continuation.empty?
+    ctoken = continuation
+  else
+    continuation = ctoken
+  end
+
+  post_req = {
+    "session_token" => session_token,
+  }
+  post_req = HTTP::Params.encode(post_req)
+
+  headers["content-type"] = "application/x-www-form-urlencoded"
+
+  headers["x-client-data"] = "CIi2yQEIpbbJAQipncoBCNedygEIqKPKAQ=="
+  headers["x-spf-previous"] = "https://www.youtube.com/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}&gl=US&hl=en&disable_polymer=1"
+  headers["x-spf-referer"] = "https://www.youtube.com/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}&gl=US&hl=en&disable_polymer=1"
+
+  headers["x-youtube-client-name"] = "1"
+  headers["x-youtube-client-version"] = "2.20180719"
+  response = client.post("/comment_service_ajax?action_get_comments=1&pbj=1&ctoken=#{ctoken}&continuation=#{continuation}&itct=#{itct}&hl=en&gl=US", headers, post_req)
+  response = JSON.parse(response.body)
+
+  if !response["response"]["continuationContents"]?
+    raise "Could not fetch comments"
+  end
+
+  # Reply pages arrive under commentRepliesContinuation, top-level pages under
+  # itemSectionContinuation.
+  response = response["response"]["continuationContents"]
+  if response["commentRepliesContinuation"]?
+    body = response["commentRepliesContinuation"]
+  else
+    body = response["itemSectionContinuation"]
+  end
+
+  contents = body["contents"]?
+  if !contents
+    if format == "json"
+      return {"comments" => [] of String}.to_json
+    else
+      return {"contentHtml" => "", "commentCount" => 0}.to_json
+    end
+  end
+
+  comments = JSON.build do |json|
+    json.object do
+      if body["header"]?
+        # `delete` strips the individual characters of "Comments" and the
+        # thousands separators, leaving only the digits.
+        comment_count = body["header"]["commentsHeaderRenderer"]["countText"]["simpleText"].as_s.delete("Comments,").to_i
+        json.field "commentCount", comment_count
+      end
+
+      json.field "comments" do
+        json.array do
+          contents.as_a.each do |node|
+            json.object do
+              # Top-level comments are wrapped in a commentThreadRenderer;
+              # replies are bare commentRenderer nodes.
+              if !response["commentRepliesContinuation"]?
+                node = node["commentThreadRenderer"]
+              end
+
+              if node["replies"]?
+                node_replies = node["replies"]["commentRepliesRenderer"]
+              end
+
+              if !response["commentRepliesContinuation"]?
+                node_comment = node["comment"]["commentRenderer"]
+              else
+                node_comment = node["commentRenderer"]
+              end
+
+              # Plain-text comments use "simpleText" (escaped here); anything
+              # else is rendered from the "runs" array.
+              content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff')
+              if content_html
+                content_html = HTML.escape(content_html)
+              end
+
+              content_html ||= content_to_comment_html(node_comment["contentText"]["runs"].as_a)
+              content_html, content = html_to_content(content_html)
+
+              author = node_comment["authorText"]?.try &.["simpleText"]
+              author ||= ""
+
+              json.field "author", author
+              json.field "authorThumbnails" do
+                json.array do
+                  node_comment["authorThumbnail"]["thumbnails"].as_a.each do |thumbnail|
+                    json.object do
+                      json.field "url", thumbnail["url"]
+                      json.field "width", thumbnail["width"]
+                      json.field "height", thumbnail["height"]
+                    end
+                  end
+                end
+              end
+
+              if node_comment["authorEndpoint"]?
+                json.field "authorId", node_comment["authorEndpoint"]["browseEndpoint"]["browseId"]
+                json.field "authorUrl", node_comment["authorEndpoint"]["browseEndpoint"]["canonicalBaseUrl"]
+              else
+                json.field "authorId", ""
+                json.field "authorUrl", ""
+              end
+
+              published = decode_date(node_comment["publishedTimeText"]["runs"][0]["text"].as_s.rchop(" (edited)"))
+
+              json.field "content", content
+              json.field "contentHtml", content_html
+              json.field "published", published.epoch
+              json.field "publishedText", "#{recode_date(published)} ago"
+              json.field "likeCount", node_comment["likeCount"]
+              json.field "commentId", node_comment["commentId"]
+
+              if node_replies && !response["commentRepliesContinuation"]?
+                # Strip the characters of the "View all ... replies" label; an
+                # empty or non-numeric remainder is counted as a single reply.
+                reply_count = node_replies["moreText"]["simpleText"].as_s.delete("View all reply replies,")
+                if reply_count.empty?
+                  reply_count = 1
+                else
+                  reply_count = reply_count.try &.to_i?
+                  reply_count ||= 1
+                end
+
+                # Kept in its own local so the method's `continuation`
+                # parameter is not overwritten from inside the loop.
+                reply_continuation = node_replies["continuations"].as_a[0]["nextContinuationData"]["continuation"].as_s
+
+                json.field "replies" do
+                  json.object do
+                    json.field "replyCount", reply_count
+                    json.field "continuation", reply_continuation
+                  end
+                end
+              end
+            end
+          end
+        end
+      end
+
+      # Token for the next page of this listing, when there is one.
+      if body["continuations"]?
+        next_continuation = body["continuations"][0]["nextContinuationData"]["continuation"]
+        json.field "continuation", next_continuation
+      end
+    end
+  end
+
+  return comments
+end
+
+def fetch_reddit_comments(id)
+  client = make_client(REDDIT_URL)
+  headers = HTTP::Headers{"User-Agent" => "web:invidio.us:v0.11.0 (by /u/omarroth)"}
+
  query = "(url:3D#{id}%20OR%20url:#{id})%20(site:youtube.com%20OR%20site:youtu.be)"
  search_results = client.get("/search.json?q=#{query}", headers)

--- a/src/invidious/videos.cr
+++ b/src/invidious/videos.cr
@ -262,6 +262,10 @@ class Video
    end
  end

+  # Returns the "keywords" array found under "videoDetails" in the player
+  # response. When the key is absent an empty array is returned instead of
+  # raising on the missing key.
+  # NOTE(review): assumes some player responses omit "keywords" — confirm.
+  def keywords
+    keywords = self.player_response["videoDetails"]["keywords"]?.try &.as_a
+    keywords ||= [] of JSON::Any
+
+    return keywords
+  end
+
  def fmt_stream(decrypt_function)
    streams = [] of HTTP::Params
    self.info["url_encoded_fmt_stream_map"].split(",") do |string|
@ -638,16 +642,19 @@ def fetch_video(id, proxies)
  end

  title = info["title"]
-  views = info["view_count"].to_i64
  author = info["author"]
  ucid = info["ucid"]

+  views = html.xpath_node(%q(//meta[@itemprop="interactionCount"]))
+  views = views.try &.["content"].to_i64?
+  views ||= 0_i64
+
  likes = html.xpath_node(%q(//button[@title="I like this"]/span))
-  likes = likes.try &.content.delete(",").try &.to_i
+  likes = likes.try &.content.delete(",").try &.to_i?
  likes ||= 0

  dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span))
-  dislikes = dislikes.try &.content.delete(",").try &.to_i
+  dislikes = dislikes.try &.content.delete(",").try &.to_i?
  dislikes ||= 0

  description = html.xpath_node(%q(//p[@id="eow-description"]))