Merge branch 'master' into api-only

This commit is contained in:
Omar Roth 2018-11-02 08:19:10 -05:00
commit 57bc259c51
3 changed files with 1833 additions and 217 deletions

File diff suppressed because it is too large Load Diff

View File

@ -56,7 +56,221 @@ class RedditListing
})
end
def get_reddit_comments(id, client, headers)
# Fetches one page of YouTube comments for the video `id` and returns it as a
# JSON string.
#
# The watch page is scraped for the XSRF token, the `itct` click-tracking
# parameter and the comments token; these are then replayed against
# `/comment_service_ajax`. If the watch page carries an empty `regionsAllowed`
# meta tag, the same scrape is retried through the given proxies (one fiber per
# region) and the first proxy that gets a non-blocked page supplies the client,
# cookies and tokens for the comment request.
#
# Parameters:
# - `id`: video ID, interpolated into the `/watch` URL.
# - `continuation`: continuation token for a later page or a reply thread;
#   when empty, the `COMMENTS_TOKEN` scraped from the watch page is used.
# - `proxies`: iterated as `region, list` pairs; each list entry is read via
#   `proxy[:ip]` and `proxy[:port]`.
#   NOTE(review): presumably a Hash of region => Array of NamedTuple - confirm
#   against the caller.
# - `format`: only consulted when there is nothing to return: `"json"` yields
#   `{"comments": []}`, anything else yields
#   `{"contentHtml": "", "commentCount": 0}`.
#
# Returns a JSON document with `comments`, plus `commentCount` and
# `continuation` when the response provides them.
#
# Raises "Could not fetch comments" when the response has no
# `continuationContents`; the `not_nil!` calls raise when the XSRF token or
# `itct` cannot be found in the page.
def fetch_youtube_comments(id, continuation, proxies, format)
  client = make_client(YT_URL)
  html = client.get("/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}&gl=US&hl=en&disable_polymer=1")
  headers = HTTP::Headers.new
  headers["cookie"] = html.cookies.add_request_headers(headers)["cookie"]
  body = html.body

  session_token = body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/).not_nil!["session_token"]
  itct = body.match(/itct=(?<itct>[^"]+)"/).not_nil!["itct"]
  ctoken = body.match(/'COMMENTS_TOKEN': "(?<ctoken>[^"]+)"/)

  # An empty regionsAllowed tag is treated as "blocked from here": retry via proxies.
  if body.match(/<meta itemprop="regionsAllowed" content="">/)
    # Buffered with one slot per region: every fiber sends exactly once, and
    # the receiver below stops at the first success, so an unbuffered channel
    # would leave the remaining fibers blocked on `send` forever.
    bypass_channel = Channel({String, HTTPClient, HTTP::Headers} | Nil).new(proxies.size)

    proxies.each do |region, list|
      spawn do
        # Starts out as a "blocked" page so that a region whose proxies all
        # fail still reports nil below.
        proxy_html = %(<meta itemprop="regionsAllowed" content="">)

        list.each do |proxy|
          begin
            proxy_client = HTTPClient.new(YT_URL)
            proxy_client.read_timeout = 10.seconds
            proxy_client.connect_timeout = 10.seconds

            proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port])
            proxy_client.set_proxy(proxy)

            response = proxy_client.get("/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}&gl=US&hl=en&disable_polymer=1")
            proxy_headers = HTTP::Headers.new
            # Write the proxy session's cookies into its own header set; the
            # caller's `headers` must keep the cookies of the direct session.
            proxy_headers["cookie"] = response.cookies.add_request_headers(proxy_headers)["cookie"]
            proxy_html = response.body

            if !proxy_html.match(/<meta itemprop="regionsAllowed" content="">/)
              bypass_channel.send({proxy_html, proxy_client, proxy_headers})
              break
            end
          rescue ex
            # Best effort: a dead or slow proxy just means we try the next one.
          end
        end

        # If none of the proxies we tried returned a valid response
        if proxy_html.match(/<meta itemprop="regionsAllowed" content="">/)
          bypass_channel.send(nil)
        end
      end
    end

    # Wait for at most one answer per region, stopping at the first usable one.
    proxies.size.times do
      response = bypass_channel.receive
      if response
        session_token = response[0].match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/).not_nil!["session_token"]
        itct = response[0].match(/itct=(?<itct>[^"]+)"/).not_nil!["itct"]
        ctoken = response[0].match(/'COMMENTS_TOKEN': "(?<ctoken>[^"]+)"/)

        client = response[1]
        headers = response[2]
        break
      end
    end
  end

  # No comments token on the page: answer with an empty result in the requested shape.
  if !ctoken
    if format == "json"
      return {"comments" => [] of String}.to_json
    else
      return {"contentHtml" => "", "commentCount" => 0}.to_json
    end
  end
  ctoken = ctoken["ctoken"]

  # A caller-supplied continuation wins; otherwise start from the page's token.
  if !continuation.empty?
    ctoken = continuation
  else
    continuation = ctoken
  end

  post_req = {
    "session_token" => session_token,
  }
  post_req = HTTP::Params.encode(post_req)

  headers["content-type"] = "application/x-www-form-urlencoded"

  headers["x-client-data"] = "CIi2yQEIpbbJAQipncoBCNedygEIqKPKAQ=="
  headers["x-spf-previous"] = "https://www.youtube.com/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}&gl=US&hl=en&disable_polymer=1"
  headers["x-spf-referer"] = "https://www.youtube.com/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}&gl=US&hl=en&disable_polymer=1"

  headers["x-youtube-client-name"] = "1"
  headers["x-youtube-client-version"] = "2.20180719"

  response = client.post("/comment_service_ajax?action_get_comments=1&pbj=1&ctoken=#{ctoken}&continuation=#{continuation}&itct=#{itct}&hl=en&gl=US", headers, post_req)
  response = JSON.parse(response.body)

  if !response["response"]["continuationContents"]?
    raise "Could not fetch comments"
  end

  response = response["response"]["continuationContents"]
  # Reply threads and top-level listings arrive under different keys.
  if response["commentRepliesContinuation"]?
    body = response["commentRepliesContinuation"]
  else
    body = response["itemSectionContinuation"]
  end

  contents = body["contents"]?
  if !contents
    if format == "json"
      return {"comments" => [] of String}.to_json
    else
      return {"contentHtml" => "", "commentCount" => 0}.to_json
    end
  end

  comments = JSON.build do |json|
    json.object do
      if body["header"]?
        # `delete` strips the listed characters, leaving only the digits of the count text.
        comment_count = body["header"]["commentsHeaderRenderer"]["countText"]["simpleText"].as_s.delete("Comments,").to_i
        json.field "commentCount", comment_count
      end

      json.field "comments" do
        json.array do
          contents.as_a.each do |node|
            json.object do
              # Top-level comments are wrapped in a commentThreadRenderer; replies are not.
              if !response["commentRepliesContinuation"]?
                node = node["commentThreadRenderer"]
              end

              if node["replies"]?
                node_replies = node["replies"]["commentRepliesRenderer"]
              end

              if !response["commentRepliesContinuation"]?
                node_comment = node["comment"]["commentRenderer"]
              else
                node_comment = node["commentRenderer"]
              end

              # Plain-text comments are escaped here; otherwise the "runs" are
              # converted by content_to_comment_html.
              content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff')
              if content_html
                content_html = HTML.escape(content_html)
              end

              content_html ||= content_to_comment_html(node_comment["contentText"]["runs"].as_a)
              content_html, content = html_to_content(content_html)

              author = node_comment["authorText"]?.try &.["simpleText"]
              author ||= ""

              json.field "author", author
              json.field "authorThumbnails" do
                json.array do
                  node_comment["authorThumbnail"]["thumbnails"].as_a.each do |thumbnail|
                    json.object do
                      json.field "url", thumbnail["url"]
                      json.field "width", thumbnail["width"]
                      json.field "height", thumbnail["height"]
                    end
                  end
                end
              end

              if node_comment["authorEndpoint"]?
                json.field "authorId", node_comment["authorEndpoint"]["browseEndpoint"]["browseId"]
                json.field "authorUrl", node_comment["authorEndpoint"]["browseEndpoint"]["canonicalBaseUrl"]
              else
                json.field "authorId", ""
                json.field "authorUrl", ""
              end

              published = decode_date(node_comment["publishedTimeText"]["runs"][0]["text"].as_s.rchop(" (edited)"))

              json.field "content", content
              json.field "contentHtml", content_html
              json.field "published", published.epoch
              json.field "publishedText", "#{recode_date(published)} ago"
              json.field "likeCount", node_comment["likeCount"]
              json.field "commentId", node_comment["commentId"]

              if node_replies && !response["commentRepliesContinuation"]?
                # Strip the letters of the "View ... repl(y|ies)" text; nothing
                # left over means there was no number in it, i.e. one reply.
                reply_count = node_replies["moreText"]["simpleText"].as_s.delete("View all reply replies,")
                if reply_count.empty?
                  reply_count = 1
                else
                  reply_count = reply_count.try &.to_i?
                  reply_count ||= 1
                end

                continuation = node_replies["continuations"].as_a[0]["nextContinuationData"]["continuation"].as_s

                json.field "replies" do
                  json.object do
                    json.field "replyCount", reply_count
                    json.field "continuation", continuation
                  end
                end
              end
            end
          end
        end
      end

      # Token for the next page of this listing, when there is one.
      if body["continuations"]?
        continuation = body["continuations"][0]["nextContinuationData"]["continuation"]
        json.field "continuation", continuation
      end
    end
  end

  return comments
end
def fetch_reddit_comments(id)
client = make_client(REDDIT_URL)
headers = HTTP::Headers{"User-Agent" => "web:invidio.us:v0.11.0 (by /u/omarroth)"}
query = "(url:3D#{id}%20OR%20url:#{id})%20(site:youtube.com%20OR%20site:youtu.be)"
search_results = client.get("/search.json?q=#{query}", headers)

View File

@ -262,6 +262,10 @@ class Video
end
end
# Returns the video's keywords as an array, read from
# `player_response["videoDetails"]["keywords"]`.
def keywords
  video_details = self.player_response["videoDetails"]
  video_details["keywords"].as_a
end
def fmt_stream(decrypt_function)
streams = [] of HTTP::Params
self.info["url_encoded_fmt_stream_map"].split(",") do |string|
@ -638,16 +642,19 @@ def fetch_video(id, proxies)
end
title = info["title"]
views = info["view_count"].to_i64
author = info["author"]
ucid = info["ucid"]
views = html.xpath_node(%q(//meta[@itemprop="interactionCount"]))
views = views.try &.["content"].to_i64?
views ||= 0_i64
likes = html.xpath_node(%q(//button[@title="I like this"]/span))
likes = likes.try &.content.delete(",").try &.to_i
likes = likes.try &.content.delete(",").try &.to_i?
likes ||= 0
dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span))
dislikes = dislikes.try &.content.delete(",").try &.to_i
dislikes = dislikes.try &.content.delete(",").try &.to_i?
dislikes ||= 0
description = html.xpath_node(%q(//p[@id="eow-description"]))