Use new API to fetch videos from channels

This mirrors the process used by subscriptions.gir.st. The old API is
tried first, and if it fails then the new one is used.
This commit is contained in:
afuous 2020-08-29 07:52:30 -07:00
parent 13f58d602f
commit b43866eeda
2 changed files with 197 additions and 151 deletions

View File

@ -396,7 +396,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
return items, continuation return items, continuation
end end
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest") def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
object = { object = {
"80226972:embedded" => { "80226972:embedded" => {
"2:string" => ucid, "2:string" => ucid,
@ -411,18 +411,33 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
}, },
} }
if auto_generated if !v2
seed = Time.unix(1525757349) if auto_generated
until seed >= Time.utc seed = Time.unix(1525757349)
seed += 1.month until seed >= Time.utc
end seed += 1.month
timestamp = seed - (page - 1).months end
timestamp = seed - (page - 1).months
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64 object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}" object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
end
else else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64 object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:embedded" => {
"1:varint" => 6307666885028338688_i64,
"2:embedded" => {
"1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:varint" => 30_i64 * (page - 1),
}))),
},
},
})))
end end
case sort_by case sort_by
@ -904,12 +919,25 @@ end
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo videos = [] of SearchVideo
2.times do |i| needs_v2 = false
url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
i = 0
while i < 2
url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by, v2: needs_v2)
response = YT_POOL.client &.get(url) response = YT_POOL.client &.get(url)
initial_data = JSON.parse(response.body).as_a.find &.["response"]? initial_data = JSON.parse(response.body).as_a.find &.["response"]?
break if !initial_data break if !initial_data
videos.concat extract_videos(initial_data.as_h, author, ucid) v1_error = !needs_v2 && initial_data
.try &.["response"]?.try &.["alerts"]?
.try &.as_a.any? { |alert|
alert.try &.["alertRenderer"]?.try &.["type"]?.try { |t| t == "ERROR" }
}
if v1_error
needs_v2 = true
else
videos.concat extract_videos(initial_data.as_h, author, ucid)
i += 1
end
end end
return videos.size, videos return videos.size, videos

View File

@ -164,148 +164,166 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
end end
# Converts one renderer entry of a parsed JSON response into a search item.
#
# The entry is dispatched on the renderer key it contains:
#   * "videoRenderer" / "gridVideoRenderer"   -> SearchVideo
#   * "channelRenderer"                       -> SearchChannel
#   * "gridPlaylistRenderer"                  -> SearchPlaylist (no child videos)
#   * "playlistRenderer"                      -> SearchPlaylist (with child videos)
#   * "radioRenderer", "showRenderer", "shelfRenderer",
#     "horizontalCardListRenderer", "searchPyvRenderer" -> nil (not handled yet)
# Any other entry also yields nil.
#
# *author_fallback* and *author_id_fallback* are used whenever the entry itself
# carries no author name / author id.
#
# Count fields are parsed with the nil-returning `to_i?` so that a label with
# no digits falls back to 0 instead of raising, matching how `view_count` is
# already parsed with `to_i64?`.
def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
  if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
    video_id = i["videoId"].as_s
    # The title is either a plain "simpleText" or a list of "runs" to concatenate.
    title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""

    author_info = i["ownerText"]?.try &.["runs"].as_a[0]?
    author = author_info.try &.["text"].as_s || author_fallback || ""
    author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""

    # Missing fields fall back to "now", zero, or an empty string.
    published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
    view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
    description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
    length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0

    live_now = false
    paid = false
    premium = false

    premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }

    # Flags are derived from the labels of the attached badges.
    i["badges"]?.try &.as_a.each do |badge|
      b = badge["metadataBadgeRenderer"]
      case b["label"].as_s
      when "LIVE NOW"
        live_now = true
      when "New", "4K", "CC"
        # TODO
      when "Premium"
        paid = true

        # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"]
        premium = true
      else nil # Ignore
      end
    end

    SearchVideo.new({
      title:              title,
      id:                 video_id,
      author:             author,
      ucid:               author_id,
      published:          published,
      views:              view_count,
      description_html:   description_html,
      length_seconds:     length_seconds,
      live_now:           live_now,
      paid:               paid,
      premium:            premium,
      premiere_timestamp: premiere_timestamp,
    })
  elsif i = item["channelRenderer"]?
    author = i["title"]["simpleText"]?.try &.as_s || author_fallback || ""
    author_id = i["channelId"]?.try &.as_s || author_id_fallback || ""

    author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || ""
    subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0

    # A channel entry without "videoCountText" is flagged as auto-generated.
    auto_generated = !i["videoCountText"]?

    # `to_i?` (not `to_i`): a count label without digits must yield 0, not raise.
    video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i? || 0
    description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""

    SearchChannel.new({
      author:           author,
      ucid:             author_id,
      author_thumbnail: author_thumbnail,
      subscriber_count: subscriber_count,
      video_count:      video_count,
      description_html: description_html,
      auto_generated:   auto_generated,
    })
  elsif i = item["gridPlaylistRenderer"]?
    title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
    plid = i["playlistId"]?.try &.as_s || ""

    # `to_i?` (not `to_i`): a count label without digits must yield 0, not raise.
    video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i? || 0
    playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""

    # This renderer is read without author or child-video data, so only the
    # fallbacks and an empty video list are used.
    SearchPlaylist.new({
      title:       title,
      id:          plid,
      author:      author_fallback || "",
      ucid:        author_id_fallback || "",
      video_count: video_count,
      videos:      [] of SearchPlaylistVideo,
      thumbnail:   playlist_thumbnail,
    })
  elsif i = item["playlistRenderer"]?
    title = i["title"]["simpleText"]?.try &.as_s || ""
    plid = i["playlistId"]?.try &.as_s || ""

    # `to_i?` (not `to_i`): a non-numeric "videoCount" must yield 0, not raise.
    video_count = i["videoCount"]?.try &.as_s.to_i? || 0
    playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""

    author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]?
    author = author_info.try &.["text"].as_s || author_fallback || ""
    author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""

    # Child videos listed under "videos", if any.
    videos = i["videos"]?.try &.as_a.map do |v|
      v = v["childVideoRenderer"]
      v_title = v["title"]["simpleText"]?.try &.as_s || ""
      v_id = v["videoId"]?.try &.as_s || ""
      v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
      SearchPlaylistVideo.new({
        title:          v_title,
        id:             v_id,
        length_seconds: v_length_seconds,
      })
    end || [] of SearchPlaylistVideo

    # TODO: i["publishedTimeText"]?

    SearchPlaylist.new({
      title:       title,
      id:          plid,
      author:      author,
      ucid:        author_id,
      video_count: video_count,
      videos:      videos,
      thumbnail:   playlist_thumbnail,
    })
  elsif i = item["radioRenderer"]? # Mix
    # TODO
  elsif i = item["showRenderer"]? # Show
    # TODO
  elsif i = item["shelfRenderer"]?
  elsif i = item["horizontalCardListRenderer"]?
  elsif i = item["searchPyvRenderer"]? # Ad
  end
end
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
items = [] of SearchItem items = [] of SearchItem
initial_data.try { |t| t["contents"]? || t["response"]? } channel_v2_response = initial_data
.try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] || .try &.["response"]?
t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] || .try &.["continuationContents"]?
t["continuationContents"]? } .try &.["gridContinuation"]?
.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? } .try &.["items"]?
.try &.["contents"].as_a
.each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
.try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
.each { |item|
if i = item["videoRenderer"]?
video_id = i["videoId"].as_s
title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
author_info = i["ownerText"]?.try &.["runs"].as_a[0]? if channel_v2_response
author = author_info.try &.["text"].as_s || author_fallback || "" channel_v2_response.try &.as_a.each { |item|
author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" extract_item(item, author_fallback, author_id_fallback)
.try { |t| items << t }
published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local }
view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64 else
description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" initial_data.try { |t| t["contents"]? || t["response"]? }
length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0 .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
live_now = false t["continuationContents"]? }
paid = false .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
premium = false .try &.["contents"].as_a
.each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) } .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
i["badges"]?.try &.as_a.each do |badge| .each { |item|
b = badge["metadataBadgeRenderer"] extract_item(item, author_fallback, author_id_fallback)
case b["label"].as_s .try { |t| items << t }
when "LIVE NOW" } }
live_now = true end
when "New", "4K", "CC"
# TODO
when "Premium"
paid = true
# TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"]
premium = true
else nil # Ignore
end
end
items << SearchVideo.new({
title: title,
id: video_id,
author: author,
ucid: author_id,
published: published,
views: view_count,
description_html: description_html,
length_seconds: length_seconds,
live_now: live_now,
paid: paid,
premium: premium,
premiere_timestamp: premiere_timestamp,
})
elsif i = item["channelRenderer"]?
author = i["title"]["simpleText"]?.try &.as_s || author_fallback || ""
author_id = i["channelId"]?.try &.as_s || author_id_fallback || ""
author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || ""
subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
auto_generated = false
auto_generated = true if !i["videoCountText"]?
video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
items << SearchChannel.new({
author: author,
ucid: author_id,
author_thumbnail: author_thumbnail,
subscriber_count: subscriber_count,
video_count: video_count,
description_html: description_html,
auto_generated: auto_generated,
})
elsif i = item["gridPlaylistRenderer"]?
title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
plid = i["playlistId"]?.try &.as_s || ""
video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
items << SearchPlaylist.new({
title: title,
id: plid,
author: author_fallback || "",
ucid: author_id_fallback || "",
video_count: video_count,
videos: [] of SearchPlaylistVideo,
thumbnail: playlist_thumbnail,
})
elsif i = item["playlistRenderer"]?
title = i["title"]["simpleText"]?.try &.as_s || ""
plid = i["playlistId"]?.try &.as_s || ""
video_count = i["videoCount"]?.try &.as_s.to_i || 0
playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]?
author = author_info.try &.["text"].as_s || author_fallback || ""
author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
videos = i["videos"]?.try &.as_a.map do |v|
v = v["childVideoRenderer"]
v_title = v["title"]["simpleText"]?.try &.as_s || ""
v_id = v["videoId"]?.try &.as_s || ""
v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
SearchPlaylistVideo.new({
title: v_title,
id: v_id,
length_seconds: v_length_seconds,
})
end || [] of SearchPlaylistVideo
# TODO: i["publishedTimeText"]?
items << SearchPlaylist.new({
title: title,
id: plid,
author: author,
ucid: author_id,
video_count: video_count,
videos: videos,
thumbnail: playlist_thumbnail,
})
elsif i = item["radioRenderer"]? # Mix
# TODO
elsif i = item["showRenderer"]? # Show
# TODO
elsif i = item["shelfRenderer"]?
elsif i = item["horizontalCardListRenderer"]?
elsif i = item["searchPyvRenderer"]? # Ad
end
} }
items items
end end