Add support for the new channel layout - part 2 (#3419)

Samantaz Fox
2023-01-10 21:16:12 +01:00
28 changed files with 765 additions and 684 deletions

View File

@@ -16,12 +16,6 @@ record AboutChannel,
tabs : Array(String),
verified : Bool
record AboutRelatedChannel,
ucid : String,
author : String,
author_url : String,
author_thumbnail : String
def get_about_info(ucid, locale) : AboutChannel
begin
# "EgVhYm91dA==" is the base64-encoded protobuf object {"2:string":"about"}
@@ -100,34 +94,46 @@ def get_about_info(ucid, locale) : AboutChannel
total_views = 0_i64
joined = Time.unix(0)
tabs = [] of String
tab_names = [] of String
tabs_json = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]?.try &.as_a?
if !tabs_json.nil?
# Retrieve information from the tabs array. The index we are looking for varies between channels.
tabs_json.each do |node|
# Try to find the about section, which is located in only one of the tabs.
channel_about_meta = node["tabRenderer"]?.try &.["content"]?.try &.["sectionListRenderer"]?
.try &.["contents"]?.try &.[0]?.try &.["itemSectionRenderer"]?.try &.["contents"]?
.try &.[0]?.try &.["channelAboutFullMetadataRenderer"]?
if tabs_json = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]?
# Get the names of the tabs available on this channel
tab_names = tabs_json.as_a.compact_map do |entry|
name = entry.dig?("tabRenderer", "title").try &.as_s.downcase
if !channel_about_meta.nil?
total_views = channel_about_meta["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D/, "").to_i64? || 0_i64
# The joined text is split into several substrings. The reduce joins those strings before parsing the date.
joined = channel_about_meta["joinedDateText"]?.try &.["runs"]?.try &.as_a.reduce("") { |acc, nd| acc + nd["text"].as_s }
.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)
# Normal Auto-generated channels
# https://support.google.com/youtube/answer/2579942
# For auto-generated channels, channel_about_meta only has ["description"]["simpleText"] and ["primaryLinks"][0]["title"]["simpleText"]
if (channel_about_meta["primaryLinks"]?.try &.size || 0) == 1 && (channel_about_meta["primaryLinks"][0]?) &&
(channel_about_meta["primaryLinks"][0]["title"]?.try &.["simpleText"]?.try &.as_s? || "") == "Auto-generated by YouTube"
auto_generated = true
end
end
# This is a small fix to avoid adding extra code on the HTML side.
# I.e., the URL for the "live" tab is .../streams, so use "streams"
# everywhere for the sake of simplicity
(name == "live") ? "streams" : name
end
# Get the currently active tab ("About")
about_tab = extract_selected_tab(tabs_json)
# Try to find the about metadata section
channel_about_meta = about_tab.dig?(
"content",
"sectionListRenderer", "contents", 0,
"itemSectionRenderer", "contents", 0,
"channelAboutFullMetadataRenderer"
)
if !channel_about_meta.nil?
total_views = channel_about_meta.dig?("viewCountText", "simpleText").try &.as_s.gsub(/\D/, "").to_i64? || 0_i64
# The joined text is split into several substrings. The reduce joins those strings before parsing the date.
joined = extract_text(channel_about_meta["joinedDateText"]?)
.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)
# Normal Auto-generated channels
# https://support.google.com/youtube/answer/2579942
# For auto-generated channels, channel_about_meta only has
# ["description"]["simpleText"] and ["primaryLinks"][0]["title"]["simpleText"]
auto_generated = (
(channel_about_meta["primaryLinks"]?.try &.size) == 1 && \
extract_text(channel_about_meta.dig?("primaryLinks", 0, "title")) == "Auto-generated by YouTube"
)
end
tabs = tabs_json.reject { |node| node["tabRenderer"]?.nil? }.map(&.["tabRenderer"]["title"].as_s.downcase)
end
sub_count = initdata
@@ -148,46 +154,20 @@ def get_about_info(ucid, locale) : AboutChannel
joined: joined,
is_family_friendly: is_family_friendly,
allowed_regions: allowed_regions,
tabs: tabs,
tabs: tab_names,
verified: author_verified || false,
)
end
def fetch_related_channels(about_channel : AboutChannel) : Array(AboutRelatedChannel)
# params is {"2:string":"channels"} encoded
channels = YoutubeAPI.browse(browse_id: about_channel.ucid, params: "EghjaGFubmVscw%3D%3D")
tabs = channels.dig?("contents", "twoColumnBrowseResultsRenderer", "tabs").try(&.as_a?) || [] of JSON::Any
tab = tabs.find(&.dig?("tabRenderer", "title").try(&.as_s?).try(&.== "Channels"))
return [] of AboutRelatedChannel if tab.nil?
items = tab.dig?(
"tabRenderer", "content",
"sectionListRenderer", "contents", 0,
"itemSectionRenderer", "contents", 0,
"gridRenderer", "items"
).try &.as_a?
related = [] of AboutRelatedChannel
return related if (items.nil? || items.empty?)
items.each do |item|
renderer = item["gridChannelRenderer"]?
next if !renderer
related_id = renderer.dig("channelId").as_s
related_title = renderer.dig("title", "simpleText").as_s
related_author_url = renderer.dig("navigationEndpoint", "browseEndpoint", "canonicalBaseUrl").as_s
related_author_thumbnail = HelperExtractors.get_thumbnails(renderer)
related << AboutRelatedChannel.new(
ucid: related_id,
author: related_title,
author_url: related_author_url,
author_thumbnail: related_author_thumbnail,
)
def fetch_related_channels(about_channel : AboutChannel, continuation : String? = nil) : {Array(SearchChannel), String?}
if continuation.nil?
# params is {"2:string":"channels"} encoded
initial_data = YoutubeAPI.browse(browse_id: about_channel.ucid, params: "EghjaGFubmVscw%3D%3D")
else
initial_data = YoutubeAPI.browse(continuation)
end
return related
items, continuation = extract_items(initial_data)
return items.select(SearchChannel), continuation
end
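
# Editor's sketch, not part of this commit (variable names hypothetical,
# assuming `about_channel` came from get_about_info): the new signature
# returns an {items, continuation} tuple, so callers page through related
# channels by feeding the token back in.
related, continuation = fetch_related_channels(about_channel)
if continuation
  more, continuation = fetch_related_channels(about_channel, continuation)
  related.concat(more)
end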

View File

@@ -180,11 +180,16 @@ def fetch_channel(ucid, pull_all_videos : Bool)
LOGGER.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}")
page = 1
channel = InvidiousChannel.new({
id: ucid,
author: author,
updated: Time.utc,
deleted: false,
subscribed: nil,
})
LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page")
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = extract_videos(initial_data, author, ucid)
videos, continuation = IV::Channel::Tabs.get_videos(channel)
LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
rss.xpath_nodes("//feed/entry").each do |entry|
@@ -197,7 +202,9 @@ def fetch_channel(ucid, pull_all_videos : Bool)
views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
views ||= 0_i64
channel_video = videos.select { |video| video.id == video_id }[0]?
channel_video = videos
.select(SearchVideo)
.select(&.id.== video_id)[0]?
length_seconds = channel_video.try &.length_seconds
length_seconds ||= 0
@@ -239,30 +246,25 @@ def fetch_channel(ucid, pull_all_videos : Bool)
end
if pull_all_videos
page += 1
ids = [] of String
loop do
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = extract_videos(initial_data, author, ucid)
# Keep fetching videos using the continuation token retrieved earlier
videos, continuation = IV::Channel::Tabs.get_videos(channel, continuation: continuation)
count = videos.size
videos = videos.map { |video| ChannelVideo.new({
id: video.id,
title: video.title,
published: video.published,
updated: Time.utc,
ucid: video.ucid,
author: video.author,
length_seconds: video.length_seconds,
live_now: video.live_now,
premiere_timestamp: video.premiere_timestamp,
views: video.views,
}) }
videos.each do |video|
ids << video.id
count = 0
videos.select(SearchVideo).each do |video|
count += 1
video = ChannelVideo.new({
id: video.id,
title: video.title,
published: video.published,
updated: Time.utc,
ucid: video.ucid,
author: video.author,
length_seconds: video.length_seconds,
live_now: video.live_now,
premiere_timestamp: video.premiere_timestamp,
views: video.views,
})
# We are notified of Red videos elsewhere (PubSub), which includes a correct published date;
# since they don't provide a published date here, we can safely ignore them.
@@ -279,17 +281,10 @@ def fetch_channel(ucid, pull_all_videos : Bool)
end
break if count < 25
page += 1
sleep 500.milliseconds
end
end
channel = InvidiousChannel.new({
id: ucid,
author: author,
updated: Time.utc,
deleted: false,
subscribed: nil,
})
channel.updated = Time.utc
return channel
end

View File

@@ -1,93 +1,28 @@
def fetch_channel_playlists(ucid, author, continuation, sort_by)
if continuation
response_json = YoutubeAPI.browse(continuation)
continuation_items = response_json["onResponseReceivedActions"]?
.try &.[0]["appendContinuationItemsAction"]["continuationItems"]
return [] of SearchItem, nil if !continuation_items
items = [] of SearchItem
continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
extract_item(item, author, ucid).try { |t| items << t }
}
continuation = continuation_items.as_a.last["continuationItemRenderer"]?
.try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
initial_data = YoutubeAPI.browse(continuation)
else
url = "/channel/#{ucid}/playlists?flow=list&view=1"
params =
case sort_by
when "last", "last_added"
# Equivalent to "&sort=lad"
# {"2:string": "playlists", "3:varint": 4, "4:varint": 1, "6:varint": 1}
"EglwbGF5bGlzdHMYBCABMAE%3D"
when "oldest", "oldest_created"
# Formerly "&sort=da"
# Not available anymore :c or maybe ??
# {"2:string": "playlists", "3:varint": 2, "4:varint": 1, "6:varint": 1}
"EglwbGF5bGlzdHMYAiABMAE%3D"
# {"2:string": "playlists", "3:varint": 1, "4:varint": 1, "6:varint": 1}
# "EglwbGF5bGlzdHMYASABMAE%3D"
when "newest", "newest_created"
# Formerly "&sort=dd"
# {"2:string": "playlists", "3:varint": 3, "4:varint": 1, "6:varint": 1}
"EglwbGF5bGlzdHMYAyABMAE%3D"
end
case sort_by
when "last", "last_added"
#
when "oldest", "oldest_created"
url += "&sort=da"
when "newest", "newest_created"
url += "&sort=dd"
else nil # Ignore
end
response = YT_POOL.client &.get(url)
initial_data = extract_initial_data(response.body)
return [] of SearchItem, nil if !initial_data
items = extract_items(initial_data, author, ucid)
continuation = response.body.match(/"token":"(?<continuation>[^"]+)"/).try &.["continuation"]?
initial_data = YoutubeAPI.browse(ucid, params: params || "")
end
return items, continuation
end
# ## NOTE: DEPRECATED
# Reason -> Unstable
# The Protobuf object must be provided with the id of the last playlist from the current "page"
# in order to fetch the next one accurately
# (if the id isn't included, entries shift around erratically between pages,
# leading to repetitions and skip-overs).
#
# Since it's impossible to produce the appropriate Protobuf without an id being provided by the user,
# it's better to stick to the continuation tokens provided by the first request onward
# (a usage sketch follows at the end of this file).
def produce_channel_playlists_url(ucid, cursor, sort = "newest", auto_generated = false)
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:base64" => {
"2:string" => "playlists",
"6:varint" => 2_i64,
"7:varint" => 1_i64,
"12:varint" => 1_i64,
"13:string" => "",
"23:varint" => 0_i64,
},
},
}
if cursor
cursor = Base64.urlsafe_encode(cursor, false) if !auto_generated
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = cursor
end
if auto_generated
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x32_i64
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 1_i64
case sort
when "oldest", "oldest_created"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 2_i64
when "newest", "newest_created"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 3_i64
when "last", "last_added"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 4_i64
else nil # Ignore
end
end
object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
object["80226972:embedded"].delete("3:base64")
continuation = object.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
return extract_items(initial_data, author, ucid)
end
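
# Editor's sketch, not part of this commit (assuming `ucid` and `author` are
# already known): as the deprecation note above recommends, later pages are
# fetched with the continuation token returned by the previous call.
items, continuation = fetch_channel_playlists(ucid, author, nil, "newest")
if continuation
  more, continuation = fetch_channel_playlists(ucid, author, continuation, "newest")
  items.concat(more)
end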

View File

@@ -16,6 +16,14 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
sort_by_numerical =
case sort_by
when "newest" then 1_i64
when "popular" then 2_i64
when "oldest" then 3_i64 # Broken as of 10/2022 :c
else 1_i64 # Fallback to "newest"
end
object_inner_1 = {
"110:embedded" => {
"3:embedded" => {
@@ -24,7 +32,7 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
"1:string" => object_inner_2_encoded,
"2:string" => "00000000-0000-0000-0000-000000000000",
},
"3:varint" => 1_i64,
"3:varint" => sort_by_numerical,
},
},
},
@@ -52,34 +60,138 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
return continuation
end
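
# Editor's sketch, not part of this commit: the sort_by argument maps to the
# "3:varint" field above, so a "popular"-sorted first-page token is obtained
# like this (the channel id below is a hypothetical placeholder):
ucid = "UCxxxxxxxxxxxxxxxxxxxxxx"
popular_ctoken = produce_channel_videos_continuation(ucid, sort_by: "popular")
initial_data = YoutubeAPI.browse(continuation: popular_ctoken)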
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
continuation = produce_channel_videos_continuation(ucid, page,
auto_generated: auto_generated, sort_by: sort_by, v2: true)
return YoutubeAPI.browse(continuation)
end
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo
# 2.times do |i|
# initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
initial_data = get_channel_videos_response(ucid, 1, auto_generated: auto_generated, sort_by: sort_by)
videos = extract_videos(initial_data, author, ucid)
# end
return videos.size, videos
end
def get_latest_videos(ucid)
initial_data = get_channel_videos_response(ucid)
author = initial_data["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
return extract_videos(initial_data, author, ucid)
end
# Used in bypass_captcha_job.cr
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2)
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end
module Invidious::Channel::Tabs
extend self
# -------------------
# Regular videos
# -------------------
def make_initial_video_ctoken(ucid, sort_by) : String
return produce_channel_videos_continuation(ucid, sort_by: sort_by)
end
# Wrapper for AboutChannel, as we still need to call get_videos with
# an author name and ucid directly (e.g. in RSS feeds).
# TODO: figure out how to get rid of that
def get_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
return get_videos(
channel.author, channel.ucid,
continuation: continuation, sort_by: sort_by
)
end
# Wrapper for InvidiousChannel, as we still need to call get_videos with
# an author name and ucid directly (e.g. in RSS feeds).
# TODO: figure out how to get rid of that
def get_videos(channel : InvidiousChannel, *, continuation : String? = nil, sort_by = "newest")
return get_videos(
channel.author, channel.id,
continuation: continuation, sort_by: sort_by
)
end
def get_videos(author : String, ucid : String, *, continuation : String? = nil, sort_by = "newest")
continuation ||= make_initial_video_ctoken(ucid, sort_by)
initial_data = YoutubeAPI.browse(continuation: continuation)
return extract_items(initial_data, author, ucid)
end
def get_60_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
if continuation.nil?
# Fetch the first "page" of video
items, next_continuation = get_videos(channel, sort_by: sort_by)
else
# Fetch a "page" of videos using the given continuation token
items, next_continuation = get_videos(channel, continuation: continuation)
end
# If there is more to load, then load a second "page"
# and replace the previous continuation token
if !next_continuation.nil?
items_2, next_continuation = get_videos(channel, continuation: next_continuation)
items.concat items_2
end
return items, next_continuation
end
# -------------------
# Shorts
# -------------------
private def fetch_shorts_data(ucid : String, continuation : String? = nil)
if continuation.nil?
# EgZzaG9ydHPyBgUKA5oBAA%3D%3D is the protobuf object to load "shorts"
# TODO: try to extract the continuation tokens that allow other sorting options
return YoutubeAPI.browse(ucid, params: "EgZzaG9ydHPyBgUKA5oBAA%3D%3D")
else
return YoutubeAPI.browse(continuation: continuation)
end
end
def get_shorts(channel : AboutChannel, continuation : String? = nil)
initial_data = self.fetch_shorts_data(channel.ucid, continuation)
begin
# Try to parse the initial data fetched above
return extract_items(initial_data, channel.author, channel.ucid)
rescue ex : RetryOnceException
# Sometimes, for a completely unknown reason, the "reelItemRenderer"
# object is missing some critical information (this happens roughly once
# every 20 requests). Refreshing the page is required to properly
# show the "shorts" tab.
#
# In order to make the experience smoother for the user, we simulate
# said page refresh by fetching the JSON again. If that still doesn't
# work, we raise a BrokenTubeException, as something is really broken.
begin
initial_data = self.fetch_shorts_data(channel.ucid, continuation)
return extract_items(initial_data, channel.author, channel.ucid)
rescue ex : RetryOnceException
raise BrokenTubeException.new "reelPlayerHeaderSupportedRenderers"
end
end
end
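
# Editor's sketch, not part of this commit (helper name hypothetical): the
# retry-once logic above could be factored into a generic helper that runs
# the block again on RetryOnceException before giving up.
private def with_one_retry(broken_key : String)
  yield
rescue RetryOnceException
  begin
    yield
  rescue RetryOnceException
    raise BrokenTubeException.new broken_key
  end
end
# get_shorts would then reduce to:
#   with_one_retry("reelPlayerHeaderSupportedRenderers") do
#     initial_data = self.fetch_shorts_data(channel.ucid, continuation)
#     extract_items(initial_data, channel.author, channel.ucid)
#   end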
# -------------------
# Livestreams
# -------------------
def get_livestreams(channel : AboutChannel, continuation : String? = nil)
if continuation.nil?
# EgdzdHJlYW1z8gYECgJ6AA%3D%3D is the protobuf object to load "streams"
initial_data = YoutubeAPI.browse(channel.ucid, params: "EgdzdHJlYW1z8gYECgJ6AA%3D%3D")
else
initial_data = YoutubeAPI.browse(continuation: continuation)
end
return extract_items(initial_data, channel.author, channel.ucid)
end
def get_60_livestreams(channel : AboutChannel, continuation : String? = nil)
if continuation.nil?
# Fetch the first "page" of streams
items, next_continuation = get_livestreams(channel)
else
# Fetch a "page" of streams using the given continuation token
items, next_continuation = get_livestreams(channel, continuation: continuation)
end
# If there is more to load, then load a second "page"
# and replace the previous continuation token
if !next_continuation.nil?
items_2, next_continuation = get_livestreams(channel, continuation: next_continuation)
items.concat items_2
end
return items, next_continuation
end
end
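
# Editor's sketch, not part of this commit (assuming `channel` is an
# AboutChannel): every tab helper in this module returns an
# {items, continuation} tuple, so call sites paginate the same way.
shorts, shorts_continuation = Invidious::Channel::Tabs.get_shorts(channel)
streams, streams_continuation = Invidious::Channel::Tabs.get_60_livestreams(channel)
if streams_continuation
  more, streams_continuation = Invidious::Channel::Tabs.get_60_livestreams(channel, streams_continuation)
  streams.concat(more)
end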