channel: use YT API + extractors to fetch videos

This commit is contained in:
Samantaz Fox
2022-11-11 20:26:34 +01:00
parent c5ee2bfc0f
commit 2903e896ec
5 changed files with 127 additions and 104 deletions

View File

@@ -180,11 +180,16 @@ def fetch_channel(ucid, pull_all_videos : Bool)
LOGGER.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}")
page = 1
channel = InvidiousChannel.new({
id: ucid,
author: author,
updated: Time.utc,
deleted: false,
subscribed: nil,
})
LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page")
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = extract_videos(initial_data, author, ucid)
videos, continuation = IV::Channel::Tabs.get_videos(channel)
LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
rss.xpath_nodes("//feed/entry").each do |entry|
@@ -197,7 +202,9 @@ def fetch_channel(ucid, pull_all_videos : Bool)
views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
views ||= 0_i64
channel_video = videos.select { |video| video.id == video_id }[0]?
channel_video = videos
.select(SearchVideo)
.select(&.id.== video_id)[0]?
length_seconds = channel_video.try &.length_seconds
length_seconds ||= 0
@@ -235,30 +242,25 @@ def fetch_channel(ucid, pull_all_videos : Bool)
end
if pull_all_videos
page += 1
ids = [] of String
loop do
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = extract_videos(initial_data, author, ucid)
# Keep fetching videos using the continuation token retrieved earlier
videos, continuation = IV::Channel::Tabs.get_videos(channel, continuation: continuation)
count = videos.size
videos = videos.map { |video| ChannelVideo.new({
id: video.id,
title: video.title,
published: video.published,
updated: Time.utc,
ucid: video.ucid,
author: video.author,
length_seconds: video.length_seconds,
live_now: video.live_now,
premiere_timestamp: video.premiere_timestamp,
views: video.views,
}) }
videos.each do |video|
ids << video.id
count = 0
videos.select(SearchVideo).each do |video|
count += 1
video = ChannelVideo.new({
id: video.id,
title: video.title,
published: video.published,
updated: Time.utc,
ucid: video.ucid,
author: video.author,
length_seconds: video.length_seconds,
live_now: video.live_now,
premiere_timestamp: video.premiere_timestamp,
views: video.views,
})
# We are notified of Red videos elsewhere (PubSub), which includes a correct published date,
# so since they don't provide a published date here we can safely ignore them.
@@ -269,17 +271,10 @@ def fetch_channel(ucid, pull_all_videos : Bool)
end
break if count < 25
page += 1
sleep 500.milliseconds
end
end
channel = InvidiousChannel.new({
id: ucid,
author: author,
updated: Time.utc,
deleted: false,
subscribed: nil,
})
channel.updated = Time.utc
return channel
end

View File

@@ -24,7 +24,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by)
initial_data = YoutubeAPI.browse(ucid, params: params || "")
end
return extract_items(initial_data, ucid, author)
return extract_items(initial_data, author, ucid)
end
# ## NOTE: DEPRECATED

View File

@@ -16,6 +16,14 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
sort_by_numerical =
case sort_by
when "newest" then 1_i64
when "popular" then 2_i64
when "oldest" then 3_i64 # Broken as of 10/2022 :c
else 1_i64 # Fallback to "newest"
end
object_inner_1 = {
"110:embedded" => {
"3:embedded" => {
@@ -24,7 +32,7 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
"1:string" => object_inner_2_encoded,
"2:string" => "00000000-0000-0000-0000-000000000000",
},
"3:varint" => 1_i64,
"3:varint" => sort_by_numerical,
},
},
},
@@ -52,34 +60,66 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
return continuation
end
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
continuation = produce_channel_videos_continuation(ucid, page,
auto_generated: auto_generated, sort_by: sort_by, v2: true)
return YoutubeAPI.browse(continuation)
end
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo
# 2.times do |i|
# initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
initial_data = get_channel_videos_response(ucid, 1, auto_generated: auto_generated, sort_by: sort_by)
videos = extract_videos(initial_data, author, ucid)
# end
return videos.size, videos
end
def get_latest_videos(ucid)
initial_data = get_channel_videos_response(ucid)
author = initial_data["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
return extract_videos(initial_data, author, ucid)
end
# Used in bypass_captcha_job.cr
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2)
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end
module Invidious::Channel::Tabs
extend self
# -------------------
# Regular videos
# -------------------
def make_initial_video_ctoken(ucid, sort_by) : String
return produce_channel_videos_continuation(ucid, sort_by: sort_by)
end
# Wrapper for AboutChannel, as we still need to call get_videos with
# an author name and ucid directly (e.g in RSS feeds).
# TODO: figure out how to get rid of that
def get_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
return get_videos(
channel.author, channel.ucid,
continuation: continuation, sort_by: sort_by
)
end
# Wrapper for InvidiousChannel, as we still need to call get_videos with
# an author name and ucid directly (e.g in RSS feeds).
# TODO: figure out how to get rid of that
def get_videos(channel : InvidiousChannel, *, continuation : String? = nil, sort_by = "newest")
return get_videos(
channel.author, channel.id,
continuation: continuation, sort_by: sort_by
)
end
def get_videos(author : String, ucid : String, *, continuation : String? = nil, sort_by = "newest")
continuation ||= make_initial_video_ctoken(ucid, sort_by)
initial_data = YoutubeAPI.browse(continuation: continuation)
return extract_items(initial_data, author, ucid)
end
def get_60_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
if continuation.nil?
# Fetch the first "page" of video
items, next_continuation = get_videos(channel, sort_by: sort_by)
else
# Fetch a "page" of videos using the given continuation token
items, next_continuation = get_videos(channel, continuation: continuation)
end
# If there is more to load, then load a second "page"
# and replace the previous continuation token
if !next_continuation.nil?
items_2, next_continuation = get_videos(channel, continuation: next_continuation)
items.concat items_2
end
return items, next_continuation
end
end