channel: use YT API + extractors to fetch videos

2026-01-31 18:56:37 +00:00 · 2022-11-11 20:26:34 +01:00
parent c5ee2bfc0f
commit 2903e896ec
5 changed files with 127 additions and 104 deletions
--- a/src/invidious/channels/channels.cr
+++ b/src/invidious/channels/channels.cr
@@ -180,11 +180,16 @@ def fetch_channel(ucid, pull_all_videos : Bool)

  LOGGER.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}")

-  page = 1
+  channel = InvidiousChannel.new({
+    id:         ucid,
+    author:     author,
+    updated:    Time.utc,
+    deleted:    false,
+    subscribed: nil,
+  })

  LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page")
-  initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
-  videos = extract_videos(initial_data, author, ucid)
+  videos, continuation = IV::Channel::Tabs.get_videos(channel)

  LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
  rss.xpath_nodes("//feed/entry").each do |entry|
@@ -197,7 +202,9 @@ def fetch_channel(ucid, pull_all_videos : Bool)
    views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
    views ||= 0_i64

-    channel_video = videos.select { |video| video.id == video_id }[0]?
+    channel_video = videos
+      .select(SearchVideo)
+      .select(&.id.== video_id)[0]?

    length_seconds = channel_video.try &.length_seconds
    length_seconds ||= 0
@@ -235,30 +242,25 @@ def fetch_channel(ucid, pull_all_videos : Bool)
  end

  if pull_all_videos
-    page += 1
-
-    ids = [] of String
-
    loop do
-      initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
-      videos = extract_videos(initial_data, author, ucid)
+      # Keep fetching videos using the continuation token retrieved earlier
+      videos, continuation = IV::Channel::Tabs.get_videos(channel, continuation: continuation)

-      count = videos.size
-      videos = videos.map { |video| ChannelVideo.new({
-        id:                 video.id,
-        title:              video.title,
-        published:          video.published,
-        updated:            Time.utc,
-        ucid:               video.ucid,
-        author:             video.author,
-        length_seconds:     video.length_seconds,
-        live_now:           video.live_now,
-        premiere_timestamp: video.premiere_timestamp,
-        views:              video.views,
-      }) }
-
-      videos.each do |video|
-        ids << video.id
+      count = 0
+      videos.select(SearchVideo).each do |video|
+        count += 1
+        video = ChannelVideo.new({
+          id:                 video.id,
+          title:              video.title,
+          published:          video.published,
+          updated:            Time.utc,
+          ucid:               video.ucid,
+          author:             video.author,
+          length_seconds:     video.length_seconds,
+          live_now:           video.live_now,
+          premiere_timestamp: video.premiere_timestamp,
+          views:              video.views,
+        })

        # We are notified of Red videos elsewhere (PubSub), which includes a correct published date,
        # so since they don't provide a published date here we can safely ignore them.
@@ -269,17 +271,10 @@ def fetch_channel(ucid, pull_all_videos : Bool)
      end

      break if count < 25
-      page += 1
+      sleep 500.milliseconds
    end
  end

-  channel = InvidiousChannel.new({
-    id:         ucid,
-    author:     author,
-    updated:    Time.utc,
-    deleted:    false,
-    subscribed: nil,
-  })
-
+  channel.updated = Time.utc
  return channel
 end
--- a/src/invidious/channels/playlists.cr
+++ b/src/invidious/channels/playlists.cr
@@ -24,7 +24,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by)
    initial_data = YoutubeAPI.browse(ucid, params: params || "")
  end

-  return extract_items(initial_data, ucid, author)
+  return extract_items(initial_data, author, ucid)
 end

 # ## NOTE: DEPRECATED
--- a/src/invidious/channels/videos.cr
+++ b/src/invidious/channels/videos.cr
@@ -16,6 +16,14 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
    .try { |i| Base64.urlsafe_encode(i) }
    .try { |i| URI.encode_www_form(i) }

+  sort_by_numerical =
+    case sort_by
+    when "newest"  then 1_i64
+    when "popular" then 2_i64
+    when "oldest"  then 3_i64 # Broken as of 10/2022 :c
+    else                1_i64 # Fallback to "newest"
+    end
+
  object_inner_1 = {
    "110:embedded" => {
      "3:embedded" => {
@@ -24,7 +32,7 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
            "1:string" => object_inner_2_encoded,
            "2:string" => "00000000-0000-0000-0000-000000000000",
          },
-          "3:varint" => 1_i64,
+          "3:varint" => sort_by_numerical,
        },
      },
    },
@@ -52,34 +60,66 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
  return continuation
 end

-def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
-  continuation = produce_channel_videos_continuation(ucid, page,
-    auto_generated: auto_generated, sort_by: sort_by, v2: true)
-
-  return YoutubeAPI.browse(continuation)
-end
-
-def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
-  videos = [] of SearchVideo
-
-  # 2.times do |i|
-  # initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
-  initial_data = get_channel_videos_response(ucid, 1, auto_generated: auto_generated, sort_by: sort_by)
-  videos = extract_videos(initial_data, author, ucid)
-  # end
-
-  return videos.size, videos
-end
-
-def get_latest_videos(ucid)
-  initial_data = get_channel_videos_response(ucid)
-  author = initial_data["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
-
-  return extract_videos(initial_data, author, ucid)
-end
-
 # Used in bypass_captcha_job.cr
 def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
  continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2)
  return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
 end
+
+module Invidious::Channel::Tabs
+  extend self
+
+  # -------------------
+  #  Regular videos
+  # -------------------
+
+  def make_initial_video_ctoken(ucid, sort_by) : String
+    return produce_channel_videos_continuation(ucid, sort_by: sort_by)
+  end
+
+  # Convenience overload taking an AboutChannel: forwards its author and ucid
+  # to the (author, ucid) overload below, which is still called directly
+  # elsewhere (e.g. in RSS feeds). TODO: figure out how to get rid of that
+  def get_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
+    return get_videos(
+      channel.author, channel.ucid,
+      continuation: continuation, sort_by: sort_by
+    )
+  end
+
+  # Convenience overload taking an InvidiousChannel: forwards its author and id
+  # to the (author, ucid) overload below, which is still called directly
+  # elsewhere (e.g. in RSS feeds). TODO: figure out how to get rid of that
+  def get_videos(channel : InvidiousChannel, *, continuation : String? = nil, sort_by = "newest")
+    return get_videos(
+      channel.author, channel.id,
+      continuation: continuation, sort_by: sort_by
+    )
+  end
+
+  def get_videos(author : String, ucid : String, *, continuation : String? = nil, sort_by = "newest")
+    continuation ||= make_initial_video_ctoken(ucid, sort_by)
+    initial_data = YoutubeAPI.browse(continuation: continuation)
+
+    return extract_items(initial_data, author, ucid)
+  end
+
+  def get_60_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
+    if continuation.nil?
+      # No token supplied: fetch the first "page" of videos, sorted by sort_by
+      items, next_continuation = get_videos(channel, sort_by: sort_by)
+    else
+      # Fetch a "page" of videos using the given continuation token
+      items, next_continuation = get_videos(channel, continuation: continuation)
+    end
+
+    # If there is more to load, then load a second "page", append its items
+    # to the first batch and replace the previous continuation token
+    if !next_continuation.nil?
+      items_2, next_continuation = get_videos(channel, continuation: next_continuation)
+      items.concat items_2
+    end
+
+    return items, next_continuation
+  end
+end