Add support for the new channel layout - part 2 (#3419)

Samantaz Fox
2023-01-10 21:16:12 +01:00
28 changed files with 765 additions and 684 deletions

View File

@@ -16,12 +16,6 @@ record AboutChannel,
tabs : Array(String),
verified : Bool
record AboutRelatedChannel,
ucid : String,
author : String,
author_url : String,
author_thumbnail : String
def get_about_info(ucid, locale) : AboutChannel
begin
# "EgVhYm91dA==" is the base64-encoded protobuf object {"2:string":"about"}
@@ -100,34 +94,46 @@ def get_about_info(ucid, locale) : AboutChannel
total_views = 0_i64
joined = Time.unix(0)
tabs = [] of String
tab_names = [] of String
tabs_json = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]?.try &.as_a?
if !tabs_json.nil?
# Retrieve information from the tabs array. The index we are looking for varies between channels.
tabs_json.each do |node|
# Try to find the about section, which is located in only one of the tabs.
channel_about_meta = node["tabRenderer"]?.try &.["content"]?.try &.["sectionListRenderer"]?
.try &.["contents"]?.try &.[0]?.try &.["itemSectionRenderer"]?.try &.["contents"]?
.try &.[0]?.try &.["channelAboutFullMetadataRenderer"]?
if tabs_json = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]?
# Get the names of the tabs available on this channel
tab_names = tabs_json.as_a.compact_map do |entry|
name = entry.dig?("tabRenderer", "title").try &.as_s.downcase
if !channel_about_meta.nil?
total_views = channel_about_meta["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D/, "").to_i64? || 0_i64
# The joined text is split into several substrings. The reduce joins those strings before parsing the date.
joined = channel_about_meta["joinedDateText"]?.try &.["runs"]?.try &.as_a.reduce("") { |acc, nd| acc + nd["text"].as_s }
.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)
# Normal Auto-generated channels
# https://support.google.com/youtube/answer/2579942
# For auto-generated channels, channel_about_meta only has ["description"]["simpleText"] and ["primaryLinks"][0]["title"]["simpleText"]
if (channel_about_meta["primaryLinks"]?.try &.size || 0) == 1 && (channel_about_meta["primaryLinks"][0]?) &&
(channel_about_meta["primaryLinks"][0]["title"]?.try &.["simpleText"]?.try &.as_s? || "") == "Auto-generated by YouTube"
auto_generated = true
end
end
# This is a small fix to avoid adding extra code on the HTML side.
# I.e., the URL for the "live" tab is .../streams, so use "streams"
# everywhere for the sake of simplicity
(name == "live") ? "streams" : name
end
# Get the currently active tab ("About")
about_tab = extract_selected_tab(tabs_json)
# Try to find the about metadata section
channel_about_meta = about_tab.dig?(
"content",
"sectionListRenderer", "contents", 0,
"itemSectionRenderer", "contents", 0,
"channelAboutFullMetadataRenderer"
)
if !channel_about_meta.nil?
total_views = channel_about_meta.dig?("viewCountText", "simpleText").try &.as_s.gsub(/\D/, "").to_i64? || 0_i64
# The joined text is split into several substrings. The reduce joins those strings before parsing the date.
joined = extract_text(channel_about_meta["joinedDateText"]?)
.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)
# Normal Auto-generated channels
# https://support.google.com/youtube/answer/2579942
# For auto-generated channels, channel_about_meta only has
# ["description"]["simpleText"] and ["primaryLinks"][0]["title"]["simpleText"]
auto_generated = (
(channel_about_meta["primaryLinks"]?.try &.size) == 1 && \
extract_text(channel_about_meta.dig?("primaryLinks", 0, "title")) == "Auto-generated by YouTube"
)
end
tabs = tabs_json.reject { |node| node["tabRenderer"]?.nil? }.map(&.["tabRenderer"]["title"].as_s.downcase)
end
sub_count = initdata
@@ -148,46 +154,20 @@ def get_about_info(ucid, locale) : AboutChannel
joined: joined,
is_family_friendly: is_family_friendly,
allowed_regions: allowed_regions,
tabs: tabs,
tabs: tab_names,
verified: author_verified || false,
)
end
def fetch_related_channels(about_channel : AboutChannel) : Array(AboutRelatedChannel)
# params is {"2:string":"channels"} encoded
channels = YoutubeAPI.browse(browse_id: about_channel.ucid, params: "EghjaGFubmVscw%3D%3D")
tabs = channels.dig?("contents", "twoColumnBrowseResultsRenderer", "tabs").try(&.as_a?) || [] of JSON::Any
tab = tabs.find(&.dig?("tabRenderer", "title").try(&.as_s?).try(&.== "Channels"))
return [] of AboutRelatedChannel if tab.nil?
items = tab.dig?(
"tabRenderer", "content",
"sectionListRenderer", "contents", 0,
"itemSectionRenderer", "contents", 0,
"gridRenderer", "items"
).try &.as_a?
related = [] of AboutRelatedChannel
return related if (items.nil? || items.empty?)
items.each do |item|
renderer = item["gridChannelRenderer"]?
next if !renderer
related_id = renderer.dig("channelId").as_s
related_title = renderer.dig("title", "simpleText").as_s
related_author_url = renderer.dig("navigationEndpoint", "browseEndpoint", "canonicalBaseUrl").as_s
related_author_thumbnail = HelperExtractors.get_thumbnails(renderer)
related << AboutRelatedChannel.new(
ucid: related_id,
author: related_title,
author_url: related_author_url,
author_thumbnail: related_author_thumbnail,
)
def fetch_related_channels(about_channel : AboutChannel, continuation : String? = nil) : {Array(SearchChannel), String?}
if continuation.nil?
# params is {"2:string":"channels"} encoded
initial_data = YoutubeAPI.browse(browse_id: about_channel.ucid, params: "EghjaGFubmVscw%3D%3D")
else
initial_data = YoutubeAPI.browse(continuation)
end
return related
items, continuation = extract_items(initial_data)
return items.select(SearchChannel), continuation
end
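
# Editor's sketch, not part of this commit (variable names hypothetical,
# assuming `about_channel` came from get_about_info): the new signature
# returns an {items, continuation} tuple, so callers page through related
# channels by feeding the token back in.
related, continuation = fetch_related_channels(about_channel)
if continuation
  more, continuation = fetch_related_channels(about_channel, continuation)
  related.concat(more)
end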

View File

@@ -180,11 +180,16 @@ def fetch_channel(ucid, pull_all_videos : Bool)
LOGGER.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}")
page = 1
channel = InvidiousChannel.new({
id: ucid,
author: author,
updated: Time.utc,
deleted: false,
subscribed: nil,
})
LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page")
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = extract_videos(initial_data, author, ucid)
videos, continuation = IV::Channel::Tabs.get_videos(channel)
LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
rss.xpath_nodes("//feed/entry").each do |entry|
@@ -197,7 +202,9 @@ def fetch_channel(ucid, pull_all_videos : Bool)
views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
views ||= 0_i64
channel_video = videos.select { |video| video.id == video_id }[0]?
channel_video = videos
.select(SearchVideo)
.select(&.id.== video_id)[0]?
length_seconds = channel_video.try &.length_seconds
length_seconds ||= 0
@@ -239,30 +246,25 @@ def fetch_channel(ucid, pull_all_videos : Bool)
end
if pull_all_videos
page += 1
ids = [] of String
loop do
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = extract_videos(initial_data, author, ucid)
# Keep fetching videos using the continuation token retrieved earlier
videos, continuation = IV::Channel::Tabs.get_videos(channel, continuation: continuation)
count = videos.size
videos = videos.map { |video| ChannelVideo.new({
id: video.id,
title: video.title,
published: video.published,
updated: Time.utc,
ucid: video.ucid,
author: video.author,
length_seconds: video.length_seconds,
live_now: video.live_now,
premiere_timestamp: video.premiere_timestamp,
views: video.views,
}) }
videos.each do |video|
ids << video.id
count = 0
videos.select(SearchVideo).each do |video|
count += 1
video = ChannelVideo.new({
id: video.id,
title: video.title,
published: video.published,
updated: Time.utc,
ucid: video.ucid,
author: video.author,
length_seconds: video.length_seconds,
live_now: video.live_now,
premiere_timestamp: video.premiere_timestamp,
views: video.views,
})
# We are notified of Red videos elsewhere (PubSub), which includes a correct published date;
# since they don't provide a published date here, we can safely ignore them.
@@ -279,17 +281,10 @@ def fetch_channel(ucid, pull_all_videos : Bool)
end
break if count < 25
page += 1
sleep 500.milliseconds
end
end
channel = InvidiousChannel.new({
id: ucid,
author: author,
updated: Time.utc,
deleted: false,
subscribed: nil,
})
channel.updated = Time.utc
return channel
end

View File

@@ -1,93 +1,28 @@
def fetch_channel_playlists(ucid, author, continuation, sort_by)
if continuation
response_json = YoutubeAPI.browse(continuation)
continuation_items = response_json["onResponseReceivedActions"]?
.try &.[0]["appendContinuationItemsAction"]["continuationItems"]
return [] of SearchItem, nil if !continuation_items
items = [] of SearchItem
continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
extract_item(item, author, ucid).try { |t| items << t }
}
continuation = continuation_items.as_a.last["continuationItemRenderer"]?
.try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
initial_data = YoutubeAPI.browse(continuation)
else
url = "/channel/#{ucid}/playlists?flow=list&view=1"
params =
case sort_by
when "last", "last_added"
# Equivalent to "&sort=lad"
# {"2:string": "playlists", "3:varint": 4, "4:varint": 1, "6:varint": 1}
"EglwbGF5bGlzdHMYBCABMAE%3D"
when "oldest", "oldest_created"
# Formerly "&sort=da"
# Not available anymore :c or maybe ??
# {"2:string": "playlists", "3:varint": 2, "4:varint": 1, "6:varint": 1}
"EglwbGF5bGlzdHMYAiABMAE%3D"
# {"2:string": "playlists", "3:varint": 1, "4:varint": 1, "6:varint": 1}
# "EglwbGF5bGlzdHMYASABMAE%3D"
when "newest", "newest_created"
# Formerly "&sort=dd"
# {"2:string": "playlists", "3:varint": 3, "4:varint": 1, "6:varint": 1}
"EglwbGF5bGlzdHMYAyABMAE%3D"
end
case sort_by
when "last", "last_added"
#
when "oldest", "oldest_created"
url += "&sort=da"
when "newest", "newest_created"
url += "&sort=dd"
else nil # Ignore
end
response = YT_POOL.client &.get(url)
initial_data = extract_initial_data(response.body)
return [] of SearchItem, nil if !initial_data
items = extract_items(initial_data, author, ucid)
continuation = response.body.match(/"token":"(?<continuation>[^"]+)"/).try &.["continuation"]?
initial_data = YoutubeAPI.browse(ucid, params: params || "")
end
return items, continuation
end
# ## NOTE: DEPRECATED
# Reason -> Unstable
# The Protobuf object must be provided with the id of the last playlist from the current "page"
# in order to fetch the next one accurately
# (if the id isn't included, entries shift around erratically between pages,
# leading to repetitions and skip-overs).
#
# Since it's impossible to produce the appropriate Protobuf without an id being provided by the user,
# it's better to stick to the continuation tokens provided by the first request onward
# (a usage sketch follows at the end of this file).
def produce_channel_playlists_url(ucid, cursor, sort = "newest", auto_generated = false)
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:base64" => {
"2:string" => "playlists",
"6:varint" => 2_i64,
"7:varint" => 1_i64,
"12:varint" => 1_i64,
"13:string" => "",
"23:varint" => 0_i64,
},
},
}
if cursor
cursor = Base64.urlsafe_encode(cursor, false) if !auto_generated
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = cursor
end
if auto_generated
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x32_i64
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 1_i64
case sort
when "oldest", "oldest_created"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 2_i64
when "newest", "newest_created"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 3_i64
when "last", "last_added"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 4_i64
else nil # Ignore
end
end
object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
object["80226972:embedded"].delete("3:base64")
continuation = object.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
return extract_items(initial_data, author, ucid)
end
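
# Editor's sketch, not part of this commit (assuming `ucid` and `author` are
# already known): as the deprecation note above recommends, later pages are
# fetched with the continuation token returned by the previous call.
items, continuation = fetch_channel_playlists(ucid, author, nil, "newest")
if continuation
  more, continuation = fetch_channel_playlists(ucid, author, continuation, "newest")
  items.concat(more)
end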

View File

@@ -16,6 +16,14 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
sort_by_numerical =
case sort_by
when "newest" then 1_i64
when "popular" then 2_i64
when "oldest" then 3_i64 # Broken as of 10/2022 :c
else 1_i64 # Fallback to "newest"
end
object_inner_1 = {
"110:embedded" => {
"3:embedded" => {
@@ -24,7 +32,7 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
"1:string" => object_inner_2_encoded,
"2:string" => "00000000-0000-0000-0000-000000000000",
},
"3:varint" => 1_i64,
"3:varint" => sort_by_numerical,
},
},
},
@@ -52,34 +60,138 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
return continuation
end
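
# Editor's sketch, not part of this commit: the sort_by argument maps to the
# "3:varint" field above, so a "popular"-sorted first-page token is obtained
# like this (the channel id below is a hypothetical placeholder):
ucid = "UCxxxxxxxxxxxxxxxxxxxxxx"
popular_ctoken = produce_channel_videos_continuation(ucid, sort_by: "popular")
initial_data = YoutubeAPI.browse(continuation: popular_ctoken)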
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
continuation = produce_channel_videos_continuation(ucid, page,
auto_generated: auto_generated, sort_by: sort_by, v2: true)
return YoutubeAPI.browse(continuation)
end
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo
# 2.times do |i|
# initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
initial_data = get_channel_videos_response(ucid, 1, auto_generated: auto_generated, sort_by: sort_by)
videos = extract_videos(initial_data, author, ucid)
# end
return videos.size, videos
end
def get_latest_videos(ucid)
initial_data = get_channel_videos_response(ucid)
author = initial_data["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
return extract_videos(initial_data, author, ucid)
end
# Used in bypass_captcha_job.cr
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2)
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end
module Invidious::Channel::Tabs
extend self
# -------------------
# Regular videos
# -------------------
def make_initial_video_ctoken(ucid, sort_by) : String
return produce_channel_videos_continuation(ucid, sort_by: sort_by)
end
# Wrapper for AboutChannel, as we still need to call get_videos with
# an author name and ucid directly (e.g. in RSS feeds).
# TODO: figure out how to get rid of that
def get_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
return get_videos(
channel.author, channel.ucid,
continuation: continuation, sort_by: sort_by
)
end
# Wrapper for InvidiousChannel, as we still need to call get_videos with
# an author name and ucid directly (e.g. in RSS feeds).
# TODO: figure out how to get rid of that
def get_videos(channel : InvidiousChannel, *, continuation : String? = nil, sort_by = "newest")
return get_videos(
channel.author, channel.id,
continuation: continuation, sort_by: sort_by
)
end
def get_videos(author : String, ucid : String, *, continuation : String? = nil, sort_by = "newest")
continuation ||= make_initial_video_ctoken(ucid, sort_by)
initial_data = YoutubeAPI.browse(continuation: continuation)
return extract_items(initial_data, author, ucid)
end
def get_60_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest")
if continuation.nil?
# Fetch the first "page" of video
items, next_continuation = get_videos(channel, sort_by: sort_by)
else
# Fetch a "page" of videos using the given continuation token
items, next_continuation = get_videos(channel, continuation: continuation)
end
# If there is more to load, then load a second "page"
# and replace the previous continuation token
if !next_continuation.nil?
items_2, next_continuation = get_videos(channel, continuation: next_continuation)
items.concat items_2
end
return items, next_continuation
end
# -------------------
# Shorts
# -------------------
private def fetch_shorts_data(ucid : String, continuation : String? = nil)
if continuation.nil?
# EgZzaG9ydHPyBgUKA5oBAA%3D%3D is the protobuf object to load "shorts"
# TODO: try to extract the continuation tokens that allow other sorting options
return YoutubeAPI.browse(ucid, params: "EgZzaG9ydHPyBgUKA5oBAA%3D%3D")
else
return YoutubeAPI.browse(continuation: continuation)
end
end
def get_shorts(channel : AboutChannel, continuation : String? = nil)
initial_data = self.fetch_shorts_data(channel.ucid, continuation)
begin
# Try to parse the initial data fetched above
return extract_items(initial_data, channel.author, channel.ucid)
rescue ex : RetryOnceException
# Sometimes, for a completely unknown reason, the "reelItemRenderer"
# object is missing some critical information (this happens roughly once
# every 20 requests). Refreshing the page is required to properly
# show the "shorts" tab.
#
# In order to make the experience smoother for the user, we simulate
# said page refresh by fetching the JSON again. If that still doesn't
# work, we raise a BrokenTubeException, as something is really broken.
begin
initial_data = self.fetch_shorts_data(channel.ucid, continuation)
return extract_items(initial_data, channel.author, channel.ucid)
rescue ex : RetryOnceException
raise BrokenTubeException.new "reelPlayerHeaderSupportedRenderers"
end
end
end
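
# Editor's sketch, not part of this commit (helper name hypothetical): the
# retry-once logic above could be factored into a generic helper that runs
# the block again on RetryOnceException before giving up.
private def with_one_retry(broken_key : String)
  yield
rescue RetryOnceException
  begin
    yield
  rescue RetryOnceException
    raise BrokenTubeException.new broken_key
  end
end
# get_shorts would then reduce to:
#   with_one_retry("reelPlayerHeaderSupportedRenderers") do
#     initial_data = self.fetch_shorts_data(channel.ucid, continuation)
#     extract_items(initial_data, channel.author, channel.ucid)
#   end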
# -------------------
# Livestreams
# -------------------
def get_livestreams(channel : AboutChannel, continuation : String? = nil)
if continuation.nil?
# EgdzdHJlYW1z8gYECgJ6AA%3D%3D is the protobuf object to load "streams"
initial_data = YoutubeAPI.browse(channel.ucid, params: "EgdzdHJlYW1z8gYECgJ6AA%3D%3D")
else
initial_data = YoutubeAPI.browse(continuation: continuation)
end
return extract_items(initial_data, channel.author, channel.ucid)
end
def get_60_livestreams(channel : AboutChannel, continuation : String? = nil)
if continuation.nil?
# Fetch the first "page" of streams
items, next_continuation = get_livestreams(channel)
else
# Fetch a "page" of streams using the given continuation token
items, next_continuation = get_livestreams(channel, continuation: continuation)
end
# If there is more to load, then load a second "page"
# and replace the previous continuation token
if !next_continuation.nil?
items_2, next_continuation = get_livestreams(channel, continuation: next_continuation)
items.concat items_2
end
return items, next_continuation
end
end
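
# Editor's sketch, not part of this commit (assuming `channel` is an
# AboutChannel): every tab helper in this module returns an
# {items, continuation} tuple, so call sites paginate the same way.
shorts, shorts_continuation = Invidious::Channel::Tabs.get_shorts(channel)
streams, streams_continuation = Invidious::Channel::Tabs.get_60_livestreams(channel)
if streams_continuation
  more, streams_continuation = Invidious::Channel::Tabs.get_60_livestreams(channel, streams_continuation)
  streams.concat(more)
end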