Add parser for categories (shelfRenderer)

This commit adds a new parser for YT's shelfRenderers, which are typically used to denote different categories. The code for featured channels parsing has also been moved to use the new parser, but some additional refactoring is needed there. The ContinuationExtractor has also been improved and is now capable of extracting continuation data that is packaged under "appendContinuationItemsAction". In addition, this commit adds some useful helper functions to extract the currently selected tab and the continuation token. This is mainly to reduce code size and repetition. -- This cherry-picked commit also removes the code for parsing featured channels present in the original. (cherry picked from commit 8000d538dbbf1eb9c78e000b1449926ba3b24da9)
2025-12-19 19:38:51 +00:00 · 2021-05-07 05:13:53 -07:00
parent 1323b94b7a
commit a50f64f6e9
5 changed files with 389 additions and 244 deletions
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -13,6 +13,7 @@ private ITEM_PARSERS = {
  ChannelParser.new,
  GridPlaylistParser.new,
  PlaylistParser.new,
+  CategoryParser.new,
 }

 private struct AuthorFallback
@@ -95,7 +96,7 @@ end

 private class ChannelParser < ItemParser
  def process(item, author_fallback)
-    if item_contents = item["channelRenderer"]?
+    if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?)
      return self.parse(item_contents, author_fallback)
    end
  end
@@ -194,6 +195,88 @@ private class PlaylistParser < ItemParser
  end
 end

+# Parses a YouTube "shelfRenderer" (a titled shelf of items, i.e. a category)
+# into a Category.
+private class CategoryParser < ItemParser
+  # Returns the parsed Category when *item* has a "shelfRenderer" key, nil otherwise.
+  def process(item, author_fallback)
+    if item_contents = item["shelfRenderer"]?
+      return self.parse(item_contents, author_fallback)
+    end
+  end
+
+  # Builds a Category from the shelf's title, browse endpoint, badges and nested items.
+  # *author_fallback* mirrors the call made in `process` but is not used by this parser.
+  def parse(item_contents, author_fallback)
+    # Title extraction is a bit complicated. It is either {"simpleText": ...} or
+    # {"runs": [{"text": ...}]}, and sometimes YT doesn't send a title at all.
+    # Every lookup is optional so a missing piece yields "" instead of raising.
+    title_container = item_contents["title"]?
+    title = title_container.try(&.["simpleText"]?).try(&.as_s) ||
+            title_container.try(&.dig?("runs", 0, "text")).try(&.as_s) || ""
+
+    # Channel categories carry a "params" value. Playlist/feed categories don't
+    # (YT doesn't include one) and use "browseId" instead. Video and trending
+    # categories have no endpoint data, which leaves the value empty.
+    browse_endpoint = item_contents["endpoint"]?.try &.["browseEndpoint"]?
+    browse_endpoint_data = ""
+    if browse_endpoint
+      endpoint_value = browse_endpoint["params"]? || browse_endpoint["browseId"]?
+      browse_endpoint_data = endpoint_value.try(&.as_s) || ""
+    end
+
+    # Sometimes a category can have badges.
+    badges = [] of Tuple(String, String) # (Badge style, label)
+    item_contents["badges"]?.try &.as_a.each do |badge|
+      badge = badge["metadataBadgeRenderer"]
+      badges << {badge["style"].as_s, badge["label"].as_s}
+    end
+
+    # The items are either under one of three renderers inside "content" or
+    # directly under "contents". Each candidate must be looked up with `[]?`:
+    # a raising `[]` on one candidate makes every later one unreachable.
+    content_container = nil
+    if special_container = item_contents["content"]?
+      content_container = special_container["horizontalListRenderer"]? ||
+                          special_container["expandedShelfContentsRenderer"]? ||
+                          special_container["verticalListRenderer"]?
+    end
+    content_container ||= item_contents["contents"]?
+
+    # Content parsing. A shelf without a recognised container yields no contents.
+    contents = [] of SearchItem
+    raw_contents = content_container.try(&.["items"]?).try(&.as_a) || [] of JSON::Any
+    raw_contents.each do |raw_item|
+      result = extract_item(raw_item)
+      if !result.nil?
+        contents << result
+      end
+    end
+
+    Category.new({
+      title:                title,
+      contents:             contents,
+      browse_endpoint_data: browse_endpoint_data,
+      continuation_token:   nil,
+      badges:               badges,
+    })
+  end
+end
+
 # The following are the extractors for extracting an array of items from
 # the internal Youtube API's JSON response. The result is then packaged into
 # a structure we can more easily use via the parsers above. Their internals are
@@ -217,19 +300,16 @@ private class YoutubeTabsExtractor < ItemsContainerExtractor
  private def extract(target)
    raw_items = [] of JSON::Any
    selected_tab = extract_selected_tab(target["tabs"])
-    content = selected_tab["tabRenderer"]["content"]
+    content = selected_tab["content"]

    content["sectionListRenderer"]["contents"].as_a.each do |renderer_container|
      renderer_container = renderer_container["itemSectionRenderer"]
      renderer_container_contents = renderer_container["contents"].as_a[0]

-      # Shelf renderer usually refer to a category and would need special handling once
-      # An extractor for categories are added. But for now it is just used to
-      # extract items for the trending page
+      # Category extraction
      if items_container = renderer_container_contents["shelfRenderer"]?
-        if items_container["content"]["expandedShelfContentsRenderer"]?
-          items_container = items_container["content"]["expandedShelfContentsRenderer"]
-        end
+        raw_items << renderer_container_contents
+        next
      elsif items_container = renderer_container_contents["gridRenderer"]?
      else
        items_container = renderer_container_contents
@@ -265,6 +345,8 @@ private class ContinuationExtractor < ItemsContainerExtractor
  def process(initial_data)
    if target = initial_data["continuationContents"]?
      self.extract(target)
+    elsif target = initial_data["appendContinuationItemsAction"]?
+      self.extract(target)
    end
  end

@@ -272,13 +354,16 @@ private class ContinuationExtractor < ItemsContainerExtractor
    raw_items = [] of JSON::Any
    if content = target["gridContinuation"]?
      raw_items = content["items"].as_a
+    elsif content = target["continuationItems"]?
+      raw_items = content.as_a
    end

    return raw_items
  end
 end

-def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
+def extract_item(item : JSON::Any, author_fallback : String? = nil,
+                 author_id_fallback : String? = nil)
  # Parses an item from Youtube's JSON response into a more usable structure.
  # The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
  author_fallback = AuthorFallback.new(author_fallback, author_id_fallback)
@@ -295,13 +380,20 @@ def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fa
  # TODO radioRenderer, showRenderer, shelfRenderer, horizontalCardListRenderer, searchPyvRenderer
 end

-def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
+def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil,
+                  author_id_fallback : String? = nil)
  items = [] of SearchItem
-  initial_data = initial_data["contents"]?.try &.as_h || initial_data["response"]?.try &.as_h || initial_data
+
+  if unpackaged_data = initial_data["contents"]?.try &.as_h
+  elsif unpackaged_data = initial_data["response"]?.try &.as_h
+  elsif unpackaged_data = initial_data["onResponseReceivedActions"]?.try &.as_a.[0].as_h
+  else
+    unpackaged_data = initial_data
+  end

  # This is identicial to the parser cyling of extract_item().
  ITEM_CONTAINER_EXTRACTOR.each do |extractor|
-    results = extractor.process(initial_data)
+    results = extractor.process(unpackaged_data)
    if !results.nil?
      results.each do |item|
        parsed_result = extract_item(item, author_fallback, author_id_fallback)
@@ -310,6 +402,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
          items << parsed_result
        end
      end
+      return items
    end
  end