From 512b0e676a2c6214d93d849f1ef58670ffd59037 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Wed, 2 Jul 2025 13:38:42 -0400 Subject: [PATCH 1/5] extract hash from js --- yt_dlp/extractor/khanacademy.py | 108 ++++++++++++++++++++++++++++++-- 1 file changed, 103 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/khanacademy.py b/yt_dlp/extractor/khanacademy.py index 42eef3c922..b487fcd74e 100644 --- a/yt_dlp/extractor/khanacademy.py +++ b/yt_dlp/extractor/khanacademy.py @@ -1,8 +1,11 @@ import json +import re from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, + js_to_json, make_archive_id, parse_iso8601, str_or_none, @@ -13,9 +16,101 @@ class KhanAcademyBaseIE(InfoExtractor): + _RUNTIME_JS_URL = None + _MAIN_JS_URL = None _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)' - _PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4' + def _parse_js_urls(self, webpage): + search = lambda name: self._search_regex( + rf'