
[ie/youtube] Implement external n/sig solver (#14157)

Closes #14404, Closes #14431, Closes #14680, Closes #14707

Authored by: bashonly, coletdjnz, seproDev, Grub4K

Co-authored-by: coletdjnz <coletdjnz@protonmail.com>
Co-authored-by: bashonly <bashonly@protonmail.com>
Co-authored-by: sepro <sepro@sepr0.com>
Commit 6224a38988 (parent d6ee677253)
Author: Simon Sawicki, 2025-10-31 23:13:04 +01:00 (committed by GitHub)
45 changed files with 3387 additions and 1127 deletions


@@ -214,7 +214,7 @@ jobs:
- name: Build Unix platform-independent binary
run: |
make all tar
make all-extra tar
- name: Verify --update-to
if: vars.UPDATE_TO_VERIFICATION

.github/workflows/challenge-tests.yml (new file)

@@ -0,0 +1,77 @@
name: Challenge Tests
on:
push:
paths:
- .github/workflows/challenge-tests.yml
- test/test_jsc/*.py
- yt_dlp/extractor/youtube/jsc/**.js
- yt_dlp/extractor/youtube/jsc/**.py
- yt_dlp/extractor/youtube/pot/**.py
- yt_dlp/utils/_jsruntime.py
pull_request:
paths:
- .github/workflows/challenge-tests.yml
- test/test_jsc/*.py
- yt_dlp/extractor/youtube/jsc/**.js
- yt_dlp/extractor/youtube/jsc/**.py
- yt_dlp/extractor/youtube/pot/**.py
- yt_dlp/utils/_jsruntime.py
permissions:
contents: read
concurrency:
group: challenge-tests-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
tests:
name: Challenge Tests
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', pypy-3.11]
env:
QJS_VERSION: '2025-04-26' # Earliest version with rope strings
steps:
- uses: actions/checkout@v5
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install Deno
uses: denoland/setup-deno@v2
with:
deno-version: '2.0.0' # minimum supported version
- name: Install Bun
uses: oven-sh/setup-bun@v2
with:
# minimum supported version is 1.0.31 but earliest available Windows version is 1.1.0
bun-version: ${{ (matrix.os == 'windows-latest' && '1.1.0') || '1.0.31' }}
- name: Install Node
uses: actions/setup-node@v6
with:
node-version: '20.0' # minimum supported version
- name: Install QuickJS (Linux)
if: matrix.os == 'ubuntu-latest'
run: |
wget "https://bellard.org/quickjs/binary_releases/quickjs-linux-x86_64-${QJS_VERSION}.zip" -O quickjs.zip
unzip quickjs.zip qjs
sudo install qjs /usr/local/bin/qjs
- name: Install QuickJS (Windows)
if: matrix.os == 'windows-latest'
shell: pwsh
run: |
Invoke-WebRequest "https://bellard.org/quickjs/binary_releases/quickjs-win-x86_64-${Env:QJS_VERSION}.zip" -OutFile quickjs.zip
unzip quickjs.zip
- name: Install test requirements
run: |
python ./devscripts/install_deps.py --print --only-optional-groups --include-group test > requirements.txt
python ./devscripts/install_deps.py --print -c certifi -c requests -c urllib3 -c yt-dlp-ejs >> requirements.txt
python -m pip install -U -r requirements.txt
- name: Run tests
timeout-minutes: 15
run: |
python -m yt_dlp -v --js-runtimes node --js-runtimes bun --js-runtimes quickjs || true
python ./devscripts/run_tests.py test/test_jsc -k download


@@ -7,6 +7,7 @@ on:
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/**.py'
- yt_dlp/extractor/youtube/**.py
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
@@ -17,6 +18,7 @@ on:
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/**.py'
- yt_dlp/extractor/youtube/**.py
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py


@@ -269,9 +269,10 @@ jobs:
"[![Master](https://img.shields.io/badge/Master%20builds-lightblue.svg?style=for-the-badge)]" \
"(https://github.com/${MASTER_REPO}/releases/latest \"Master builds\")" >> ./RELEASE_NOTES
fi
printf '\n\n%s\n\n%s%s\n\n---\n' \
printf '\n\n%s\n\n%s%s%s\n\n---\n' \
"#### A description of the various files is in the [README](https://github.com/${REPOSITORY}#release-files)" \
"The PyInstaller-bundled executables are subject to the licenses described in " \
"The zipimport Unix executable contains code licensed under ISC and MIT. " \
"The PyInstaller-bundled executables are subject to these and other licenses, all of which are compiled in " \
"[THIRD_PARTY_LICENSES.txt](https://github.com/${BASE_REPO}/blob/${HEAD_SHA}/THIRD_PARTY_LICENSES.txt)" >> ./RELEASE_NOTES
python ./devscripts/make_changelog.py -vv --collapsible >> ./RELEASE_NOTES
printf '%s\n\n' '**This is a pre-release build**' >> ./PRERELEASE_NOTES

.github/workflows/signature-tests.yml (deleted file)

@@ -1,41 +0,0 @@
name: Signature Tests
on:
push:
paths:
- .github/workflows/signature-tests.yml
- test/test_youtube_signature.py
- yt_dlp/jsinterp.py
pull_request:
paths:
- .github/workflows/signature-tests.yml
- test/test_youtube_signature.py
- yt_dlp/jsinterp.py
permissions:
contents: read
concurrency:
group: signature-tests-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
tests:
name: Signature Tests
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', pypy-3.11]
steps:
- uses: actions/checkout@v5
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install test requirements
run: python ./devscripts/install_deps.py --only-optional-groups --include-group test
- name: Run tests
timeout-minutes: 15
run: |
python3 -m yt_dlp -v || true # Print debug head
python3 ./devscripts/run_tests.py test/test_youtube_signature.py

.gitignore

@@ -107,6 +107,7 @@ README.txt
test/testdata/sigs/player-*.js
test/testdata/thumbnails/empty.webp
test/testdata/thumbnails/foo\ %d\ bar/foo_%d.*
.ejs-*
# Binary
/youtube-dl
@@ -129,3 +130,6 @@ yt-dlp.zip
# Plugins
ytdlp_plugins/
yt-dlp-plugins
# Packages
yt_dlp_ejs/

Makefile

@@ -1,4 +1,5 @@
all: lazy-extractors yt-dlp doc pypi-files
all-extra: lazy-extractors yt-dlp-extra doc pypi-files
clean: clean-test clean-dist
clean-all: clean clean-cache
completions: completion-bash completion-fish completion-zsh
@@ -15,7 +16,11 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
.PHONY: all clean clean-all clean-test clean-dist clean-cache \
completions completion-bash completion-fish completion-zsh \
doc issuetemplates supportedsites ot offlinetest codetest test \
tar pypi-files lazy-extractors install uninstall
tar pypi-files lazy-extractors install uninstall \
all-extra yt-dlp-extra current-ejs-version
.IGNORE: current-ejs-version
.SILENT: current-ejs-version
clean-test:
rm -rf tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
@@ -25,7 +30,8 @@ clean-test:
test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp "test/testdata/thumbnails/foo %d bar/foo_%d."*
clean-dist:
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS \
yt-dlp.zip .ejs-* yt_dlp_ejs/
clean-cache:
find . \( \
-type d -name ".*_cache" -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
@@ -81,28 +87,49 @@ test:
offlinetest: codetest
$(PYTHON) -m pytest -Werror -m "not download"
CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
CODE_FOLDERS != $(CODE_FOLDERS_CMD)
CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD))
CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
CODE_FILES != $(CODE_FILES_CMD)
CODE_FILES ?= $(shell $(CODE_FILES_CMD))
yt-dlp: $(CODE_FILES)
PY_CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's|/__init__\.py||' | grep -v '/__' | sort
PY_CODE_FOLDERS != $(PY_CODE_FOLDERS_CMD)
PY_CODE_FOLDERS ?= $(shell $(PY_CODE_FOLDERS_CMD))
PY_CODE_FILES_CMD = for f in $(PY_CODE_FOLDERS) ; do echo "$$f" | sed 's|$$|/*.py|' ; done
PY_CODE_FILES != $(PY_CODE_FILES_CMD)
PY_CODE_FILES ?= $(shell $(PY_CODE_FILES_CMD))
JS_CODE_FOLDERS_CMD = find yt_dlp -type f -name '*.js' | sed 's|/[^/]\{1,\}\.js$$||' | uniq
JS_CODE_FOLDERS != $(JS_CODE_FOLDERS_CMD)
JS_CODE_FOLDERS ?= $(shell $(JS_CODE_FOLDERS_CMD))
JS_CODE_FILES_CMD = for f in $(JS_CODE_FOLDERS) ; do echo "$$f" | sed 's|$$|/*.js|' ; done
JS_CODE_FILES != $(JS_CODE_FILES_CMD)
JS_CODE_FILES ?= $(shell $(JS_CODE_FILES_CMD))
yt-dlp.zip: $(PY_CODE_FILES) $(JS_CODE_FILES)
mkdir -p zip
for d in $(CODE_FOLDERS) ; do \
for d in $(PY_CODE_FOLDERS) ; do \
mkdir -p zip/$$d ;\
cp -pPR $$d/*.py zip/$$d/ ;\
done
(cd zip && touch -t 200001010101 $(CODE_FILES))
mv zip/yt_dlp/__main__.py zip/
(cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
for d in $(JS_CODE_FOLDERS) ; do \
mkdir -p zip/$$d ;\
cp -pPR $$d/*.js zip/$$d/ ;\
done
(cd zip && touch -t 200001010101 $(PY_CODE_FILES) $(JS_CODE_FILES))
rm -f zip/yt_dlp/__main__.py
(cd zip && zip -q ../yt-dlp.zip $(PY_CODE_FILES) $(JS_CODE_FILES))
rm -rf zip
yt-dlp: yt-dlp.zip
mkdir -p zip
cp -pP yt_dlp/__main__.py zip/
touch -t 200001010101 zip/__main__.py
(cd zip && zip -q ../yt-dlp.zip __main__.py)
echo '#!$(PYTHON)' > yt-dlp
cat yt-dlp.zip >> yt-dlp
rm yt-dlp.zip
chmod a+x yt-dlp
rm -rf zip
README.md: $(CODE_FILES) devscripts/make_readme.py
README.md: $(PY_CODE_FILES) devscripts/make_readme.py
COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py
CONTRIBUTING.md: README.md devscripts/make_contributing.py
@@ -127,15 +154,15 @@ yt-dlp.1: README.md devscripts/prepare_manpage.py
pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
rm -f yt-dlp.1.temp.md
completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in
completions/bash/yt-dlp: $(PY_CODE_FILES) devscripts/bash-completion.in
mkdir -p completions/bash
$(PYTHON) devscripts/bash-completion.py
completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in
completions/zsh/_yt-dlp: $(PY_CODE_FILES) devscripts/zsh-completion.in
mkdir -p completions/zsh
$(PYTHON) devscripts/zsh-completion.py
completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
completions/fish/yt-dlp.fish: $(PY_CODE_FILES) devscripts/fish-completion.in
mkdir -p completions/fish
$(PYTHON) devscripts/fish-completion.py
@@ -172,3 +199,45 @@ CONTRIBUTORS: Changelog.md
echo 'Updating $@ from git commit history' ; \
$(PYTHON) devscripts/make_changelog.py -v -c > /dev/null ; \
fi
# The following EJS_-prefixed variables are auto-generated by devscripts/update_ejs.py
# DO NOT EDIT!
EJS_VERSION = 0.3.0
EJS_WHEEL_NAME = yt_dlp_ejs-0.3.0-py3-none-any.whl
EJS_WHEEL_HASH = sha256:abbf269fa1674cab7b7b266e51e89e0e60b01a11a0fdf3cd63528683190cdd07
EJS_PY_FOLDERS = yt_dlp_ejs yt_dlp_ejs/yt yt_dlp_ejs/yt/solver
EJS_PY_FILES = yt_dlp_ejs/__init__.py yt_dlp_ejs/_version.py yt_dlp_ejs/yt/__init__.py yt_dlp_ejs/yt/solver/__init__.py
EJS_JS_FOLDERS = yt_dlp_ejs/yt/solver
EJS_JS_FILES = yt_dlp_ejs/yt/solver/core.min.js yt_dlp_ejs/yt/solver/lib.min.js
yt-dlp-extra: current-ejs-version .ejs-$(EJS_VERSION) $(EJS_PY_FILES) $(EJS_JS_FILES) yt-dlp.zip
mkdir -p zip
for d in $(EJS_PY_FOLDERS) ; do \
mkdir -p zip/$$d ;\
cp -pPR $$d/*.py zip/$$d/ ;\
done
for d in $(EJS_JS_FOLDERS) ; do \
mkdir -p zip/$$d ;\
cp -pPR $$d/*.js zip/$$d/ ;\
done
(cd zip && touch -t 200001010101 $(EJS_PY_FILES) $(EJS_JS_FILES))
(cd zip && zip -q ../yt-dlp.zip $(EJS_PY_FILES) $(EJS_JS_FILES))
cp -pP yt_dlp/__main__.py zip/
touch -t 200001010101 zip/__main__.py
(cd zip && zip -q ../yt-dlp.zip __main__.py)
echo '#!$(PYTHON)' > yt-dlp
cat yt-dlp.zip >> yt-dlp
rm yt-dlp.zip
chmod a+x yt-dlp
rm -rf zip
.ejs-$(EJS_VERSION):
@echo Downloading yt-dlp-ejs
@echo "yt-dlp-ejs==$(EJS_VERSION) --hash $(EJS_WHEEL_HASH)" > .ejs-requirements.txt
$(PYTHON) -m pip download -d ./build --no-deps --require-hashes -r .ejs-requirements.txt
unzip -o build/$(EJS_WHEEL_NAME) "yt_dlp_ejs/*"
@touch .ejs-$(EJS_VERSION)
current-ejs-version:
rm -rf .ejs-*
touch .ejs-$$($(PYTHON) -c 'import sys; sys.path = [""]; from yt_dlp_ejs import version; print(version)' 2>/dev/null)

README.md

@@ -145,9 +145,11 @@ While yt-dlp is licensed under the [Unlicense](LICENSE), many of the release fil
Most notably, the PyInstaller-bundled executables include GPLv3+ licensed code, and as such the combined work is licensed under [GPLv3+](https://www.gnu.org/licenses/gpl-3.0.html).
See [THIRD_PARTY_LICENSES.txt](THIRD_PARTY_LICENSES.txt) for details.
The zipimport Unix executable (`yt-dlp`) contains [ISC](https://github.com/meriyah/meriyah/blob/main/LICENSE.md) licensed code from [`meriyah`](https://github.com/meriyah/meriyah) and [MIT](https://github.com/davidbonnet/astring/blob/main/LICENSE) licensed code from [`astring`](https://github.com/davidbonnet/astring).
The zipimport binary (`yt-dlp`), the source tarball (`yt-dlp.tar.gz`), and the PyPI source distribution & wheel only contain code licensed under the [Unlicense](LICENSE).
See [THIRD_PARTY_LICENSES.txt](THIRD_PARTY_LICENSES.txt) for more details.
The git repository, the source tarball (`yt-dlp.tar.gz`), the PyPI source distribution and the PyPI built distribution (wheel) only contain code licensed under the [Unlicense](LICENSE).
<!-- MANPAGE: END EXCLUDED SECTION -->
@@ -201,7 +203,7 @@ Python versions 3.10+ (CPython) and 3.11+ (PyPy) are supported. Other versions a
On Windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp. You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually.
-->
While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended
While all the other dependencies are optional, `ffmpeg`, `ffprobe`, `yt-dlp-ejs` and a JavaScript runtime are highly recommended
### Strongly recommended
@@ -211,6 +213,10 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
**Important**: What you need is ffmpeg *binary*, **NOT** [the Python package of the same name](https://pypi.org/project/ffmpeg)
* [**yt-dlp-ejs**](https://github.com/yt-dlp/ejs) - Required for deciphering YouTube n/sig values. Licensed under [Unlicense](https://github.com/yt-dlp/ejs/blob/main/LICENSE), bundles [MIT](https://github.com/davidbonnet/astring/blob/main/LICENSE) and [ISC](https://github.com/meriyah/meriyah/blob/main/LICENSE.md) components.
A JavaScript runtime like [**deno**](https://deno.land) (recommended), [**node.js**](https://nodejs.org), [**bun**](https://bun.sh), or [**QuickJS**](https://bellard.org/quickjs/) is also required to run yt-dlp-ejs. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/EJS).
### Networking
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
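As a quick way to confirm the new optional pieces documented above are in place, here is a small sketch that probes for the `yt_dlp_ejs` package (the same `find_spec` check the new test_jsc suite uses) and for runtime binaries on `PATH`; the binary names are assumptions based on the runtime list above:

```python
import importlib.util
import shutil

# Check for the yt-dlp-ejs package (same probe the new test_jsc suite uses).
has_ejs = importlib.util.find_spec('yt_dlp_ejs') is not None

# Look for supported runtimes on PATH; binary names are assumptions based on
# the documented runtimes (deno, node, bun, quickjs/qjs).
runtimes = {name: shutil.which(name) for name in ('deno', 'node', 'bun', 'qjs')}

print(f'yt-dlp-ejs installed: {has_ejs}')
for name, path in runtimes.items():
    print(f'{name}: {path or "not found"}')
```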
@@ -235,7 +241,7 @@ The following provide support for impersonating browser requests. This may be re
### Misc
* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in some extractors where JavaScript needs to be run. No longer used for YouTube. To be deprecated in the near future. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
* Any external downloader that you want to use with `--downloader`
@@ -362,6 +368,26 @@ Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords
--no-plugin-dirs Clear plugin directories to search,
including defaults and those provided by
previous --plugin-dirs
--js-runtimes RUNTIME[:PATH] Additional JavaScript runtime to enable,
with an optional path to the runtime
location. This option can be used multiple
times to enable multiple runtimes. Supported
runtimes: deno, node, bun, quickjs. By
default, only "deno" runtime is enabled.
--no-js-runtimes Clear JavaScript runtimes to enable,
including defaults and those provided by
previous --js-runtimes
--remote-components COMPONENT Remote components to allow yt-dlp to fetch
when required. You can use this option
multiple times to allow multiple components.
Supported values: ejs:npm (external
JavaScript components from npm), ejs:github
(external JavaScript components from yt-dlp-
ejs GitHub). By default, no remote
components are allowed.
--no-remote-components Disallow fetching of all remote components,
including any previously allowed by
--remote-components or defaults.
--flat-playlist Do not extract a playlist's URL result
entries; some entry metadata may be missing
and downloading may be bypassed
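The new runtime options also have an embedding-API counterpart. A minimal sketch, assuming the `js_runtimes` option takes the same runtime-name to settings mapping that the new test fixtures pass to `YoutubeDL`; the `--remote-components` counterpart is omitted because its option key is not shown in this diff, and the URL is only an example:

```python
from yt_dlp import YoutubeDL

# Rough equivalent of `--js-runtimes deno --js-runtimes node` on the CLI.
# An empty dict per runtime means default settings, mirroring how the new
# test_jsc fixtures construct YoutubeDL.
ydl_opts = {
    'js_runtimes': {
        'deno': {},
        'node': {},
    },
}

with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```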
@@ -1814,7 +1840,7 @@ The following extractors use this feature:
#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `android_sdkless,tv,web_safari,web` is used. `android_sdkless` is omitted if cookies are passed. If premium cookies are passed, `tv,web_creator,web_safari,web` is used instead. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `tv,android_sdkless,web` is used. If no JavaScript runtime is available, then `android_sdkless,web_safari,web` is used. If logged-in cookies are passed to yt-dlp, then `tv,web_safari,web` is used for free accounts and `tv,web_creator,web` is used for premium accounts. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
* `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
@@ -1833,6 +1859,10 @@ The following extractors use this feature:
* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try to fetch a PO Token regardless of whether the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
* `playback_wait`: Duration (in seconds) to wait between the extraction and download stages in order to ensure the formats are available. The default is `6` seconds
* `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default)
#### youtube-ejs
* `jitless`: Run supported JavaScript engines in JIT-less mode. Supported runtimes are `deno`, `node` and `bun`. Provides better security at the cost of performance/speed. Note that `node` and `bun` are still considered insecure. Either `true` or `false` (default)
#### youtubepot-webpo
* `bind_to_visitor_id`: Whether to use the Visitor ID instead of Visitor Data for caching WebPO tokens. Either `true` (default) or `false`
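These extractor args can also be supplied programmatically. A minimal sketch, assuming the `extractor_args` option mirrors the CLI namespaces documented above (`youtube`, `youtube-ejs`) with each value given as a list of strings:

```python
from yt_dlp import YoutubeDL

# Programmatic equivalent of:
#   --extractor-args "youtube:player_client=tv,web;jsc_trace=true"
#   --extractor-args "youtube-ejs:jitless=true"
ydl_opts = {
    'extractor_args': {
        'youtube': {
            'player_client': ['tv', 'web'],
            'jsc_trace': ['true'],
        },
        'youtube-ejs': {
            'jitless': ['true'],
        },
    },
}

with YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(info['title'])
```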

THIRD_PARTY_LICENSES.txt

@@ -4431,3 +4431,43 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Meriyah | ISC
URL: https://github.com/meriyah/meriyah
--------------------------------------------------------------------------------
ISC License
Copyright (c) 2019 and later, KFlash and others.
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
--------------------------------------------------------------------------------
Astring | MIT
URL: https://github.com/davidbonnet/astring/
--------------------------------------------------------------------------------
Copyright (c) 2015, David Bonnet <david@bonnet.cc>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


@@ -271,6 +271,19 @@ DEPENDENCIES: list[Dependency] = [
license_url='https://raw.githubusercontent.com/python-websockets/websockets/refs/heads/main/LICENSE',
project_url='https://websockets.readthedocs.io/',
),
# Dependencies of yt-dlp-ejs
Dependency(
name='Meriyah',
license='ISC',
license_url='https://raw.githubusercontent.com/meriyah/meriyah/refs/heads/main/LICENSE.md',
project_url='https://github.com/meriyah/meriyah',
),
Dependency(
name='Astring',
license='MIT',
license_url='https://raw.githubusercontent.com/davidbonnet/astring/refs/heads/main/LICENSE',
project_url='https://github.com/davidbonnet/astring/',
),
]

devscripts/update_ejs.py (new file)

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
from __future__ import annotations
import contextlib
import io
import json
import hashlib
import pathlib
import urllib.request
import zipfile
TEMPLATE = '''\
# This file is generated by devscripts/update_ejs.py. DO NOT MODIFY!
VERSION = {version!r}
HASHES = {{
{hash_mapping}
}}
'''
PREFIX = ' "yt-dlp-ejs=='
BASE_PATH = pathlib.Path(__file__).parent.parent
PYPROJECT_PATH = BASE_PATH / 'pyproject.toml'
PACKAGE_PATH = BASE_PATH / 'yt_dlp/extractor/youtube/jsc/_builtin/vendor'
RELEASE_URL = 'https://api.github.com/repos/yt-dlp/ejs/releases/latest'
ASSETS = {
'yt.solver.lib.js': False,
'yt.solver.lib.min.js': False,
'yt.solver.deno.lib.js': True,
'yt.solver.bun.lib.js': True,
'yt.solver.core.min.js': False,
'yt.solver.core.js': True,
}
MAKEFILE_PATH = BASE_PATH / 'Makefile'
def request(url: str):
return contextlib.closing(urllib.request.urlopen(url))
def makefile_variables(
version: str | None = None,
name: str | None = None,
digest: str | None = None,
data: bytes | None = None,
keys_only: bool = False,
) -> dict[str, str | None]:
assert keys_only or all(arg is not None for arg in (version, name, digest, data))
return {
'EJS_VERSION': None if keys_only else version,
'EJS_WHEEL_NAME': None if keys_only else name,
'EJS_WHEEL_HASH': None if keys_only else digest,
'EJS_PY_FOLDERS': None if keys_only else list_wheel_contents(data, 'py', files=False),
'EJS_PY_FILES': None if keys_only else list_wheel_contents(data, 'py', folders=False),
'EJS_JS_FOLDERS': None if keys_only else list_wheel_contents(data, 'js', files=False),
'EJS_JS_FILES': None if keys_only else list_wheel_contents(data, 'js', folders=False),
}
def list_wheel_contents(
wheel_data: bytes,
suffix: str | None = None,
folders: bool = True,
files: bool = True,
) -> str:
assert folders or files, 'at least one of "folders" or "files" must be True'
path_gen = (zinfo.filename for zinfo in zipfile.ZipFile(io.BytesIO(wheel_data)).infolist())
filtered = filter(lambda path: path.startswith('yt_dlp_ejs/'), path_gen)
if suffix:
filtered = filter(lambda path: path.endswith(f'.{suffix}'), filtered)
files_list = list(filtered)
if not folders:
return ' '.join(files_list)
folders_list = list(dict.fromkeys(path.rpartition('/')[0] for path in files_list))
if not files:
return ' '.join(folders_list)
return ' '.join(folders_list + files_list)
def main():
current_version = None
with PYPROJECT_PATH.open() as file:
for line in file:
if not line.startswith(PREFIX):
continue
current_version, _, _ = line.removeprefix(PREFIX).partition('"')
if not current_version:
print('yt-dlp-ejs dependency line could not be found')
return
makefile_info = makefile_variables(keys_only=True)
prefixes = tuple(f'{key} = ' for key in makefile_info)
with MAKEFILE_PATH.open() as file:
for line in file:
if not line.startswith(prefixes):
continue
key, _, val = line.partition(' = ')
makefile_info[key] = val.rstrip()
with request(RELEASE_URL) as resp:
info = json.load(resp)
version = info['tag_name']
if version == current_version:
print(f'yt-dlp-ejs is up to date! ({version})')
return
print(f'Updating yt-dlp-ejs from {current_version} to {version}')
hashes = []
wheel_info = {}
for asset in info['assets']:
name = asset['name']
is_wheel = name.startswith('yt_dlp_ejs-') and name.endswith('.whl')
if not is_wheel and name not in ASSETS:
continue
with request(asset['browser_download_url']) as resp:
data = resp.read()
# verify digest from github
digest = asset['digest']
algo, _, expected = digest.partition(':')
hexdigest = hashlib.new(algo, data).hexdigest()
assert hexdigest == expected, f'downloaded asset digest mismatch ({hexdigest!r} != {expected!r})'
if is_wheel:
wheel_info = makefile_variables(version, name, digest, data)
continue
# calculate sha3-512 digest
asset_hash = hashlib.sha3_512(data).hexdigest()
hashes.append(f' {name!r}: {asset_hash!r},')
if ASSETS[name]:
(PACKAGE_PATH / name).write_bytes(data)
hash_mapping = '\n'.join(hashes)
for asset_name in ASSETS:
assert asset_name in hash_mapping, f'{asset_name} not found in release'
assert all(wheel_info.get(key) for key in makefile_info), 'wheel info not found in release'
(PACKAGE_PATH / '_info.py').write_text(TEMPLATE.format(
version=version,
hash_mapping=hash_mapping,
))
content = PYPROJECT_PATH.read_text()
updated = content.replace(PREFIX + current_version, PREFIX + version)
PYPROJECT_PATH.write_text(updated)
makefile = MAKEFILE_PATH.read_text()
for key in wheel_info:
makefile = makefile.replace(f'{key} = {makefile_info[key]}', f'{key} = {wheel_info[key]}')
MAKEFILE_PATH.write_text(makefile)
if __name__ == '__main__':
main()
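To illustrate what `list_wheel_contents` produces for the Makefile variables, here is a small self-contained sketch that applies the same filtering to an in-memory archive; the file names in it are illustrative only:

```python
import io
import zipfile

# Build a tiny in-memory archive with a layout similar to the yt-dlp-ejs wheel
# (the file names below are illustrative only).
buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as zf:
    for name in (
        'yt_dlp_ejs/__init__.py',
        'yt_dlp_ejs/yt/solver/__init__.py',
        'yt_dlp_ejs/yt/solver/core.min.js',
        'yt_dlp_ejs-0.3.0.dist-info/METADATA',
    ):
        zf.writestr(name, '')

# Same filtering as list_wheel_contents(): keep yt_dlp_ejs/ paths with the given
# suffix, then derive the unique parent folders from them.
paths = [zi.filename for zi in zipfile.ZipFile(io.BytesIO(buf.getvalue())).infolist()]
js_files = [p for p in paths if p.startswith('yt_dlp_ejs/') and p.endswith('.js')]
js_folders = list(dict.fromkeys(p.rpartition('/')[0] for p in js_files))

print(' '.join(js_folders))  # -> yt_dlp_ejs/yt/solver
print(' '.join(js_files))    # -> yt_dlp_ejs/yt/solver/core.min.js
```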

pyproject.toml

@@ -56,6 +56,7 @@ default = [
"requests>=2.32.2,<3",
"urllib3>=2.0.2,<3",
"websockets>=13.0",
"yt-dlp-ejs==0.3.0",
]
curl-cffi = [
"curl-cffi>=0.5.10,!=0.6.*,!=0.7.*,!=0.8.*,!=0.9.*,<0.14; implementation_name=='cpython'",
@@ -122,7 +123,12 @@ artifacts = [
[tool.hatch.build.targets.wheel]
packages = ["yt_dlp"]
artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
artifacts = [
"/yt_dlp/extractor/lazy_extractors.py",
]
exclude = [
"/yt_dlp/**/*.md",
]
[tool.hatch.build.targets.wheel.shared-data]
"completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp"

test/test_jsc/conftest.py (new file)

@@ -0,0 +1,60 @@
import re
import pathlib
import pytest
import yt_dlp.globals
from yt_dlp import YoutubeDL
from yt_dlp.extractor.common import InfoExtractor
_TESTDATA_PATH = pathlib.Path(__file__).parent.parent / 'testdata/sigs'
_player_re = re.compile(r'^.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$')
_player_id_trans = str.maketrans(dict.fromkeys('/.-', '_'))
@pytest.fixture
def ie() -> InfoExtractor:
runtime_names = yt_dlp.globals.supported_js_runtimes.value
ydl = YoutubeDL({'js_runtimes': {key: {} for key in runtime_names}})
ie = ydl.get_info_extractor('Youtube')
def _load_player(video_id, player_url, fatal=True):
match = _player_re.match(player_url)
test_id = match.group('id').translate(_player_id_trans)
cached_file = _TESTDATA_PATH / f'player-{test_id}.js'
if cached_file.exists():
return cached_file.read_text()
if code := ie._download_webpage(player_url, video_id, fatal=fatal):
_TESTDATA_PATH.mkdir(exist_ok=True, parents=True)
cached_file.write_text(code)
return code
return None
ie._load_player = _load_player
return ie
class MockLogger:
def trace(self, message: str):
print(f'trace: {message}')
def debug(self, message: str, *, once=False):
print(f'debug: {message}')
def info(self, message: str):
print(f'info: {message}')
def warning(self, message: str, *, once=False):
print(f'warning: {message}')
def error(self, message: str):
print(f'error: {message}')
@pytest.fixture
def logger():
return MockLogger()


@@ -0,0 +1,128 @@
from __future__ import annotations
import dataclasses
import enum
import importlib.util
import json
import pytest
from yt_dlp.extractor.youtube.jsc.provider import (
JsChallengeRequest,
JsChallengeType,
JsChallengeProviderResponse,
JsChallengeResponse,
NChallengeInput,
NChallengeOutput,
SigChallengeInput,
SigChallengeOutput,
)
from yt_dlp.extractor.youtube.jsc._builtin.bun import BunJCP
from yt_dlp.extractor.youtube.jsc._builtin.deno import DenoJCP
from yt_dlp.extractor.youtube.jsc._builtin.node import NodeJCP
from yt_dlp.extractor.youtube.jsc._builtin.quickjs import QuickJSJCP
_has_ejs = bool(importlib.util.find_spec('yt_dlp_ejs'))
pytestmark = pytest.mark.skipif(not _has_ejs, reason='yt-dlp-ejs not available')
class Variant(enum.Enum):
main = 'player_ias.vflset/en_US/base.js'
tcc = 'player_ias_tcc.vflset/en_US/base.js'
tce = 'player_ias_tce.vflset/en_US/base.js'
es5 = 'player_es5.vflset/en_US/base.js'
es6 = 'player_es6.vflset/en_US/base.js'
tv = 'tv-player-ias.vflset/tv-player-ias.js'
tv_es6 = 'tv-player-es6.vflset/tv-player-es6.js'
phone = 'player-plasma-ias-phone-en_US.vflset/base.js'
tablet = 'player-plasma-ias-tablet-en_US.vflset/base.js'
@dataclasses.dataclass
class Challenge:
player: str
variant: Variant
type: JsChallengeType
values: dict[str, str] = dataclasses.field(default_factory=dict)
def url(self, /):
return f'https://www.youtube.com/s/player/{self.player}/{self.variant.value}'
CHALLENGES: list[Challenge] = [
Challenge('3d3ba064', Variant.tce, JsChallengeType.N, {
'ZdZIqFPQK-Ty8wId': 'qmtUsIz04xxiNW',
'4GMrWHyKI5cEvhDO': 'N9gmEX7YhKTSmw',
}),
Challenge('3d3ba064', Variant.tce, JsChallengeType.SIG, {
'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt':
'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3gqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kNyBf6HPuAuCduh-a7O',
}),
Challenge('5ec65609', Variant.tce, JsChallengeType.N, {
'0eRGgQWJGfT5rFHFj': '4SvMpDQH-vBJCw',
}),
Challenge('5ec65609', Variant.tce, JsChallengeType.SIG, {
'AAJAJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grH0rTMICA1mmDc0HoXgW3CAiAQQ4=CspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ=I':
'AJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grI0rTMICA1mmDc0HoXgW3CAiAQQ4HCspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ==',
}),
Challenge('6742b2b9', Variant.tce, JsChallengeType.N, {
'_HPB-7GFg1VTkn9u': 'qUAsPryAO_ByYg',
'K1t_fcB6phzuq2SF': 'Y7PcOt3VE62mog',
}),
Challenge('6742b2b9', Variant.tce, JsChallengeType.SIG, {
'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJAA':
'AJfQdSswRAIgMVVvrovTbw6UNh99kPa4D_XQjGT4qYu7S6SHM8EjoCACIEQnz-nKN5RgG6iUTnNJC58csYPSrnS_SzricuUMJZGM',
}),
Challenge('2b83d2e0', Variant.main, JsChallengeType.N, {
'0eRGgQWJGfT5rFHFj': 'euHbygrCMLksxd',
}),
Challenge('2b83d2e0', Variant.main, JsChallengeType.SIG, {
'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJA':
'-MGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKnMznQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJ',
}),
Challenge('638ec5c6', Variant.main, JsChallengeType.N, {
'ZdZIqFPQK-Ty8wId': '1qov8-KM-yH',
}),
Challenge('638ec5c6', Variant.main, JsChallengeType.SIG, {
'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt':
'MhudCuAuP-6fByOk1_GNXN7gNHHShjyXS2VOgsEItAJz0tipeav0OmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
}),
]
requests: list[JsChallengeRequest] = []
responses: list[JsChallengeProviderResponse] = []
for test in CHALLENGES:
input_type, output_type = {
JsChallengeType.N: (NChallengeInput, NChallengeOutput),
JsChallengeType.SIG: (SigChallengeInput, SigChallengeOutput),
}[test.type]
request = JsChallengeRequest(test.type, input_type(test.url(), list(test.values.keys())), test.player)
requests.append(request)
responses.append(JsChallengeProviderResponse(request, JsChallengeResponse(test.type, output_type(test.values))))
@pytest.fixture(params=[BunJCP, DenoJCP, NodeJCP, QuickJSJCP])
def jcp(request, ie, logger):
obj = request.param(ie, logger, None)
if not obj.is_available():
pytest.skip(f'{obj.PROVIDER_NAME} is not available')
obj.is_dev = True
return obj
@pytest.mark.download
def test_bulk_requests(jcp):
assert list(jcp.bulk_solve(requests)) == responses
@pytest.mark.download
def test_using_cached_player(jcp):
first_player_requests = requests[:3]
player = jcp._get_player(first_player_requests[0].video_id, first_player_requests[0].input.player_url)
initial = json.loads(jcp._run_js_runtime(jcp._construct_stdin(player, False, first_player_requests)))
preprocessed = initial.pop('preprocessed_player')
result = json.loads(jcp._run_js_runtime(jcp._construct_stdin(preprocessed, True, first_player_requests)))
assert initial == result


@@ -0,0 +1,194 @@
import pytest
from yt_dlp.extractor.youtube.jsc.provider import (
JsChallengeProvider,
JsChallengeRequest,
JsChallengeProviderResponse,
JsChallengeProviderRejectedRequest,
JsChallengeType,
JsChallengeResponse,
NChallengeOutput,
NChallengeInput,
JsChallengeProviderError,
register_provider,
register_preference,
)
from yt_dlp.extractor.youtube.pot._provider import IEContentProvider
from yt_dlp.utils import ExtractorError
from yt_dlp.extractor.youtube.jsc._registry import _jsc_preferences, _jsc_providers
class ExampleJCP(JsChallengeProvider):
PROVIDER_NAME = 'example-provider'
PROVIDER_VERSION = '0.0.1'
BUG_REPORT_LOCATION = 'https://example.com/issues'
_SUPPORTED_TYPES = [JsChallengeType.N]
def is_available(self) -> bool:
return True
def _real_bulk_solve(self, requests):
for request in requests:
results = dict.fromkeys(request.input.challenges, 'example-solution')
response = JsChallengeResponse(
type=request.type,
output=NChallengeOutput(results=results))
yield JsChallengeProviderResponse(request=request, response=response)
PLAYER_URL = 'https://example.com/player.js'
class TestJsChallengeProvider:
# note: some tests are covered in TestPoTokenProvider, which shares the same base class
def test_base_type(self):
assert issubclass(JsChallengeProvider, IEContentProvider)
def test_create_provider_missing_bulk_solve_method(self, ie, logger):
class MissingMethodsJCP(JsChallengeProvider):
def is_available(self) -> bool:
return True
with pytest.raises(TypeError, match='bulk_solve'):
MissingMethodsJCP(ie=ie, logger=logger, settings={})
def test_create_provider_missing_available_method(self, ie, logger):
class MissingMethodsJCP(JsChallengeProvider):
def _real_bulk_solve(self, requests):
raise JsChallengeProviderRejectedRequest('Not implemented')
with pytest.raises(TypeError, match='is_available'):
MissingMethodsJCP(ie=ie, logger=logger, settings={})
def test_barebones_provider(self, ie, logger):
class BarebonesProviderJCP(JsChallengeProvider):
def is_available(self) -> bool:
return True
def _real_bulk_solve(self, requests):
raise JsChallengeProviderRejectedRequest('Not implemented')
provider = BarebonesProviderJCP(ie=ie, logger=logger, settings={})
assert provider.PROVIDER_NAME == 'BarebonesProvider'
assert provider.PROVIDER_KEY == 'BarebonesProvider'
assert provider.PROVIDER_VERSION == '0.0.0'
assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .'
def test_example_provider_success(self, ie, logger):
provider = ExampleJCP(ie=ie, logger=logger, settings={})
request = JsChallengeRequest(
type=JsChallengeType.N,
input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge']))
request_two = JsChallengeRequest(
type=JsChallengeType.N,
input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge-2']))
responses = list(provider.bulk_solve([request, request_two]))
assert len(responses) == 2
assert all(isinstance(r, JsChallengeProviderResponse) for r in responses)
assert responses == [
JsChallengeProviderResponse(
request=request,
response=JsChallengeResponse(
type=JsChallengeType.N,
output=NChallengeOutput(results={'example-challenge': 'example-solution'}),
),
),
JsChallengeProviderResponse(
request=request_two,
response=JsChallengeResponse(
type=JsChallengeType.N,
output=NChallengeOutput(results={'example-challenge-2': 'example-solution'}),
),
),
]
def test_provider_unsupported_challenge_type(self, ie, logger):
provider = ExampleJCP(ie=ie, logger=logger, settings={})
request_supported = JsChallengeRequest(
type=JsChallengeType.N,
input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge']))
request_unsupported = JsChallengeRequest(
type=JsChallengeType.SIG,
input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge']))
responses = list(provider.bulk_solve([request_supported, request_unsupported, request_supported]))
assert len(responses) == 3
# Requests are validated first before continuing to _real_bulk_solve
assert isinstance(responses[0], JsChallengeProviderResponse)
assert isinstance(responses[0].error, JsChallengeProviderRejectedRequest)
assert responses[0].request is request_unsupported
assert str(responses[0].error) == 'JS Challenge type "JsChallengeType.SIG" is not supported by example-provider'
assert responses[1:] == [
JsChallengeProviderResponse(
request=request_supported,
response=JsChallengeResponse(
type=JsChallengeType.N,
output=NChallengeOutput(results={'example-challenge': 'example-solution'}),
),
),
JsChallengeProviderResponse(
request=request_supported,
response=JsChallengeResponse(
type=JsChallengeType.N,
output=NChallengeOutput(results={'example-challenge': 'example-solution'}),
),
),
]
def test_provider_get_player(self, ie, logger):
ie._load_player = lambda video_id, player_url, fatal: (video_id, player_url, fatal)
provider = ExampleJCP(ie=ie, logger=logger, settings={})
assert provider._get_player('video123', PLAYER_URL) == ('video123', PLAYER_URL, True)
def test_provider_get_player_error(self, ie, logger):
def raise_error(video_id, player_url, fatal):
raise ExtractorError('Failed to load player')
ie._load_player = raise_error
provider = ExampleJCP(ie=ie, logger=logger, settings={})
with pytest.raises(JsChallengeProviderError, match='Failed to load player for JS challenge'):
provider._get_player('video123', PLAYER_URL)
def test_require_class_end_with_suffix(self, ie, logger):
class InvalidSuffix(JsChallengeProvider):
PROVIDER_NAME = 'invalid-suffix'
def _real_bulk_solve(self, requests):
raise JsChallengeProviderRejectedRequest('Not implemented')
def is_available(self) -> bool:
return True
provider = InvalidSuffix(ie=ie, logger=logger, settings={})
with pytest.raises(AssertionError):
provider.PROVIDER_KEY # noqa: B018
def test_register_provider(ie):
@register_provider
class UnavailableProviderJCP(JsChallengeProvider):
def is_available(self) -> bool:
return False
def _real_bulk_solve(self, requests):
raise JsChallengeProviderRejectedRequest('Not implemented')
assert _jsc_providers.value.get('UnavailableProvider') == UnavailableProviderJCP
_jsc_providers.value.pop('UnavailableProvider')
def test_register_preference(ie):
before = len(_jsc_preferences.value)
@register_preference(ExampleJCP)
def unavailable_preference(*args, **kwargs):
return 1
assert len(_jsc_preferences.value) == before + 1


@@ -1,6 +1,6 @@
import pytest
from yt_dlp.extractor.youtube.pot._provider import IEContentProvider
from yt_dlp.extractor.youtube.pot._provider import IEContentProvider, configuration_arg
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.utils.networking import HTTPHeaderDict
from yt_dlp.extractor.youtube.pot.provider import (
@@ -627,3 +627,13 @@ def test_logger_log_level(logger):
assert logger.LogLevel('debuG') == logger.LogLevel.DEBUG
assert logger.LogLevel(10) == logger.LogLevel.DEBUG
assert logger.LogLevel('UNKNOWN') == logger.LogLevel.INFO
def test_configuration_arg():
config = {'abc': ['123D'], 'xyz': ['456a', '789B']}
assert configuration_arg(config, 'abc') == ['123d']
assert configuration_arg(config, 'abc', default=['default']) == ['123d']
assert configuration_arg(config, 'ABC', default=['default']) == ['default']
assert configuration_arg(config, 'abc', casesense=True) == ['123D']
assert configuration_arg(config, 'xyz', casesense=False) == ['456a', '789b']

test/test_youtube_signature.py (deleted file)

@@ -1,504 +0,0 @@
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import contextlib
import re
import string
import urllib.request
from test.helper import FakeYDL, is_download_test
from yt_dlp.extractor import YoutubeIE
from yt_dlp.jsinterp import JSInterpreter
_SIG_TESTS = [
(
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
86,
'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
85,
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
90,
']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
84,
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
84,
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
83,
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
),
(
'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1',
),
(
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/e12fbea4/player_ias.vflset/en_US/base.js',
'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
'JC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-a',
),
(
'https://www.youtube.com/s/player/010fbc8d/player_es5.vflset/en_US/base.js',
'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit2zJAsIEggOVaSXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-',
),
(
'https://www.youtube.com/s/player/010fbc8d/player_es6.vflset/en_US/base.js',
'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit2zJAsIEggOVaSXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-',
),
(
'https://www.youtube.com/s/player/5ec65609/player_ias_tcc.vflset/en_US/base.js',
'AAJAJfQdSswRAIgNSN0GDUcHnCIXkKcF61yLBgDHiX1sUhOJdY4_GxunRYCIDeYNYP_16mQTPm5f1OVq3oV1ijUNYPjP4iUSMAjO9bZ',
'AJfQdSswRAIgNSN0GDUcHnCIXkKcF61ZLBgDHiX1sUhOJdY4_GxunRYCIDyYNYP_16mQTPm5f1OVq3oV1ijUNYPjP4iUSMAjO9be',
),
]
_NSIG_TESTS = [
(
'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js',
'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg',
),
(
'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
),
(
'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js',
'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN',
),
(
'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js',
'oBo2h5euWy6osrUt', '3DIBbn3qdQ',
),
(
'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
),
(
'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
),
(
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
),
(
'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
),
(
'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
),
(
'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
),
(
'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
),
(
'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
),
(
'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
'5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
),
(
'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
'5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
),
(
'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
),
(
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
),
(
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
),
(
'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
),
(
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
),
(
'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
),
(
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
),
(
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
'1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
),
(
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
),
(
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
),
(
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
),
(
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
),
(
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
),
(
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
),
(
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
),
(
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP',
),
(
'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js',
'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
),
(
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
),
(
'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
),
(
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
),
(
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
),
(
'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js',
'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h',
),
(
'https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js',
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
),
(
'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
),
(
'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq',
),
(
'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
),
(
'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
),
(
'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy',
),
(
'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf',
),
(
'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa',
),
(
'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF',
),
(
'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD',
),
(
'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba',
),
(
'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js',
'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg',
),
(
'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js',
'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA',
),
(
'https://www.youtube.com/s/player/010fbc8d/player_es5.vflset/en_US/base.js',
'0hlOAlqjFszVvF4Z', 'R-H23bZGAsRFTg',
),
(
'https://www.youtube.com/s/player/010fbc8d/player_es6.vflset/en_US/base.js',
'0hlOAlqjFszVvF4Z', 'R-H23bZGAsRFTg',
),
(
'https://www.youtube.com/s/player/5ec65609/player_ias_tcc.vflset/en_US/base.js',
'6l5CTNx4AzIqH4MXM', 'NupToduxHBew1g',
),
]
@is_download_test
class TestPlayerInfo(unittest.TestCase):
def test_youtube_extract_player_info(self):
PLAYER_URLS = (
('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
('https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js', 'e7567ecf'),
('https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', '643afba4'),
# obsolete
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
)
for player_url, expected_player_id in PLAYER_URLS:
player_id = YoutubeIE._extract_player_info(player_url)
self.assertEqual(player_id, expected_player_id)
@is_download_test
class TestSignature(unittest.TestCase):
def setUp(self):
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
if not os.path.exists(self.TESTDATA_DIR):
os.mkdir(self.TESTDATA_DIR)
def tearDown(self):
with contextlib.suppress(OSError):
for f in os.listdir(self.TESTDATA_DIR):
os.remove(f)
def t_factory(name, sig_func, url_pattern):
def make_tfunc(url, sig_input, expected_sig):
m = url_pattern.match(url)
assert m, f'{url!r} should follow URL format'
test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
def test_func(self):
basename = f'player-{test_id}.js'
fn = os.path.join(self.TESTDATA_DIR, basename)
if not os.path.exists(fn):
urllib.request.urlretrieve(url, fn)
with open(fn, encoding='utf-8') as testf:
jscode = testf.read()
self.assertEqual(sig_func(jscode, sig_input, url), expected_sig)
test_func.__name__ = f'test_{name}_js_{test_id}'
setattr(TestSignature, test_func.__name__, test_func)
return make_tfunc
def signature(jscode, sig_input, player_url):
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode, player_url)
src_sig = (
str(string.printable[:sig_input])
if isinstance(sig_input, int) else sig_input)
return func(src_sig)
def n_sig(jscode, sig_input, player_url):
ie = YoutubeIE(FakeYDL())
funcname = ie._extract_n_function_name(jscode, player_url=player_url)
jsi = JSInterpreter(jscode)
func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname), jscode, player_url))
return func([sig_input])
make_sig_test = t_factory(
'signature', signature,
re.compile(r'''(?x)
.+(?:
/player/(?P<id>[a-zA-Z0-9_/.-]+)|
/html5player-(?:en_US-)?(?P<compat_id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?
)\.js$'''))
for test_spec in _SIG_TESTS:
make_sig_test(*test_spec)
make_nsig_test = t_factory(
'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$'))
for test_spec in _NSIG_TESTS:
make_nsig_test(*test_spec)
if __name__ == '__main__':
unittest.main()

View File

@@ -42,6 +42,8 @@ from .globals import (
plugin_pps,
all_plugins_loaded,
plugin_dirs,
supported_js_runtimes,
supported_remote_components,
)
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
@@ -533,6 +535,18 @@ class YoutubeDL:
See "EXTRACTOR ARGUMENTS" for details.
Argument values must always be a list of string(s).
E.g. {'youtube': {'skip': ['dash', 'hls']}}
js_runtimes: A dictionary of JavaScript runtimes to enable, mapping each runtime key (in lower case)
to a dictionary of additional configuration for that runtime.
Currently supported runtimes are 'deno', 'node', 'bun', and 'quickjs'.
If None, the default runtime of "deno" will be enabled.
The runtime configuration dictionary can have the following keys:
- path: Path to the executable (optional)
E.g. {'deno': {'path': '/path/to/deno'}}
remote_components: A list of remote components that are allowed to be fetched when required.
Supported components:
- ejs:npm (external JavaScript components from npm)
- ejs:github (external JavaScript components from yt-dlp-ejs GitHub)
By default, no remote components are allowed to be fetched.
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
The following options are deprecated and may be removed in the future:
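As a minimal sketch (not part of the diff), this is how the new `js_runtimes` and `remote_components` parameters documented above might be passed when embedding yt-dlp; the executable path and video URL are placeholder assumptions:

```python
# Hypothetical embedding example; values shown are assumptions, not defaults.
from yt_dlp import YoutubeDL

ydl_opts = {
    # Enable the Deno runtime, optionally pointing at a specific executable
    'js_runtimes': {'deno': {'path': '/usr/local/bin/deno'}},
    # Allow the external JS challenge solver to be fetched from GitHub releases
    'remote_components': ['ejs:github'],
}

with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```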
@@ -717,6 +731,13 @@ class YoutubeDL:
else:
raise
# Note: this must be after plugins are loaded
self.params['js_runtimes'] = self.params.get('js_runtimes', {'deno': {}})
self._clean_js_runtimes(self.params['js_runtimes'])
self.params['remote_components'] = set(self.params.get('remote_components', ()))
self._clean_remote_components(self.params['remote_components'])
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
@@ -829,6 +850,36 @@ class YoutubeDL:
self.archive = preload_download_archive(self.params.get('download_archive'))
def _clean_js_runtimes(self, runtimes):
if not (
isinstance(runtimes, dict)
and all(isinstance(k, str) and (v is None or isinstance(v, dict)) for k, v in runtimes.items())
):
raise ValueError('Invalid js_runtimes format, expected a dict of {runtime: {config}}')
if unsupported_runtimes := runtimes.keys() - supported_js_runtimes.value.keys():
self.report_warning(
f'Ignoring unsupported JavaScript runtime(s): {", ".join(unsupported_runtimes)}.'
f' Supported runtimes: {", ".join(supported_js_runtimes.value.keys())}.')
for rt in unsupported_runtimes:
runtimes.pop(rt)
def _clean_remote_components(self, remote_components: set):
if unsupported_remote_components := set(remote_components) - set(supported_remote_components.value):
self.report_warning(
f'Ignoring unsupported remote component(s): {", ".join(unsupported_remote_components)}.'
f' Supported remote components: {", ".join(supported_remote_components.value)}.')
for rt in unsupported_remote_components:
remote_components.remove(rt)
@functools.cached_property
def _js_runtimes(self):
runtimes = {}
for name, config in self.params.get('js_runtimes', {}).items():
runtime_cls = supported_js_runtimes.value.get(name)
runtimes[name] = runtime_cls(path=config.get('path')) if runtime_cls else None
return runtimes
def warn_if_short_id(self, argv):
# short YouTube ID starting with dash?
idxs = [
@@ -4064,6 +4115,18 @@ class YoutubeDL:
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
})) or 'none'))
if not self.params.get('js_runtimes'):
write_debug('JS runtimes: none (disabled)')
else:
write_debug('JS runtimes: %s' % (', '.join(sorted(
f'{name} (unknown)' if runtime is None
else join_nonempty(
runtime.info.name,
runtime.info.version + (' (unsupported)' if runtime.info.supported is False else ''),
)
for name, runtime in self._js_runtimes.items() if runtime is None or runtime.info is not None
)) or 'none'))
write_debug(f'Proxy map: {self.proxies}')
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')

View File

@@ -61,8 +61,15 @@ from .utils import (
shell_quote,
variadic,
write_string,
)
from .utils._utils import _UnsafeExtensionError
from .utils._jsruntime import (
BunJsRuntime as _BunJsRuntime,
DenoJsRuntime as _DenoJsRuntime,
NodeJsRuntime as _NodeJsRuntime,
QuickJsRuntime as _QuickJsRuntime,
)
from .YoutubeDL import YoutubeDL
@@ -773,6 +780,10 @@ def parse_options(argv=None):
else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS)
else None)
js_runtimes = {
runtime.lower(): {'path': path} for runtime, path in (
[*arg.split(':', 1), None][:2] for arg in opts.js_runtimes)}
return ParsedOptions(parser, opts, urls, {
'usenetrc': opts.usenetrc,
'netrc_location': opts.netrc_location,
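For illustration, a standalone sketch of how the `--js-runtimes` argument parsing in the comprehension above maps CLI values to the `js_runtimes` dict (the input values here are assumptions):

```python
# Mirrors the parse_options comprehension; opts_js_runtimes stands in for opts.js_runtimes
opts_js_runtimes = ['Deno', 'node:/opt/node/bin/node']

js_runtimes = {
    runtime.lower(): {'path': path} for runtime, path in (
        [*arg.split(':', 1), None][:2] for arg in opts_js_runtimes)}

assert js_runtimes == {
    'deno': {'path': None},                  # bare runtime name, no explicit path
    'node': {'path': '/opt/node/bin/node'},  # "runtime:path" form
}
```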
@@ -940,6 +951,8 @@ def parse_options(argv=None):
'geo_bypass_country': opts.geo_bypass_country,
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
'useid': opts.useid or None,
'js_runtimes': js_runtimes,
'remote_components': opts.remote_components,
'warn_when_outdated': opts.update_self is None,
'_warnings': warnings,
'_deprecation_warnings': deprecation_warnings,
@@ -1081,6 +1094,16 @@ def main(argv=None):
from .extractor import gen_extractors, list_extractors
# Register JS runtimes and remote components
from .globals import supported_js_runtimes, supported_remote_components
supported_js_runtimes.value['deno'] = _DenoJsRuntime
supported_js_runtimes.value['node'] = _NodeJsRuntime
supported_js_runtimes.value['bun'] = _BunJsRuntime
supported_js_runtimes.value['quickjs'] = _QuickJsRuntime
supported_remote_components.value.append('ejs:github')
supported_remote_components.value.append('ejs:npm')
__all__ = [
'YoutubeDL',
'gen_extractors',

View File

@@ -34,3 +34,4 @@ print(f'Adding imports: {hiddenimports}')
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
datas = collect_data_files('curl_cffi', includes=['cacert.pem'])
datas += collect_data_files('yt_dlp_ejs', includes=['**/*.js'])

View File

@@ -81,6 +81,12 @@ except ImportError:
from . import Cryptodome
try:
import yt_dlp_ejs
except ImportError:
yt_dlp_ejs = None
all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}
available_dependencies = {k: v for k, v in all_dependencies.items() if v}

View File

@@ -387,7 +387,8 @@ def _fix_embedded_ytcfg(ytcfg):
def build_innertube_clients():
BASE_CLIENTS = ('ios', 'web', 'tv', 'mweb', 'android')
# From highest to lowest priority
BASE_CLIENTS = ('tv', 'web', 'mweb', 'android', 'ios')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
@@ -409,9 +410,6 @@ def build_innertube_clients():
if variant == 'embedded':
_fix_embedded_ytcfg(ytcfg)
ytcfg['priority'] -= 2
elif variant:
ytcfg['priority'] -= 3
build_innertube_clients()

View File

@@ -4,9 +4,7 @@ import collections
import datetime as dt
import functools
import itertools
import json
import math
import os.path
import random
import re
import sys
@@ -26,10 +24,11 @@ from ._base import (
_split_innertube_client,
short_client_name,
)
from .jsc._builtin.ejs import _EJS_WIKI_URL
from .jsc._director import initialize_jsc_director
from .jsc.provider import JsChallengeRequest, JsChallengeType, NChallengeInput, SigChallengeInput
from .pot._director import initialize_pot_director
from .pot.provider import PoTokenContext, PoTokenRequest
from ..openload import PhantomJSwrapper
from ...jsinterp import JSInterpreter, LocalNameSpace
from ...networking.exceptions import HTTPError
from ...utils import (
NO_DEFAULT,
@@ -39,13 +38,11 @@ from ...utils import (
clean_html,
datetime_from_str,
filesize_from_tbr,
filter_dict,
float_or_none,
format_field,
get_first,
int_or_none,
join_nonempty,
js_to_json,
mimetype2ext,
orderedSet,
parse_codecs,
@@ -148,10 +145,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
_DEFAULT_CLIENTS = ('android_sdkless', 'tv', 'web_safari', 'web')
_DEFAULT_CLIENTS = ('tv', 'android_sdkless', 'web')
_DEFAULT_JSLESS_CLIENTS = ('android_sdkless', 'web_safari', 'web')
_DEFAULT_AUTHED_CLIENTS = ('tv', 'web_safari', 'web')
# Premium does not require POT (except for subtitles)
_DEFAULT_PREMIUM_CLIENTS = ('tv', 'web_creator', 'web_safari', 'web')
_DEFAULT_PREMIUM_CLIENTS = ('tv', 'web_creator', 'web')
_GEO_BYPASS = False
@@ -1720,8 +1718,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js',
}
_INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()}
_NSIG_FUNC_CACHE_ID = 'nsig func'
_DUMMY_STRING = 'dlp_wins'
@classmethod
def suitable(cls, url):
@@ -1741,6 +1737,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _real_initialize(self):
super()._real_initialize()
self._pot_director = initialize_pot_director(self)
self._jsc_director = initialize_jsc_director(self)
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
lock = threading.Lock()
@@ -1758,7 +1755,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
microformats = traverse_obj(
prs, (..., 'microformat', 'playerMicroformatRenderer'),
expected_type=dict)
_, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
_, live_status, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
is_live = live_status == 'is_live'
start_time = time.time()
@@ -2006,10 +2003,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
variant = re.sub(r'[^a-zA-Z0-9]', '_', remove_end(player_path, '.js'))
return f'{player_id}-{variant}'
def _signature_cache_id(self, example_sig):
""" Return a string representation of a signature """
return '.'.join(str(len(part)) for part in example_sig.split('.'))
@classmethod
def _extract_player_info(cls, player_url):
for player_re in cls._PLAYER_INFO_RE:
@@ -2031,53 +2024,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._code_cache[player_js_key] = code
return self._code_cache.get(player_js_key)
def _extract_signature_function(self, video_id, player_url, example_sig):
# Read from filesystem cache
func_id = join_nonempty(
self._player_js_cache_key(player_url), self._signature_cache_id(example_sig))
assert os.path.basename(func_id) == func_id
def _sig_spec_cache_id(self, player_url, spec_id):
return join_nonempty(self._player_js_cache_key(player_url), str(spec_id))
self.write_debug(f'Extracting signature function {func_id}')
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.07.21'), None
def _load_sig_spec_from_cache(self, spec_cache_id):
# This is almost identical to _load_player_data_from_cache
# I hate it
if spec_cache_id in self._player_cache:
return self._player_cache[spec_cache_id]
spec = self.cache.load('youtube-sigfuncs', spec_cache_id, min_ver='2025.07.21')
if spec:
self._player_cache[spec_cache_id] = spec
return spec
if not cache_spec:
code = self._load_player(video_id, player_url)
if code:
res = self._parse_sig_js(code, player_url)
test_string = ''.join(map(chr, range(len(example_sig))))
cache_spec = [ord(c) for c in res(test_string)]
self.cache.store('youtube-sigfuncs', func_id, cache_spec)
def _store_sig_spec_to_cache(self, spec_cache_id, spec):
if spec_cache_id not in self._player_cache:
self._player_cache[spec_cache_id] = spec
self.cache.store('youtube-sigfuncs', spec_cache_id, spec)
return lambda s: ''.join(s[i] for i in cache_spec)
def _load_player_data_from_cache(self, name, player_url):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
def _parse_sig_js(self, jscode, player_url):
# Examples where `sig` is funcname:
# sig=function(a){a=a.split(""); ... ;return a.join("")};
# ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
# {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
# sig=function(J){J=J.split(""); ... ;return J.join("")};
# ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
# {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
funcname = self._search_regex(
(r'\b(?P<var>[a-zA-Z0-9_$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)',
r'(?P<sig>[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
r'(?:\b|[^a-zA-Z0-9_$])(?P<sig>[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?',
# Old patterns
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
# Obsolete patterns
r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')
if data := self._player_cache.get(cache_id):
return data
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
jsi = JSInterpreter(jscode)
initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list}))
return lambda s: initial_function([s])
data = self.cache.load(*cache_id, min_ver='2025.07.21')
if data:
self._player_cache[cache_id] = data
return data
def _cached(self, func, *cache_id):
def inner(*args, **kwargs):
@@ -2095,246 +2070,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return ret
return inner
def _load_player_data_from_cache(self, name, player_url):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
if data := self._player_cache.get(cache_id):
return data
data = self.cache.load(*cache_id, min_ver='2025.07.21')
if data:
self._player_cache[cache_id] = data
return data
def _store_player_data_to_cache(self, name, player_url, data):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
if cache_id not in self._player_cache:
self.cache.store(*cache_id, data)
self._player_cache[cache_id] = data
def _decrypt_signature(self, s, video_id, player_url):
"""Turn the encrypted s field into a working signature"""
extract_sig = self._cached(
self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
func = extract_sig(video_id, player_url, s)
return func(s)
def _decrypt_nsig(self, s, video_id, player_url):
"""Turn the encrypted n field into a working signature"""
if player_url is None:
raise ExtractorError('Cannot decrypt nsig without player_url')
player_url = urljoin('https://www.youtube.com', player_url)
try:
jsi, _, func_code = self._extract_n_function_code(video_id, player_url)
except ExtractorError as e:
raise ExtractorError('Unable to extract nsig function code', cause=e)
try:
extract_nsig = self._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)
ret = extract_nsig(jsi, func_code)(s)
except JSInterpreter.Exception as e:
try:
jsi = PhantomJSwrapper(self, timeout=5000)
except ExtractorError:
raise e
self.report_warning(
f'Native nsig extraction failed: Trying with PhantomJS\n'
f' n = {s} ; player = {player_url}', video_id)
self.write_debug(e, only_once=True)
args, func_body = func_code
ret = jsi.execute(
f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
video_id=video_id, note='Executing signature code').strip()
self.write_debug(f'Decrypted nsig {s} => {ret}')
# Only cache nsig func JS code to disk if successful, and only once
self._store_player_data_to_cache('nsig', player_url, func_code)
return ret
def _extract_n_function_name(self, jscode, player_url=None):
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('-_w8_'), any)):
pattern = r'''(?x)
\{\s*return\s+%s\[%d\]\s*\+\s*(?P<argname>[a-zA-Z0-9_$]+)\s*\}
''' % (re.escape(varname), global_list.index(debug_str))
if match := re.search(pattern, jscode):
pattern = r'''(?x)
\{\s*\)%s\(\s*
(?:
(?P<funcname_a>[a-zA-Z0-9_$]+)\s*noitcnuf\s*
|noitcnuf\s*=\s*(?P<funcname_b>[a-zA-Z0-9_$]+)(?:\s+rav)?
)[;\n]
''' % re.escape(match.group('argname')[::-1])
if match := re.search(pattern, jscode[match.start()::-1]):
a, b = match.group('funcname_a', 'funcname_b')
return (a or b)[::-1]
self.write_debug(join_nonempty(
'Initial search was unable to find nsig function name',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
# Examples (with placeholders nfunc, narray, idx):
# * .get("n"))&&(b=nfunc(b)
# * .get("n"))&&(b=narray[idx](b)
# * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
# * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
# * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# * J.J="";J.url="";J.Z&&(R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
funcname, idx = self._search_regex(
r'''(?x)
(?:
\.get\("n"\)\)&&\(b=|
(?:
b=String\.fromCharCode\(110\)|
(?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
)
(?:
,[a-zA-Z0-9_$]+\(a\))?,c=a\.
(?:
get\(b\)|
[a-zA-Z0-9_$]+\[b\]\|\|null
)\)&&\(c=|
\b(?P<var>[a-zA-Z0-9_$]+)=
)(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
(?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))''',
jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
if not funcname:
self.report_warning(join_nonempty(
'Falling back to generic n function search',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
return self._search_regex(
r'''(?xs)
;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
\s*\{(?:(?!};).)+?return\s*(?P<q>["'])[\w-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+''',
jscode, 'Initial JS player n function name', group='name')
elif not idx:
return funcname
return json.loads(js_to_json(self._search_regex(
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _interpret_player_js_global_var(self, jscode, player_url):
"""Returns tuple of: variable name string, variable value list"""
extract_global_var = self._cached(self._search_regex, 'js global array', player_url)
varcode, varname, varvalue = extract_global_var(
r'''(?x)
(?P<q1>["\'])use\s+strict(?P=q1);\s*
(?P<code>
var\s+(?P<name>[a-zA-Z0-9_$]+)\s*=\s*
(?P<value>
(?P<q2>["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2)
\.split\((?P<q3>["\'])(?:(?!(?P=q3)).)+(?P=q3)\)
|\[\s*(?:(?P<q4>["\'])(?:(?!(?P=q4)).|\\.)*(?P=q4)\s*,?\s*)+\]
)
)[;,]
''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))
if not varcode:
self.write_debug(join_nonempty(
'No global array variable found in player JS',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
return None, None
jsi = JSInterpreter(varcode)
interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
# Fixup global array
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
if varname and global_list:
nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
else:
varname = self._DUMMY_STRING
global_list = []
# Fixup typeof check
undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
fixed_code = re.sub(
fr'''(?x)
;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
(["\'])undefined\1|
{re.escape(varname)}\[{undefined_idx}\]
)\s*\)\s*return\s+{re.escape(argnames[0])};
''', ';', nsig_code)
if fixed_code == nsig_code:
self.write_debug(join_nonempty(
'No typeof statement found in nsig function code',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
# Fixup global funcs
jsi = JSInterpreter(fixed_code)
cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
try:
self._cached(
self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
except JSInterpreter.Exception:
self._player_cache.pop(cache_id, None)
global_funcnames = jsi._undefined_varnames
debug_names = []
jsi = JSInterpreter(jscode)
for func_name in global_funcnames:
try:
func_args, func_code = jsi.extract_function_code(func_name)
fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
debug_names.append(func_name)
except Exception:
self.report_warning(join_nonempty(
f'Unable to extract global nsig function {func_name} from player JS',
player_url and f' player = {player_url}', delim='\n'), only_once=True)
if debug_names:
self.write_debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
return argnames, fixed_code
def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
func_code = self._load_player_data_from_cache('nsig', player_url)
jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode)
if func_code:
return jsi, player_id, func_code
func_name = self._extract_n_function_name(jscode, player_url=player_url)
# XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
return jsi, player_id, func_code
def _extract_n_function_from_code(self, jsi, func_code):
func = jsi.extract_function_from_code(*func_code)
def extract_nsig(s):
try:
ret = func([s])
except JSInterpreter.Exception:
raise
except Exception as e:
raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
if ret.startswith('enhanced_except_') or ret.endswith(s):
raise JSInterpreter.Exception('Signature function returned an exception')
return ret
return extract_nsig
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
"""
Extract signatureTimestamp (sts)
Required to tell API what sig/player version is in use.
"""
CACHE_ENABLED = False # TODO: enable when preprocessed player JS cache is solved/enabled
player_sts_override = self._get_player_js_version()[0]
if player_sts_override:
return int(player_sts_override)
if sts := traverse_obj(ytcfg, ('STS', {int_or_none})):
sts = traverse_obj(ytcfg, ('STS', {int_or_none}))
if sts:
return sts
if not player_url:
@@ -2344,15 +2098,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.report_warning(error_msg)
return None
sts = self._load_player_data_from_cache('sts', player_url)
if sts:
if CACHE_ENABLED and (sts := self._load_player_data_from_cache('sts', player_url)):
return sts
if code := self._load_player(video_id, player_url, fatal=fatal):
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
if sts:
if CACHE_ENABLED and sts:
self._store_player_data_to_cache('sts', player_url, sts)
return sts
@@ -3020,9 +2773,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_requested_clients(self, url, smuggled_data, is_premium_subscriber):
requested_clients = []
excluded_clients = []
js_runtime_available = any(p.is_available() for p in self._jsc_director.providers.values())
default_clients = (
self._DEFAULT_PREMIUM_CLIENTS if is_premium_subscriber
else self._DEFAULT_AUTHED_CLIENTS if self.is_authenticated
else self._DEFAULT_JSLESS_CLIENTS if not js_runtime_available
else self._DEFAULT_CLIENTS
)
allowed_clients = sorted(
@@ -3039,6 +2794,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.report_warning(f'Skipping unsupported client "{client}"')
else:
requested_clients.append(client)
if not (requested_clients or excluded_clients) and default_clients == self._DEFAULT_JSLESS_CLIENTS:
self.report_warning(
f'No supported JavaScript runtime could be found. YouTube extraction without '
f'a JS runtime has been deprecated, and some formats may be missing. '
f'See {_EJS_WIKI_URL} for details on installing one. To silence this warning, '
f'you can use --extractor-args "youtube:player_client=default"', only_once=True)
if not requested_clients:
requested_clients.extend(default_clients)
for excluded_client in excluded_clients:
@@ -3173,12 +2936,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
sd[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = client
f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
f[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
f[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token)
f[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
if deprioritize_pr:
deprioritized_prs.append(pr)
else:
@@ -3258,12 +3021,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
self.report_warning(msg, only_once=True)
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
def _extract_formats_and_subtitles(self, video_id, player_responses, player_url, live_status, duration):
CHUNK_SIZE = 10 << 20
PREFERRED_LANG_VALUE = 10
original_language = None
itags, stream_ids = collections.defaultdict(set), []
itag_qualities, res_qualities = {}, {0: None}
subtitles = {}
q = qualities([
# Normally tiny is the smallest video-only formats. But
# audio-only formats with unknown quality may get tagged as tiny
@@ -3271,7 +3035,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
])
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
format_types = self._configuration_arg('formats')
all_formats = 'duplicate' in format_types
if self._configuration_arg('include_duplicate_formats'):
@@ -3279,6 +3042,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
'Use formats=duplicate extractor argument instead')
def solve_sig(s, spec):
return ''.join(s[i] for i in spec)
def build_fragments(f):
return LazyList({
'url': update_url_query(f['url'], {
@@ -3298,23 +3064,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# For handling potential pre-playback required waiting period
playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
for fmt in streaming_formats:
client_name = fmt[STREAMING_DATA_CLIENT_NAME]
available_at = fmt[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
if fmt.get('targetDurationSec'):
for pr in player_responses:
streaming_data = traverse_obj(pr, 'streamingData')
if not streaming_data:
continue
fetch_po_token_func = streaming_data[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME)
available_at = streaming_data[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...))
itag = str_or_none(fmt.get('itag'))
audio_track = fmt.get('audioTrack') or {}
stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
if not all_formats:
if stream_id in stream_ids:
continue
def get_stream_id(fmt_stream):
return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, ('audioTrack', 'id')), fmt_stream.get('isDrc')
quality = fmt.get('quality')
height = int_or_none(fmt.get('height'))
def process_format_stream(fmt_stream, proto, missing_pot):
nonlocal original_language
itag = str_or_none(fmt_stream.get('itag'))
audio_track = fmt_stream.get('audioTrack') or {}
quality = fmt_stream.get('quality')
height = int_or_none(fmt_stream.get('height'))
if quality == 'tiny' or not quality:
quality = fmt.get('audioQuality', '').lower() or quality
quality = fmt_stream.get('audioQuality', '').lower() or quality
# The 3gp format (17) in android client has a quality of "small",
# but is actually worse than other formats
if itag == '17':
@@ -3333,16 +3104,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if language_code and (is_original or (is_default and not original_language)):
original_language = language_code
has_drm = bool(fmt.get('drmFamilies'))
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
# number of fragments that would subsequently be requested with (`&sq=N`)
if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF' and not has_drm:
continue
has_drm = bool(fmt_stream.get('drmFamilies'))
if has_drm:
msg = f'Some {client_name} client https formats have been skipped as they are DRM protected. '
msg = f'Some {client_name} client {proto} formats have been skipped as they are DRM protected. '
if client_name == 'tv':
msg += (
f'{"Your account" if self.is_authenticated else "The current session"} may have '
@@ -3351,9 +3116,97 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
self.report_warning(msg, video_id, only_once=True)
fmt_url = fmt.get('url')
tbr = float_or_none(fmt_stream.get('averageBitrate') or fmt_stream.get('bitrate'), 1000)
format_duration = traverse_obj(fmt_stream, ('approxDurationMs', {float_or_none(scale=1000)}))
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
# Make sure to avoid false positives with small duration differences.
# E.g. __2ABJjxzNo, ySuUZEjARPY
is_damaged = try_call(lambda: format_duration < duration // 2)
if is_damaged:
self.report_warning(
f'Some {client_name} client {proto} formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
if missing_pot and 'missing_pot' not in self._configuration_arg('formats'):
self._report_pot_format_skipped(video_id, client_name, proto)
return None
name = fmt_stream.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
fps = int_or_none(fmt_stream.get('fps')) or 0
dct = {
'asr': int_or_none(fmt_stream.get('audioSampleRate')),
'filesize': int_or_none(fmt_stream.get('contentLength')),
'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
'format_note': join_nonempty(
join_nonempty(display_name, is_default and ' (default)', delim=''),
name, fmt_stream.get('isDrc') and 'DRC',
try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
(self.get_param('verbose') or all_formats) and short_client_name(client_name),
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
'audio_channels': fmt_stream.get('audioChannels'),
'height': height,
'quality': q(quality) - bool(fmt_stream.get('isDrc')) / 2,
'has_drm': has_drm,
'tbr': tbr,
'filesize_approx': filesize_from_tbr(tbr, format_duration),
'width': int_or_none(fmt_stream.get('width')),
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
# Strictly de-prioritize damaged and 3gp formats
'preference': -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt_stream.get('mimeType') or '')
if mime_mobj:
dct['ext'] = mimetype2ext(mime_mobj.group(1))
dct.update(parse_codecs(mime_mobj.group(2)))
single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
if single_stream and dct.get('ext'):
dct['container'] = dct['ext'] + '_dash'
return dct
def process_https_formats():
proto = 'https'
https_fmts = []
for fmt_stream in streaming_formats:
if fmt_stream.get('targetDurationSec'):
continue
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
# number of fragments that would subsequently be requested with (`&sq=N`)
if fmt_stream.get('type') == 'FORMAT_STREAM_TYPE_OTF' and not bool(fmt_stream.get('drmFamilies')):
continue
stream_id = get_stream_id(fmt_stream)
if not all_formats:
if stream_id in stream_ids:
continue
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
require_po_token = (
stream_id[0] not in ['18']
and gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided))
po_token = (
gvs_pots.get(client_name)
or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
if client_name not in gvs_pots:
gvs_pots[client_name] = po_token
fmt_url = fmt_stream.get('url')
encrypted_sig, sc = None, None
if not fmt_url:
sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
if not all((sc, fmt_url, player_url, encrypted_sig)):
@@ -3368,134 +3221,134 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
self.report_warning(msg, video_id, only_once=True)
continue
try:
fmt_url += '&{}={}'.format(
traverse_obj(sc, ('sp', -1)) or 'signature',
self._decrypt_signature(encrypted_sig, video_id, player_url),
)
except ExtractorError as e:
self.report_warning(
f'Signature extraction failed: Some formats may be missing\n'
f' player = {player_url}\n'
f' {bug_reports_message(before="")}',
video_id=video_id, only_once=True)
self.write_debug(
f'{video_id}: Signature extraction failure info:\n'
f' encrypted sig = {encrypted_sig}\n'
f' player = {player_url}')
self.write_debug(e, only_once=True)
fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
if not fmt:
continue
# signature
# Attempt to load sig spec from cache
if encrypted_sig:
spec_cache_id = self._sig_spec_cache_id(player_url, len(encrypted_sig))
spec = self._load_sig_spec_from_cache(spec_cache_id)
if spec:
self.write_debug(f'Using cached signature function {spec_cache_id}', only_once=True)
fmt_url += '&{}={}'.format(traverse_obj(sc, ('sp', -1)) or 'signature',
solve_sig(encrypted_sig, spec))
else:
fmt['_jsc_s_challenge'] = encrypted_sig
fmt['_jsc_s_sc'] = sc
# n challenge
query = parse_qs(fmt_url)
if query.get('n'):
try:
decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
fmt_url = update_url_query(fmt_url, {
'n': decrypt_nsig(query['n'][0], video_id, player_url),
})
except ExtractorError as e:
if player_url:
self.report_warning(
f'nsig extraction failed: Some formats may be missing\n'
f' n = {query["n"][0]} ; player = {player_url}\n'
f' {bug_reports_message(before="")}',
video_id=video_id, only_once=True)
self.write_debug(e, only_once=True)
n_challenge = query['n'][0]
if n_challenge in self._player_cache:
fmt_url = update_url_query(fmt_url, {'n': self._player_cache[n_challenge]})
else:
self.report_warning(
'Cannot decrypt nsig without player_url: Some formats may be missing',
video_id=video_id, only_once=True)
continue
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
format_duration = traverse_obj(fmt, ('approxDurationMs', {float_or_none(scale=1000)}))
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
# Make sure to avoid false positives with small duration differences.
# E.g. __2ABJjxzNo, ySuUZEjARPY
is_damaged = try_call(lambda: format_duration < duration // 2)
if is_damaged:
self.report_warning(
'Some formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
fetch_po_token_func = fmt[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
require_po_token = (
itag not in ['18']
and gvs_pot_required(
pot_policy, fmt[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER],
fmt[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]))
po_token = (
gvs_pots.get(client_name)
or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
fmt['_jsc_n_challenge'] = n_challenge
if po_token:
fmt_url = update_url_query(fmt_url, {'pot': po_token})
if client_name not in gvs_pots:
gvs_pots[client_name] = po_token
if not po_token and require_po_token and 'missing_pot' not in self._configuration_arg('formats'):
self._report_pot_format_skipped(video_id, client_name, 'https')
continue
fmt['url'] = fmt_url
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
fps = int_or_none(fmt.get('fps')) or 0
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
'format_note': join_nonempty(
join_nonempty(display_name, is_default and ' (default)', delim=''),
name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
is_damaged and 'DAMAGED', require_po_token and not po_token and 'MISSING POT',
(self.get_param('verbose') or all_formats) and short_client_name(client_name),
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
'audio_channels': fmt.get('audioChannels'),
'height': height,
'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
'has_drm': has_drm,
'tbr': tbr,
'filesize_approx': filesize_from_tbr(tbr, format_duration),
'url': fmt_url,
'width': int_or_none(fmt.get('width')),
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
# Strictly de-prioritize damaged and 3gp formats
'preference': -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
if mime_mobj:
dct['ext'] = mimetype2ext(mime_mobj.group(1))
dct.update(parse_codecs(mime_mobj.group(2)))
if itag:
itags[itag].add(('https', dct.get('language')))
if stream_id[0]:
itags[stream_id[0]].add((proto, fmt.get('language')))
stream_ids.append(stream_id)
single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
if single_stream and dct.get('ext'):
dct['container'] = dct['ext'] + '_dash'
# For handling potential pre-playback required waiting period
if live_status not in ('is_live', 'post_live'):
dct['available_at'] = available_at
fmt['available_at'] = available_at
if (all_formats or 'dashy' in format_types) and dct['filesize']:
yield {
**dct,
'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
if (all_formats or 'dashy' in format_types) and fmt['filesize']:
https_fmts.append({
**fmt,
'format_id': f'{fmt["format_id"]}-dashy' if all_formats else fmt['format_id'],
'protocol': 'http_dash_segments',
'fragments': build_fragments(dct),
}
'fragments': build_fragments(fmt),
})
if all_formats or 'dashy' not in format_types:
dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
yield dct
fmt['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
https_fmts.append(fmt)
# Bulk process sig/n handling
# Retrieve all JSC Sig and n requests for this player response in one go
n_challenges = {}
s_challenges = {}
for fmt in https_fmts:
# This will de-duplicate requests
n_challenge = fmt.pop('_jsc_n_challenge', None)
if n_challenge is not None:
n_challenges.setdefault(n_challenge, []).append(fmt)
s_challenge = fmt.pop('_jsc_s_challenge', None)
if s_challenge is not None:
s_challenges.setdefault(len(s_challenge), {}).setdefault(s_challenge, []).append(fmt)
challenge_requests = []
if n_challenges:
challenge_requests.append(JsChallengeRequest(
type=JsChallengeType.N,
video_id=video_id,
input=NChallengeInput(challenges=list(n_challenges.keys()), player_url=player_url)))
if s_challenges:
challenge_requests.append(JsChallengeRequest(
type=JsChallengeType.SIG,
video_id=video_id,
input=SigChallengeInput(challenges=[''.join(map(chr, range(spec_id))) for spec_id in s_challenges], player_url=player_url)))
if challenge_requests:
for _challenge_request, challenge_response in self._jsc_director.bulk_solve(challenge_requests):
if challenge_response.type == JsChallengeType.SIG:
for challenge, result in challenge_response.output.results.items():
spec_id = len(challenge)
spec = [ord(c) for c in result]
self._store_sig_spec_to_cache(self._sig_spec_cache_id(player_url, spec_id), spec)
s_challenge_data = s_challenges.pop(spec_id, {})
if not s_challenge_data:
continue
for s_challenge, fmts in s_challenge_data.items():
solved_challenge = solve_sig(s_challenge, spec)
for fmt in fmts:
sc = fmt.pop('_jsc_s_sc')
fmt['url'] += '&{}={}'.format(
traverse_obj(sc, ('sp', -1)) or 'signature',
solved_challenge)
elif challenge_response.type == JsChallengeType.N:
for challenge, result in challenge_response.output.results.items():
fmts = n_challenges.pop(challenge, [])
for fmt in fmts:
self._player_cache[challenge] = result
fmt['url'] = update_url_query(fmt['url'], {'n': result})
# Raise a warning if any challenge requests remain unsolved,
# with the message depending on the type of challenge
help_message = (
'Ensure you have a supported JavaScript runtime and '
'challenge solver script distribution installed. '
'Review any warnings presented before this message. '
f'For more details, refer to {_EJS_WIKI_URL}')
if s_challenges:
self.report_warning(
f'Signature solving failed: Some formats may be missing. {help_message}',
video_id=video_id, only_once=True)
if n_challenges:
self.report_warning(
f'n challenge solving failed: Some formats may be missing. {help_message}',
video_id=video_id, only_once=True)
for cfmts in list(s_challenges.values()) + list(n_challenges.values()):
for fmt in cfmts:
if fmt in https_fmts:
https_fmts.remove(fmt)
yield from https_fmts
yield from process_https_formats()
needs_live_processing = self._needs_live_processing(live_status, duration)
skip_bad_formats = 'incomplete' not in format_types
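The cached signature "spec" used by `solve_sig` and `_store_sig_spec_to_cache` above works because the player's sig routine only reorders and drops characters: solving one synthetic challenge built from `chr(0)`..`chr(n-1)` yields an index map reusable for any signature of that length. A toy sketch, where the stand-in solver is an assumption replacing the real JSC director:

```python
# Illustrative only: a toy permutation stands in for the player's sig function.
def toy_sig_solver(s):
    # pretend the player JS reverses the string and drops the first character
    return s[::-1][1:]

encrypted_sig = 'ABCDEFG'

# Derive the spec once from a synthetic challenge of the same length,
# as the extractor does with ''.join(map(chr, range(len(sig))))
test_string = ''.join(map(chr, range(len(encrypted_sig))))
spec = [ord(c) for c in toy_sig_solver(test_string)]

# Reuse the cached spec for any real signature of that length
def solve_sig(s, spec):
    return ''.join(s[i] for i in spec)

assert solve_sig(encrypted_sig, spec) == toy_sig_solver(encrypted_sig)
```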
@@ -3504,6 +3357,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if (needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
or (needs_live_processing and skip_bad_formats)):
skip_manifests.add('hls')
if skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
skip_manifests.add('dash')
@@ -3558,19 +3412,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f['source_preference'] -= 5
return True
subtitles = {}
for sd in streaming_data:
client_name = sd[STREAMING_DATA_CLIENT_NAME]
fetch_pot_func = sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
is_premium_subscriber = sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
has_player_token = sd[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
hls_manifest_url = 'hls' not in skip_manifests and streaming_data.get('hlsManifestUrl')
if hls_manifest_url:
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS]
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
if client_name not in gvs_pots:
@@ -3590,12 +3437,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'/itag/(\d+)', f['url'], 'itag', default=None), require_po_token and not po_token):
yield f
dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
dash_manifest_url = 'dash' not in skip_manifests and streaming_data.get('dashManifestUrl')
if dash_manifest_url:
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.DASH]
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)
po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended))
if po_token:
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
if client_name not in gvs_pots:
@@ -3615,7 +3462,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
if needs_live_processing:
f['is_from_start'] = True
yield f
yield subtitles
@@ -3688,14 +3534,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else 'was_live' if live_content
else 'not_live' if False in (is_live, live_content)
else None)
streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
*formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
*formats, subtitles = self._extract_formats_and_subtitles(video_id, player_responses, player_url, live_status, duration)
if all(f.get('has_drm') for f in formats):
# If there are no formats that definitely don't have DRM, all have DRM
for f in formats:
f['has_drm'] = True
return live_broadcast_details, live_status, streaming_data, formats, subtitles
return live_broadcast_details, live_status, formats, subtitles
def _download_initial_data(self, video_id, webpage, webpage_client, webpage_ytcfg):
initial_data = None
@@ -3855,8 +3700,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
or int_or_none(get_first(microformats, 'lengthSeconds'))
or parse_duration(search_meta('duration')) or None)
live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
live_broadcast_details, live_status, formats, automatic_captions = \
self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
if live_status == 'post_live':
self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

View File

@@ -0,0 +1,132 @@
# YoutubeIE JS Challenge Provider Framework
As part of the YouTube extractor, we have a framework for solving n/sig JS Challenges programmatically. This can be used by plugins.
> [!TIP]
> If publishing a JS Challenge Provider plugin to GitHub, add the [yt-dlp-jsc-provider](https://github.com/topics/yt-dlp-jsc-provider) topic to your repository to help users find it.
## Public APIs
- `yt_dlp.extractor.youtube.jsc.provider`
Everything else is **internal-only**, and no guarantees are made about its API stability.
> [!WARNING]
> We will try our best to maintain stability with the public APIs.
> However, due to the nature of extractors and YouTube, we may need to remove or change APIs in the future.
> If you are using these APIs outside yt-dlp plugins, please account for this by importing them safely.
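For example, a minimal sketch of the kind of guarded import suggested above when using the public API outside yt-dlp plugins:

```python
# Guard the import so older yt-dlp versions without the JSC framework still work
try:
    from yt_dlp.extractor.youtube.jsc.provider import JsChallengeProvider, JsChallengeType
except ImportError:
    JsChallengeProvider = None
    JsChallengeType = None

if JsChallengeProvider is None:
    print('JS challenge provider framework not available in this yt-dlp version')
```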
## JS Challenge Provider
`yt_dlp.extractor.youtube.jsc.provider`
```python
from yt_dlp.extractor.youtube.jsc.provider import (
register_provider,
register_preference,
JsChallengeProvider,
JsChallengeRequest,
JsChallengeResponse,
JsChallengeProviderError,
JsChallengeProviderRejectedRequest,
JsChallengeType,
JsChallengeProviderResponse,
NChallengeOutput,
)
from yt_dlp.utils import traverse_obj, Popen
import json
import subprocess
import typing
@register_provider
class MyJsChallengeProviderJCP(JsChallengeProvider): # Provider class name must end with "JCP"
PROVIDER_VERSION = '0.2.1'
# Define a unique display name for the provider
PROVIDER_NAME = 'my-provider'
BUG_REPORT_LOCATION = 'https://issues.example.com/report'
# Set supported challenge types.
# If None, the provider will handle all types.
_SUPPORTED_TYPES = [JsChallengeType.N]
def is_available(self) -> bool:
"""
Check if the provider is available (e.g. all required dependencies are available)
This is used to determine if the provider should be used and to provide debug information.
IMPORTANT: This method SHOULD NOT make any network requests or perform any expensive operations.
Since this is called multiple times, we recommend caching the result.
"""
return True
def close(self):
# Optional close hook, called when YoutubeDL is closed.
pass
def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
# If you need to do additional validation on the requests.
# Raise yt_dlp.extractor.youtube.jsc.provider.JsChallengeProviderRejectedRequest if the request is not supported.
if len("something") > 255:
raise JsChallengeProviderRejectedRequest('Challenges longer than 255 are not supported', expected=True)
# Settings are pulled from extractor args passed to yt-dlp with the key `youtubejsc-<PROVIDER_KEY>`.
# For this example, the extractor arg would be:
# `--extractor-args "youtubejsc-myjschallengeprovider:bin_path=/path/to/bin"`
bin_path = self._configuration_arg(
'bin_path', default=['/path/to/bin'])[0]
# See below for logging guidelines
self.logger.trace(f'Using bin path: {bin_path}')
for request in requests:
# You can use the _get_player method to get the player JS code if needed.
# This shares the same caching as the YouTube extractor, so it will not make unnecessary requests.
player_js = self._get_player(request.video_id, request.input.player_url)
cmd = f'{bin_path} {request.input.challenges} {player_js}'
self.logger.info(f'Executing command: {cmd}')
stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
if ret != 0:
# If there is an error, raise JsChallengeProviderError.
# The request will be sent to the next provider if there is one.
# You can specify whether it is expected or not. If it is unexpected,
# the log will include a link to the bug report location (BUG_REPORT_LOCATION).
# raise JsChallengeProviderError(f'Command returned error code {ret}', expected=False)
# You can also only fail this specific request by returning a JsChallengeProviderResponse with the error.
# This will allow other requests to be processed by this provider.
                yield JsChallengeProviderResponse(
                    request=request,
                    error=JsChallengeProviderError(f'Command returned error code {ret}', expected=False)
                )
                # An error response was already yielded for this request, so skip to the next one
                continue
yield JsChallengeProviderResponse(
request=request,
response=JsChallengeResponse(
type=JsChallengeType.N,
output=NChallengeOutput(results=traverse_obj(json.loads(stdout))),
))
# If there are multiple JS Challenge Providers that can handle the same JsChallengeRequest(s),
# you can define a preference function to increase/decrease the priority of providers.
@register_preference(MyJsChallengeProviderJCP)
def my_provider_preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
return 50
```
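For reference, the requests passed to `_real_bulk_solve` are plain dataclasses from the same module. A minimal sketch of constructing one manually, e.g. when testing a provider (the player URL, challenge value and video ID below are placeholders, not real data):
```python
from yt_dlp.extractor.youtube.jsc.provider import (
    JsChallengeRequest,
    JsChallengeType,
    NChallengeInput,
)

# Placeholder values for illustration only
request = JsChallengeRequest(
    type=JsChallengeType.N,
    input=NChallengeInput(
        player_url='https://www.youtube.com/s/player/<player-id>/player_ias.vflset/en_US/base.js',
        challenges=['<n-challenge-value>'],
    ),
    video_id='<video-id>',
)
```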
## Logging Guidelines
- Use the `self.logger` object to log messages.
- When making HTTP requests or performing any other time-expensive operation, use `self.logger.info` to log a message to standard non-verbose output.
- This lets users know what is happening when a time-expensive operation is taking place.
- Technical information, such as a command being executed, should be logged to `self.logger.debug`.
- Use `self.logger.trace` for very detailed information that is only useful for debugging, to avoid cluttering the debug log (see the sketch below).
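A minimal sketch of how these levels map onto a provider method (`_solve_with_binary`, `bin_path` and the surrounding provider class are assumed to be the ones from the example above; this is illustrative only):
```python
# Hedged sketch reusing names from the example provider above.
def _solve_with_binary(self, request):
    # info: user-facing progress for a time-expensive step (spawning an external process)
    self.logger.info('Solving JS challenge with external binary')
    # debug: technical detail, shown with -v
    self.logger.debug(f'Using bin path: {self.bin_path}')
    # trace: very detailed output, only shown with jsc_trace=true
    self.logger.trace(f'Challenges: {request.input.challenges}')
```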
## Debugging
- Use `-v --extractor-args "youtube:jsc_trace=true"` to enable JS Challenge debug output.

View File

@@ -0,0 +1,5 @@
# Trigger import of built-in providers
from ._builtin.bun import BunJCP as _BunJCP # noqa: F401
from ._builtin.deno import DenoJCP as _DenoJCP # noqa: F401
from ._builtin.node import NodeJCP as _NodeJCP # noqa: F401
from ._builtin.quickjs import QuickJSJCP as _QuickJSJCP # noqa: F401

View File

@@ -0,0 +1,146 @@
from __future__ import annotations
import os
import re
import shlex
import subprocess
import urllib.parse
from yt_dlp.extractor.youtube.jsc._builtin.ejs import (
_EJS_WIKI_URL,
EJSBaseJCP,
Script,
ScriptSource,
ScriptType,
ScriptVariant,
)
from yt_dlp.extractor.youtube.jsc._builtin.vendor import load_script
from yt_dlp.extractor.youtube.jsc.provider import (
JsChallengeProvider,
JsChallengeProviderError,
JsChallengeRequest,
register_preference,
register_provider,
)
from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
from yt_dlp.utils import Popen
from yt_dlp.utils.networking import HTTPHeaderDict, clean_proxies
# KNOWN ISSUES:
# - If node_modules is present and includes a requested lib, the version we request is ignored
#   and whatever is installed in node_modules is used.
# - No way to ignore existing node_modules, lock files, etc.
# - No sandboxing options available
# - Cannot detect if npm packages are cached without potentially downloading them.
# `--no-install` appears to disable the cache.
# - npm auto-install may fail with an integrity error when using HTTP proxies
# - npm auto-install HTTP proxy support may be limited on older Bun versions
@register_provider
class BunJCP(EJSBaseJCP, BuiltinIEContentProvider):
PROVIDER_NAME = 'bun'
JS_RUNTIME_NAME = 'bun'
BUN_NPM_LIB_FILENAME = 'yt.solver.bun.lib.js'
SUPPORTED_PROXY_SCHEMES = ['http', 'https']
def _iter_script_sources(self):
yield from super()._iter_script_sources()
yield ScriptSource.BUILTIN, self._bun_npm_source
def _bun_npm_source(self, script_type: ScriptType, /):
if script_type != ScriptType.LIB:
return None
if 'ejs:npm' not in self.ie.get_param('remote_components', []):
return self._skip_component('ejs:npm')
# Check to see if the environment proxies are compatible with Bun npm source
if unsupported_scheme := self._check_env_proxies(self._get_env_options()):
self.logger.warning(
f'Bun NPM package downloads only support HTTP/HTTPS proxies; skipping remote NPM package downloads. '
f'Provide another distribution of the challenge solver script or use '
f'another JS runtime that supports "{unsupported_scheme}" proxies. '
f'For more information and alternatives, refer to {_EJS_WIKI_URL}')
return None
        # Bun-specific lib script that uses Bun autoimport
# https://bun.com/docs/runtime/autoimport
error_hook = lambda e: self.logger.warning(
f'Failed to read bun challenge solver lib script: {e}{provider_bug_report_message(self)}')
code = load_script(
self.BUN_NPM_LIB_FILENAME, error_hook=error_hook)
if code:
return Script(script_type, ScriptVariant.BUN_NPM, ScriptSource.BUILTIN, self._SCRIPT_VERSION, code)
return None
def _check_env_proxies(self, env):
# check that the schemes of both HTTP_PROXY and HTTPS_PROXY are supported
for key in ('HTTP_PROXY', 'HTTPS_PROXY'):
proxy = env.get(key)
if not proxy:
continue
scheme = urllib.parse.urlparse(proxy).scheme.lower()
if scheme not in self.SUPPORTED_PROXY_SCHEMES:
return scheme
return None
def _get_env_options(self) -> dict[str, str]:
options = os.environ.copy() # pass through existing bun env vars
request_proxies = self.ie._downloader.proxies.copy()
clean_proxies(request_proxies, HTTPHeaderDict())
# Apply 'all' proxy first, then allow per-scheme overrides
if request_proxies.get('all') is not None:
options['HTTP_PROXY'] = options['HTTPS_PROXY'] = request_proxies['all']
for key, env in (('http', 'HTTP_PROXY'), ('https', 'HTTPS_PROXY')):
val = request_proxies.get(key)
if val is not None:
options[env] = val
if self.ie.get_param('nocheckcertificate'):
options['NODE_TLS_REJECT_UNAUTHORIZED'] = '0'
# Prevent segfault: <https://github.com/oven-sh/bun/issues/22901>
options.pop('JSC_useJIT', None)
if self.ejs_setting('jitless', ['false']) != ['false']:
options['BUN_JSC_useJIT'] = '0'
return options
def _run_js_runtime(self, stdin: str, /) -> str:
# https://bun.com/docs/cli/run
options = ['--no-addons', '--prefer-offline']
if self._lib_script.variant == ScriptVariant.BUN_NPM:
# Enable auto-install even if node_modules is present
options.append('--install=fallback')
else:
options.append('--no-install')
cmd = [self.runtime_info.path, '--bun', 'run', *options, '-']
self.logger.debug(f'Running bun: {shlex.join(cmd)}')
with Popen(
cmd,
text=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=self._get_env_options(),
) as proc:
stdout, stderr = proc.communicate_or_kill(stdin)
stderr = self._clean_stderr(stderr)
if proc.returncode or stderr:
msg = f'Error running bun process (returncode: {proc.returncode})'
if stderr:
msg = f'{msg}: {stderr.strip()}'
raise JsChallengeProviderError(msg)
return stdout
def _clean_stderr(self, stderr):
return '\n'.join(
line for line in stderr.splitlines()
if not re.match(r'^Bun v\d+\.\d+\.\d+ \([\w\s]+\)$', line))
@register_preference(BunJCP)
def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
return 800

View File

@@ -0,0 +1,125 @@
from __future__ import annotations
import os
import re
import shlex
import subprocess
from yt_dlp.extractor.youtube.jsc._builtin.ejs import (
EJSBaseJCP,
Script,
ScriptSource,
ScriptType,
ScriptVariant,
)
from yt_dlp.extractor.youtube.jsc._builtin.vendor import load_script
from yt_dlp.extractor.youtube.jsc.provider import (
JsChallengeProvider,
JsChallengeProviderError,
JsChallengeRequest,
register_preference,
register_provider,
)
from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
from yt_dlp.utils import Popen, remove_terminal_sequences
from yt_dlp.utils.networking import HTTPHeaderDict, clean_proxies
@register_provider
class DenoJCP(EJSBaseJCP, BuiltinIEContentProvider):
PROVIDER_NAME = 'deno'
JS_RUNTIME_NAME = 'deno'
_DENO_BASE_OPTIONS = ['--no-prompt', '--no-remote', '--no-lock', '--node-modules-dir=none', '--no-config']
DENO_NPM_LIB_FILENAME = 'yt.solver.deno.lib.js'
_NPM_PACKAGES_CACHED = False
def _iter_script_sources(self):
yield from super()._iter_script_sources()
yield ScriptSource.BUILTIN, self._deno_npm_source
def _deno_npm_source(self, script_type: ScriptType, /):
if script_type != ScriptType.LIB:
return None
# Deno-specific lib scripts that use Deno NPM imports
error_hook = lambda e: self.logger.warning(
f'Failed to read deno challenge solver lib script: {e}{provider_bug_report_message(self)}')
code = load_script(
self.DENO_NPM_LIB_FILENAME, error_hook=error_hook)
if not code:
return None
if 'ejs:npm' not in self.ie.get_param('remote_components', []):
# We may still be able to continue if the npm packages are available/cached
self._NPM_PACKAGES_CACHED = self._npm_packages_cached(code)
if not self._NPM_PACKAGES_CACHED:
return self._skip_component('ejs:npm')
return Script(script_type, ScriptVariant.DENO_NPM, ScriptSource.BUILTIN, self._SCRIPT_VERSION, code)
def _npm_packages_cached(self, stdin: str) -> bool:
# Check if npm packages are cached, so we can run without --remote-components ejs:npm
self.logger.debug('Checking if npm packages are cached')
try:
self._run_deno(stdin, [*self._DENO_BASE_OPTIONS, '--cached-only'])
except JsChallengeProviderError as e:
self.logger.trace(f'Deno npm packages not cached: {e}')
return False
return True
def _run_js_runtime(self, stdin: str, /) -> str:
options = [*self._DENO_BASE_OPTIONS]
if self._lib_script.variant == ScriptVariant.DENO_NPM and self._NPM_PACKAGES_CACHED:
options.append('--cached-only')
elif self._lib_script.variant != ScriptVariant.DENO_NPM:
options.append('--no-npm')
options.append('--cached-only')
if self.ie.get_param('nocheckcertificate'):
options.append('--unsafely-ignore-certificate-errors')
# XXX: Convert this extractor-arg into a general option if/when a JSI framework is implemented
if self.ejs_setting('jitless', ['false']) != ['false']:
options.append('--v8-flags=--jitless')
return self._run_deno(stdin, options)
def _get_env_options(self) -> dict[str, str]:
options = os.environ.copy() # pass through existing deno env vars
request_proxies = self.ie._downloader.proxies.copy()
clean_proxies(request_proxies, HTTPHeaderDict())
# Apply 'all' proxy first, then allow per-scheme overrides
if 'all' in request_proxies and request_proxies['all'] is not None:
options['HTTP_PROXY'] = options['HTTPS_PROXY'] = request_proxies['all']
for key, env in (('http', 'HTTP_PROXY'), ('https', 'HTTPS_PROXY'), ('no', 'NO_PROXY')):
if key in request_proxies and request_proxies[key] is not None:
options[env] = request_proxies[key]
return options
def _run_deno(self, stdin, options) -> str:
cmd = [self.runtime_info.path, 'run', *options, '-']
self.logger.debug(f'Running deno: {shlex.join(cmd)}')
with Popen(
cmd,
text=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=self._get_env_options(),
) as proc:
stdout, stderr = proc.communicate_or_kill(stdin)
stderr = self._clean_stderr(stderr)
if proc.returncode or stderr:
msg = f'Error running deno process (returncode: {proc.returncode})'
if stderr:
msg = f'{msg}: {stderr.strip()}'
raise JsChallengeProviderError(msg)
return stdout
def _clean_stderr(self, stderr):
return '\n'.join(
line for line in stderr.splitlines()
if not (
re.match(r'^Download\s+https\S+$', remove_terminal_sequences(line))
or re.match(r'DANGER: TLS certificate validation is disabled for all hostnames', remove_terminal_sequences(line))))
@register_preference(DenoJCP)
def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
return 1000

View File

@@ -0,0 +1,326 @@
from __future__ import annotations
import collections
import dataclasses
import enum
import functools
import hashlib
import json
from yt_dlp.dependencies import yt_dlp_ejs as _has_ejs
from yt_dlp.extractor.youtube.jsc._builtin import vendor
from yt_dlp.extractor.youtube.jsc.provider import (
JsChallengeProvider,
JsChallengeProviderError,
JsChallengeProviderRejectedRequest,
JsChallengeProviderResponse,
JsChallengeResponse,
JsChallengeType,
NChallengeOutput,
SigChallengeOutput,
)
from yt_dlp.extractor.youtube.pot._provider import configuration_arg
from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
from yt_dlp.utils._jsruntime import JsRuntimeInfo
if _has_ejs:
import yt_dlp_ejs.yt.solver
TYPE_CHECKING = False
if TYPE_CHECKING:
from collections.abc import Callable, Generator
from yt_dlp.extractor.youtube.jsc.provider import JsChallengeRequest
_EJS_WIKI_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/EJS'
class ScriptType(enum.Enum):
LIB = 'lib'
CORE = 'core'
class ScriptVariant(enum.Enum):
UNKNOWN = 'unknown'
MINIFIED = 'minified'
UNMINIFIED = 'unminified'
DENO_NPM = 'deno_npm'
BUN_NPM = 'bun_npm'
class ScriptSource(enum.Enum):
PYPACKAGE = 'python package' # PyPI, PyInstaller exe, zipimport binary, etc
CACHE = 'cache' # GitHub release assets (cached)
WEB = 'web' # GitHub release assets (downloaded)
BUILTIN = 'builtin' # vendored (full core script; import-only lib script + NPM cache)
@dataclasses.dataclass
class Script:
type: ScriptType
variant: ScriptVariant
source: ScriptSource
version: str
code: str
@functools.cached_property
def hash(self, /) -> str:
return hashlib.sha3_512(self.code.encode()).hexdigest()
def __str__(self, /):
return f'<Script {self.type.value!r} v{self.version} (source: {self.source.value}) variant={self.variant.value!r} size={len(self.code)} hash={self.hash[:7]}...>'
class EJSBaseJCP(JsChallengeProvider):
JS_RUNTIME_NAME: str
_CACHE_SECTION = 'challenge-solver'
_REPOSITORY = 'yt-dlp/ejs'
_SUPPORTED_TYPES = [JsChallengeType.N, JsChallengeType.SIG]
_SCRIPT_VERSION = vendor.VERSION
    # TODO: Integration tests for each kind of script source
_ALLOWED_HASHES = {
ScriptType.LIB: {
ScriptVariant.UNMINIFIED: vendor.HASHES['yt.solver.lib.js'],
ScriptVariant.MINIFIED: vendor.HASHES['yt.solver.lib.min.js'],
ScriptVariant.DENO_NPM: vendor.HASHES['yt.solver.deno.lib.js'],
ScriptVariant.BUN_NPM: vendor.HASHES['yt.solver.bun.lib.js'],
},
ScriptType.CORE: {
ScriptVariant.MINIFIED: vendor.HASHES['yt.solver.core.min.js'],
ScriptVariant.UNMINIFIED: vendor.HASHES['yt.solver.core.js'],
},
}
_SCRIPT_FILENAMES = {
ScriptType.LIB: 'yt.solver.lib.js',
ScriptType.CORE: 'yt.solver.core.js',
}
_MIN_SCRIPT_FILENAMES = {
ScriptType.LIB: 'yt.solver.lib.min.js',
ScriptType.CORE: 'yt.solver.core.min.js',
}
# currently disabled as files are large and we do not support rotation
_ENABLE_PREPROCESSED_PLAYER_CACHE = False
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._available = True
self.ejs_settings = self.ie.get_param('extractor_args', {}).get('youtube-ejs', {})
# Note: The following 3 args are for developer use only & intentionally not documented.
# - dev: bypasses verification of script hashes and versions.
# - repo: use a custom GitHub repository to fetch web script from.
# - script_version: use a custom script version.
# E.g. --extractor-args "youtube-ejs:dev=true;script_version=0.1.4"
self.is_dev = self.ejs_setting('dev', ['false'])[0] == 'true'
if self.is_dev:
self.report_dev_option('You have enabled dev mode for EJS JCP Providers.')
custom_repo = self.ejs_setting('repo', [None])[0]
if custom_repo:
self.report_dev_option(f'You have set a custom GitHub repository for EJS JCP Providers ({custom_repo}).')
self._REPOSITORY = custom_repo
custom_version = self.ejs_setting('script_version', [None])[0]
if custom_version:
self.report_dev_option(f'You have set a custom EJS script version for EJS JCP Providers ({custom_version}).')
self._SCRIPT_VERSION = custom_version
def ejs_setting(self, key, *args, **kwargs):
return configuration_arg(self.ejs_settings, key, *args, **kwargs)
def report_dev_option(self, message: str):
self.ie.report_warning(
f'{message} '
f'This is a developer option intended for debugging. \n'
' If you experience any issues while using this option, '
f'{self.ie._downloader._format_err("DO NOT", self.ie._downloader.Styles.ERROR)} open a bug report', only_once=True)
def _run_js_runtime(self, stdin: str, /) -> str:
"""To be implemented by subclasses"""
raise NotImplementedError
def _real_bulk_solve(self, /, requests: list[JsChallengeRequest]):
grouped: dict[str, list[JsChallengeRequest]] = collections.defaultdict(list)
for request in requests:
grouped[request.input.player_url].append(request)
for player_url, grouped_requests in grouped.items():
player = None
if self._ENABLE_PREPROCESSED_PLAYER_CACHE:
player = self.ie.cache.load(self._CACHE_SECTION, f'player:{player_url}')
if player:
cached = True
else:
cached = False
video_id = next((request.video_id for request in grouped_requests), None)
player = self._get_player(video_id, player_url)
# NB: This output belongs after the player request
self.logger.info(f'Solving JS challenges using {self.JS_RUNTIME_NAME}')
stdin = self._construct_stdin(player, cached, grouped_requests)
stdout = self._run_js_runtime(stdin)
output = json.loads(stdout)
if output['type'] == 'error':
raise JsChallengeProviderError(output['error'])
if self._ENABLE_PREPROCESSED_PLAYER_CACHE and (preprocessed := output.get('preprocessed_player')):
self.ie.cache.store(self._CACHE_SECTION, f'player:{player_url}', preprocessed)
for request, response_data in zip(grouped_requests, output['responses'], strict=True):
if response_data['type'] == 'error':
yield JsChallengeProviderResponse(request, None, response_data['error'])
else:
yield JsChallengeProviderResponse(request, JsChallengeResponse(request.type, (
NChallengeOutput(response_data['data']) if request.type is JsChallengeType.N
else SigChallengeOutput(response_data['data']))))
def _construct_stdin(self, player: str, preprocessed: bool, requests: list[JsChallengeRequest], /) -> str:
json_requests = [{
'type': request.type.value,
'challenges': request.input.challenges,
} for request in requests]
data = {
'type': 'preprocessed',
'preprocessed_player': player,
'requests': json_requests,
} if preprocessed else {
'type': 'player',
'player': player,
'requests': json_requests,
'output_preprocessed': True,
}
return f'''\
{self._lib_script.code}
Object.assign(globalThis, lib);
{self._core_script.code}
console.log(JSON.stringify(jsc({json.dumps(data)})));
'''
# region: challenge solver script
@functools.cached_property
def _lib_script(self, /):
return self._get_script(ScriptType.LIB)
@functools.cached_property
def _core_script(self, /):
return self._get_script(ScriptType.CORE)
def _get_script(self, script_type: ScriptType, /) -> Script:
skipped_components: list[_SkippedComponent] = []
for _, from_source in self._iter_script_sources():
script = from_source(script_type)
if not script:
continue
if isinstance(script, _SkippedComponent):
skipped_components.append(script)
continue
if not self.is_dev:
if script.version != self._SCRIPT_VERSION:
self.logger.warning(
f'Challenge solver {script_type.value} script version {script.version} '
f'is not supported (source: {script.source.value}, variant: {script.variant}, supported version: {self._SCRIPT_VERSION})')
if script.source is ScriptSource.CACHE:
self.logger.debug('Clearing outdated cached script')
self.ie.cache.store(self._CACHE_SECTION, script_type.value, None)
continue
script_hashes = self._ALLOWED_HASHES[script.type].get(script.variant, [])
if script_hashes and script.hash not in script_hashes:
self.logger.warning(
f'Hash mismatch on challenge solver {script.type.value} script '
f'(source: {script.source.value}, variant: {script.variant}, hash: {script.hash})!{provider_bug_report_message(self)}')
if script.source is ScriptSource.CACHE:
self.logger.debug('Clearing invalid cached script')
self.ie.cache.store(self._CACHE_SECTION, script_type.value, None)
continue
self.logger.debug(
f'Using challenge solver {script.type.value} script v{script.version} '
f'(source: {script.source.value}, variant: {script.variant.value})')
break
else:
self._available = False
raise JsChallengeProviderRejectedRequest(
f'No usable challenge solver {script_type.value} script available',
_skipped_components=skipped_components or None,
)
return script
def _iter_script_sources(self) -> Generator[tuple[ScriptSource, Callable[[ScriptType], Script | None]]]:
yield from [
(ScriptSource.PYPACKAGE, self._pypackage_source),
(ScriptSource.CACHE, self._cached_source),
(ScriptSource.BUILTIN, self._builtin_source),
(ScriptSource.WEB, self._web_release_source)]
def _pypackage_source(self, script_type: ScriptType, /) -> Script | None:
if not _has_ejs:
return None
try:
code = yt_dlp_ejs.yt.solver.core() if script_type is ScriptType.CORE else yt_dlp_ejs.yt.solver.lib()
except Exception as e:
self.logger.warning(
f'Failed to load challenge solver {script_type.value} script from python package: {e}{provider_bug_report_message(self)}')
return None
return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.PYPACKAGE, yt_dlp_ejs.version, code)
def _cached_source(self, script_type: ScriptType, /) -> Script | None:
if data := self.ie.cache.load(self._CACHE_SECTION, script_type.value):
return Script(script_type, ScriptVariant(data['variant']), ScriptSource.CACHE, data['version'], data['code'])
return None
def _builtin_source(self, script_type: ScriptType, /) -> Script | None:
error_hook = lambda _: self.logger.warning(
f'Failed to read builtin challenge solver {script_type.value} script{provider_bug_report_message(self)}')
code = vendor.load_script(
self._SCRIPT_FILENAMES[script_type], error_hook=error_hook)
if code:
return Script(script_type, ScriptVariant.UNMINIFIED, ScriptSource.BUILTIN, self._SCRIPT_VERSION, code)
return None
def _web_release_source(self, script_type: ScriptType, /):
if 'ejs:github' not in (self.ie.get_param('remote_components') or ()):
return self._skip_component('ejs:github')
url = f'https://github.com/{self._REPOSITORY}/releases/download/{self._SCRIPT_VERSION}/{self._MIN_SCRIPT_FILENAMES[script_type]}'
if code := self.ie._download_webpage_with_retries(
url, None, f'[{self.logger.prefix}] Downloading challenge solver {script_type.value} script from {url}',
f'[{self.logger.prefix}] Failed to download challenge solver {script_type.value} script', fatal=False,
):
self.ie.cache.store(self._CACHE_SECTION, script_type.value, {
'version': self._SCRIPT_VERSION,
'variant': ScriptVariant.MINIFIED.value,
'code': code,
})
return Script(script_type, ScriptVariant.MINIFIED, ScriptSource.WEB, self._SCRIPT_VERSION, code)
return None
# endregion: challenge solver script
@property
def runtime_info(self) -> JsRuntimeInfo | None:
runtime = self.ie._downloader._js_runtimes.get(self.JS_RUNTIME_NAME)
if not runtime or not runtime.info or not runtime.info.supported:
return None
return runtime.info
def is_available(self, /) -> bool:
if not self.runtime_info:
return False
return self._available
def _skip_component(self, component: str, /):
return _SkippedComponent(component, self.JS_RUNTIME_NAME)
@dataclasses.dataclass
class _SkippedComponent:
component: str
runtime: str

View File

@@ -0,0 +1,70 @@
from __future__ import annotations
import re
import shlex
import subprocess
from yt_dlp.extractor.youtube.jsc._builtin.ejs import EJSBaseJCP
from yt_dlp.extractor.youtube.jsc.provider import (
JsChallengeProvider,
JsChallengeProviderError,
JsChallengeRequest,
register_preference,
register_provider,
)
from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
from yt_dlp.utils import Popen
@register_provider
class NodeJCP(EJSBaseJCP, BuiltinIEContentProvider):
PROVIDER_NAME = 'node'
JS_RUNTIME_NAME = 'node'
_ARGS = ['-']
def _run_js_runtime(self, stdin: str, /) -> str:
args = []
if self.ejs_setting('jitless', ['false']) != ['false']:
args.append('--v8-flags=--jitless')
# Node permission flag changed from experimental to stable in v23.5.0
if self.runtime_info.version_tuple < (23, 5, 0):
args.append('--experimental-permission')
args.append('--no-warnings=ExperimentalWarning')
else:
args.append('--permission')
cmd = [self.runtime_info.path, *args, *self._ARGS]
self.logger.debug(f'Running node: {shlex.join(cmd)}')
with Popen(
cmd,
text=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
) as proc:
stdout, stderr = proc.communicate_or_kill(stdin)
stderr = self._clean_stderr(stderr)
if proc.returncode or stderr:
msg = f'Error running node process (returncode: {proc.returncode})'
if stderr:
msg = f'{msg}: {stderr.strip()}'
raise JsChallengeProviderError(msg)
return stdout
def _clean_stderr(self, stderr):
return '\n'.join(
line for line in stderr.splitlines()
if not (
re.match(r'^\[stdin\]:', line)
or re.match(r'^var jsc', line)
or '(Use `node --trace-uncaught ...` to show where the exception was thrown)' == line
or re.match(r'^Node\.js v\d+\.\d+\.\d+$', line)))
@register_preference(NodeJCP)
def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
return 900

View File

@@ -0,0 +1,59 @@
from __future__ import annotations
import pathlib
import shlex
import subprocess
import tempfile
from yt_dlp.extractor.youtube.jsc._builtin.ejs import EJSBaseJCP
from yt_dlp.extractor.youtube.jsc.provider import (
JsChallengeProvider,
JsChallengeProviderError,
JsChallengeRequest,
register_preference,
register_provider,
)
from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
from yt_dlp.utils import Popen
@register_provider
class QuickJSJCP(EJSBaseJCP, BuiltinIEContentProvider):
PROVIDER_NAME = 'quickjs'
JS_RUNTIME_NAME = 'quickjs'
def _run_js_runtime(self, stdin: str, /) -> str:
if self.runtime_info.name == 'quickjs-ng':
self.logger.warning('QuickJS-NG is missing some optimizations making this very slow. Consider using upstream QuickJS instead.')
elif self.runtime_info.version_tuple < (2025, 4, 26):
self.logger.warning('Older QuickJS versions are missing optimizations making this very slow. Consider upgrading.')
# QuickJS does not support reading from stdin, so we have to use a temp file
temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.js', delete=False, encoding='utf-8')
try:
temp_file.write(stdin)
temp_file.close()
cmd = [self.runtime_info.path, '--script', temp_file.name]
self.logger.debug(f'Running QuickJS: {shlex.join(cmd)}')
with Popen(
cmd,
text=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
) as proc:
stdout, stderr = proc.communicate_or_kill()
if proc.returncode or stderr:
msg = f'Error running QuickJS process (returncode: {proc.returncode})'
if stderr:
msg = f'{msg}: {stderr.strip()}'
raise JsChallengeProviderError(msg)
finally:
pathlib.Path(temp_file.name).unlink(missing_ok=True)
return stdout
@register_preference(QuickJSJCP)
def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int:
return 850

View File

@@ -0,0 +1,17 @@
import importlib.resources
from yt_dlp.extractor.youtube.jsc._builtin.vendor._info import HASHES, VERSION
__all__ = ['HASHES', 'VERSION', 'load_script']
def load_script(filename, error_hook=None):
file = importlib.resources.files(__package__) / filename
if file.is_file():
try:
return file.read_text(encoding='utf-8')
except (OSError, FileNotFoundError, ModuleNotFoundError) as e:
if error_hook:
error_hook(e)
return None
return None

View File

@@ -0,0 +1,11 @@
# This file is generated by devscripts/update_ejs.py. DO NOT MODIFY!
VERSION = '0.3.0'
HASHES = {
'yt.solver.bun.lib.js': '6ff45e94de9f0ea936a183c48173cfa9ce526ee4b7544cd556428427c1dd53c8073ef0174e79b320252bf0e7c64b0032cc1cf9c4358f3fda59033b7caa01c241',
'yt.solver.core.js': '0cd96b2d3f319dfa62cae689efa7d930ef1706e95f5921794db5089b2262957ec0a17d73938d8975ea35d0309cbfb4c8e4418d5e219837215eee242890c8b64d',
'yt.solver.core.min.js': '370d627703002b4a73b10027702734a3de9484f6b56b739942be1dc2b60fee49dee2aa86ed117d1c8ae1ac55181d326481f1fe2e2e8d5211154d48e2a55dac51',
'yt.solver.deno.lib.js': '9c8ee3ab6c23e443a5a951e3ac73c6b8c1c8fb34335e7058a07bf99d349be5573611de00536dcd03ecd3cf34014c4e9b536081de37af3637c5390c6a6fd6a0f0',
'yt.solver.lib.js': '1ee3753a8222fc855f5c39db30a9ccbb7967dbe1fb810e86dc9a89aa073a0907f294c720e9b65427d560a35aa1ce6af19ef854d9126a05ca00afe03f72047733',
'yt.solver.lib.min.js': '8420c259ad16e99ce004e4651ac1bcabb53b4457bf5668a97a9359be9a998a789fee8ab124ee17f91a2ea8fd84e0f2b2fc8eabcaf0b16a186ba734cf422ad053',
}

View File

@@ -0,0 +1,9 @@
/*!
* SPDX-License-Identifier: Unlicense
* This file was automatically generated by https://github.com/yt-dlp/ejs
*/
const lib = {
meriyah: await import('meriyah@6.1.4'),
astring: await import('astring@1.9.0'),
};
export { lib };

View File

@@ -0,0 +1,550 @@
/*!
* SPDX-License-Identifier: Unlicense
* This file was automatically generated by https://github.com/yt-dlp/ejs
*/
var jsc = (function (meriyah, astring) {
'use strict';
function matchesStructure(obj, structure) {
if (Array.isArray(structure)) {
if (!Array.isArray(obj)) {
return false;
}
return (
structure.length === obj.length &&
structure.every((value, index) => matchesStructure(obj[index], value))
);
}
if (typeof structure === 'object') {
if (!obj) {
return !structure;
}
if ('or' in structure) {
return structure.or.some((node) => matchesStructure(obj, node));
}
if ('anykey' in structure && Array.isArray(structure.anykey)) {
const haystack = Array.isArray(obj) ? obj : Object.values(obj);
return structure.anykey.every((value) =>
haystack.some((el) => matchesStructure(el, value)),
);
}
for (const [key, value] of Object.entries(structure)) {
if (!matchesStructure(obj[key], value)) {
return false;
}
}
return true;
}
return structure === obj;
}
function isOneOf(value, ...of) {
return of.includes(value);
}
function _optionalChain$2(ops) {
let lastAccessLHS = undefined;
let value = ops[0];
let i = 1;
while (i < ops.length) {
const op = ops[i];
const fn = ops[i + 1];
i += 2;
if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) {
return undefined;
}
if (op === 'access' || op === 'optionalAccess') {
lastAccessLHS = value;
value = fn(value);
} else if (op === 'call' || op === 'optionalCall') {
value = fn((...args) => value.call(lastAccessLHS, ...args));
lastAccessLHS = undefined;
}
}
return value;
}
const logicalExpression = {
type: 'ExpressionStatement',
expression: {
type: 'LogicalExpression',
left: { type: 'Identifier' },
right: {
type: 'SequenceExpression',
expressions: [
{
type: 'AssignmentExpression',
left: { type: 'Identifier' },
operator: '=',
right: {
type: 'CallExpression',
callee: { type: 'Identifier' },
arguments: {
or: [
[
{ type: 'Literal' },
{
type: 'CallExpression',
callee: {
type: 'Identifier',
name: 'decodeURIComponent',
},
arguments: [{ type: 'Identifier' }],
optional: false,
},
],
[
{
type: 'CallExpression',
callee: {
type: 'Identifier',
name: 'decodeURIComponent',
},
arguments: [{ type: 'Identifier' }],
optional: false,
},
],
],
},
optional: false,
},
},
{ type: 'CallExpression' },
],
},
operator: '&&',
},
};
const identifier$1 = {
or: [
{
type: 'ExpressionStatement',
expression: {
type: 'AssignmentExpression',
operator: '=',
left: { type: 'Identifier' },
right: { type: 'FunctionExpression', params: [{}, {}, {}] },
},
},
{ type: 'FunctionDeclaration', params: [{}, {}, {}] },
{
type: 'VariableDeclaration',
declarations: {
anykey: [
{
type: 'VariableDeclarator',
init: { type: 'FunctionExpression', params: [{}, {}, {}] },
},
],
},
},
],
};
function extract$1(node) {
if (!matchesStructure(node, identifier$1)) {
return null;
}
let block;
if (
node.type === 'ExpressionStatement' &&
node.expression.type === 'AssignmentExpression' &&
node.expression.right.type === 'FunctionExpression'
) {
block = node.expression.right.body;
} else if (node.type === 'VariableDeclaration') {
for (const decl of node.declarations) {
if (
decl.type === 'VariableDeclarator' &&
_optionalChain$2([
decl,
'access',
(_) => _.init,
'optionalAccess',
(_2) => _2.type,
]) === 'FunctionExpression' &&
_optionalChain$2([
decl,
'access',
(_3) => _3.init,
'optionalAccess',
(_4) => _4.params,
'access',
(_5) => _5.length,
]) === 3
) {
block = decl.init.body;
break;
}
}
} else if (node.type === 'FunctionDeclaration') {
block = node.body;
} else {
return null;
}
const relevantExpression = _optionalChain$2([
block,
'optionalAccess',
(_6) => _6.body,
'access',
(_7) => _7.at,
'call',
(_8) => _8(-2),
]);
if (!matchesStructure(relevantExpression, logicalExpression)) {
return null;
}
if (
_optionalChain$2([
relevantExpression,
'optionalAccess',
(_9) => _9.type,
]) !== 'ExpressionStatement' ||
relevantExpression.expression.type !== 'LogicalExpression' ||
relevantExpression.expression.right.type !== 'SequenceExpression' ||
relevantExpression.expression.right.expressions[0].type !==
'AssignmentExpression'
) {
return null;
}
const call = relevantExpression.expression.right.expressions[0].right;
if (call.type !== 'CallExpression' || call.callee.type !== 'Identifier') {
return null;
}
return {
type: 'ArrowFunctionExpression',
params: [{ type: 'Identifier', name: 'sig' }],
body: {
type: 'CallExpression',
callee: { type: 'Identifier', name: call.callee.name },
arguments:
call.arguments.length === 1
? [{ type: 'Identifier', name: 'sig' }]
: [call.arguments[0], { type: 'Identifier', name: 'sig' }],
optional: false,
},
async: false,
expression: false,
generator: false,
};
}
function _optionalChain$1(ops) {
let lastAccessLHS = undefined;
let value = ops[0];
let i = 1;
while (i < ops.length) {
const op = ops[i];
const fn = ops[i + 1];
i += 2;
if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) {
return undefined;
}
if (op === 'access' || op === 'optionalAccess') {
lastAccessLHS = value;
value = fn(value);
} else if (op === 'call' || op === 'optionalCall') {
value = fn((...args) => value.call(lastAccessLHS, ...args));
lastAccessLHS = undefined;
}
}
return value;
}
const identifier = {
or: [
{
type: 'VariableDeclaration',
kind: 'var',
declarations: {
anykey: [
{
type: 'VariableDeclarator',
id: { type: 'Identifier' },
init: {
type: 'ArrayExpression',
elements: [{ type: 'Identifier' }],
},
},
],
},
},
{
type: 'ExpressionStatement',
expression: {
type: 'AssignmentExpression',
left: { type: 'Identifier' },
operator: '=',
right: {
type: 'ArrayExpression',
elements: [{ type: 'Identifier' }],
},
},
},
],
};
const catchBlockBody = [
{
type: 'ReturnStatement',
argument: {
type: 'BinaryExpression',
left: {
type: 'MemberExpression',
object: { type: 'Identifier' },
computed: true,
property: { type: 'Literal' },
optional: false,
},
right: { type: 'Identifier' },
operator: '+',
},
},
];
function extract(node) {
if (!matchesStructure(node, identifier)) {
let name = null;
let block = null;
switch (node.type) {
case 'ExpressionStatement': {
if (
node.expression.type === 'AssignmentExpression' &&
node.expression.left.type === 'Identifier' &&
node.expression.right.type === 'FunctionExpression' &&
node.expression.right.params.length === 1
) {
name = node.expression.left.name;
block = node.expression.right.body;
}
break;
}
case 'FunctionDeclaration': {
if (node.params.length === 1) {
name = _optionalChain$1([
node,
'access',
(_) => _.id,
'optionalAccess',
(_2) => _2.name,
]);
block = node.body;
}
break;
}
}
if (!block || !name) {
return null;
}
const tryNode = block.body.at(-2);
if (
_optionalChain$1([tryNode, 'optionalAccess', (_3) => _3.type]) !==
'TryStatement' ||
_optionalChain$1([
tryNode,
'access',
(_4) => _4.handler,
'optionalAccess',
(_5) => _5.type,
]) !== 'CatchClause'
) {
return null;
}
const catchBody = tryNode.handler.body.body;
if (matchesStructure(catchBody, catchBlockBody)) {
return makeSolverFuncFromName(name);
}
return null;
}
if (node.type === 'VariableDeclaration') {
for (const declaration of node.declarations) {
if (
declaration.type !== 'VariableDeclarator' ||
!declaration.init ||
declaration.init.type !== 'ArrayExpression' ||
declaration.init.elements.length !== 1
) {
continue;
}
const [firstElement] = declaration.init.elements;
if (firstElement && firstElement.type === 'Identifier') {
return makeSolverFuncFromName(firstElement.name);
}
}
} else if (node.type === 'ExpressionStatement') {
const expr = node.expression;
if (
expr.type === 'AssignmentExpression' &&
expr.left.type === 'Identifier' &&
expr.operator === '=' &&
expr.right.type === 'ArrayExpression' &&
expr.right.elements.length === 1
) {
const [firstElement] = expr.right.elements;
if (firstElement && firstElement.type === 'Identifier') {
return makeSolverFuncFromName(firstElement.name);
}
}
}
return null;
}
function makeSolverFuncFromName(name) {
return {
type: 'ArrowFunctionExpression',
params: [{ type: 'Identifier', name: 'n' }],
body: {
type: 'CallExpression',
callee: { type: 'Identifier', name: name },
arguments: [{ type: 'Identifier', name: 'n' }],
optional: false,
},
async: false,
expression: false,
generator: false,
};
}
const setupNodes = meriyah.parse(
`\nif (typeof globalThis.XMLHttpRequest === "undefined") {\n globalThis.XMLHttpRequest = { prototype: {} };\n}\nconst window = Object.create(null);\nif (typeof URL === "undefined") {\n window.location = {\n hash: "",\n host: "www.youtube.com",\n hostname: "www.youtube.com",\n href: "https://www.youtube.com/watch?v=yt-dlp-wins",\n origin: "https://www.youtube.com",\n password: "",\n pathname: "/watch",\n port: "",\n protocol: "https:",\n search: "?v=yt-dlp-wins",\n username: "",\n };\n} else {\n window.location = new URL("https://www.youtube.com/watch?v=yt-dlp-wins");\n}\nif (typeof globalThis.document === "undefined") {\n globalThis.document = Object.create(null);\n}\nif (typeof globalThis.navigator === "undefined") {\n globalThis.navigator = Object.create(null);\n}\nif (typeof globalThis.self === "undefined") {\n globalThis.self = globalThis;\n}\n`,
).body;
function _optionalChain(ops) {
let lastAccessLHS = undefined;
let value = ops[0];
let i = 1;
while (i < ops.length) {
const op = ops[i];
const fn = ops[i + 1];
i += 2;
if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) {
return undefined;
}
if (op === 'access' || op === 'optionalAccess') {
lastAccessLHS = value;
value = fn(value);
} else if (op === 'call' || op === 'optionalCall') {
value = fn((...args) => value.call(lastAccessLHS, ...args));
lastAccessLHS = undefined;
}
}
return value;
}
function preprocessPlayer(data) {
const ast = meriyah.parse(data);
const body = ast.body;
const block = (() => {
switch (body.length) {
case 1: {
const func = body[0];
if (
_optionalChain([func, 'optionalAccess', (_) => _.type]) ===
'ExpressionStatement' &&
func.expression.type === 'CallExpression' &&
func.expression.callee.type === 'MemberExpression' &&
func.expression.callee.object.type === 'FunctionExpression'
) {
return func.expression.callee.object.body;
}
break;
}
case 2: {
const func = body[1];
if (
_optionalChain([func, 'optionalAccess', (_2) => _2.type]) ===
'ExpressionStatement' &&
func.expression.type === 'CallExpression' &&
func.expression.callee.type === 'FunctionExpression'
) {
const block = func.expression.callee.body;
block.body.splice(0, 1);
return block;
}
break;
}
}
throw 'unexpected structure';
})();
const found = { n: [], sig: [] };
const plainExpressions = block.body.filter((node) => {
const n = extract(node);
if (n) {
found.n.push(n);
}
const sig = extract$1(node);
if (sig) {
found.sig.push(sig);
}
if (node.type === 'ExpressionStatement') {
if (node.expression.type === 'AssignmentExpression') {
return true;
}
return node.expression.type === 'Literal';
}
return true;
});
block.body = plainExpressions;
for (const [name, options] of Object.entries(found)) {
const unique = new Set(options.map((x) => JSON.stringify(x)));
if (unique.size !== 1) {
const message = `found ${unique.size} ${name} function possibilities`;
throw (
message +
(unique.size
? `: ${options.map((x) => astring.generate(x)).join(', ')}`
: '')
);
}
plainExpressions.push({
type: 'ExpressionStatement',
expression: {
type: 'AssignmentExpression',
operator: '=',
left: {
type: 'MemberExpression',
computed: false,
object: { type: 'Identifier', name: '_result' },
property: { type: 'Identifier', name: name },
},
right: options[0],
},
});
}
ast.body.splice(0, 0, ...setupNodes);
return astring.generate(ast);
}
function getFromPrepared(code) {
const resultObj = { n: null, sig: null };
Function('_result', code)(resultObj);
return resultObj;
}
function main(input) {
const preprocessedPlayer =
input.type === 'player'
? preprocessPlayer(input.player)
: input.preprocessed_player;
const solvers = getFromPrepared(preprocessedPlayer);
const responses = input.requests.map((input) => {
if (!isOneOf(input.type, 'n', 'sig')) {
return { type: 'error', error: `Unknown request type: ${input.type}` };
}
const solver = solvers[input.type];
if (!solver) {
return {
type: 'error',
error: `Failed to extract ${input.type} function`,
};
}
try {
return {
type: 'result',
data: Object.fromEntries(
input.challenges.map((challenge) => [challenge, solver(challenge)]),
),
};
} catch (error) {
return {
type: 'error',
error:
error instanceof Error
? `${error.message}\n${error.stack}`
: `${error}`,
};
}
});
const output = { type: 'result', responses: responses };
if (input.type === 'player' && input.output_preprocessed) {
output.preprocessed_player = preprocessedPlayer;
}
return output;
}
return main;
})(meriyah, astring);

View File

@@ -0,0 +1,9 @@
/*!
* SPDX-License-Identifier: Unlicense
* This file was automatically generated by https://github.com/yt-dlp/ejs
*/
const lib = {
meriyah: await import('npm:meriyah@6.1.4'),
astring: await import('npm:astring@1.9.0'),
};
export { lib };

View File

@@ -0,0 +1,287 @@
from __future__ import annotations
import collections
import dataclasses
import typing
from yt_dlp.extractor.youtube.jsc._builtin.ejs import _EJS_WIKI_URL
from yt_dlp.extractor.youtube.jsc._registry import (
_jsc_preferences,
_jsc_providers,
)
from yt_dlp.extractor.youtube.jsc.provider import (
JsChallengeProvider,
JsChallengeProviderError,
JsChallengeProviderRejectedRequest,
JsChallengeProviderResponse,
JsChallengeRequest,
JsChallengeResponse,
JsChallengeType,
NChallengeInput,
NChallengeOutput,
SigChallengeInput,
SigChallengeOutput,
)
from yt_dlp.extractor.youtube.pot._director import YoutubeIEContentProviderLogger, provider_display_list
from yt_dlp.extractor.youtube.pot._provider import (
IEContentProviderLogger,
)
from yt_dlp.extractor.youtube.pot.provider import (
provider_bug_report_message,
)
if typing.TYPE_CHECKING:
from collections.abc import Iterable
from yt_dlp.extractor.youtube.jsc._builtin.ejs import _SkippedComponent
from yt_dlp.extractor.youtube.jsc.provider import Preference as JsChallengePreference
class JsChallengeRequestDirector:
def __init__(self, logger: IEContentProviderLogger):
self.providers: dict[str, JsChallengeProvider] = {}
self.preferences: list[JsChallengePreference] = []
self.logger = logger
def register_provider(self, provider: JsChallengeProvider):
self.providers[provider.PROVIDER_KEY] = provider
def register_preference(self, preference: JsChallengePreference):
self.preferences.append(preference)
def _get_providers(self, requests: list[JsChallengeRequest]) -> Iterable[JsChallengeProvider]:
"""Sorts available providers by preference, given a request"""
preferences = {
provider: sum(pref(provider, requests) for pref in self.preferences)
for provider in self.providers.values()
}
if self.logger.log_level <= self.logger.LogLevel.TRACE:
# calling is_available() for every JS Challenge provider upfront may have some overhead
self.logger.trace(f'JS Challenge Providers: {provider_display_list(self.providers.values())}')
self.logger.trace('JS Challenge Provider preferences for this request: {}'.format(', '.join(
f'{provider.PROVIDER_NAME}={pref}' for provider, pref in preferences.items())))
return (
provider for provider in sorted(
self.providers.values(), key=preferences.get, reverse=True)
if provider.is_available()
)
def _handle_error(self, e: Exception, provider: JsChallengeProvider, requests: list[JsChallengeRequest]):
if isinstance(e, JsChallengeProviderRejectedRequest):
self.logger.trace(
f'JS Challenge Provider "{provider.PROVIDER_NAME}" rejected '
f'{"this request" if len(requests) == 1 else f"{len(requests)} requests"}, '
f'trying next available provider. Reason: {e}',
)
elif isinstance(e, JsChallengeProviderError):
if len(requests) == 1:
self.logger.warning(
f'Error solving {requests[0].type.value} challenge request using "{provider.PROVIDER_NAME}" provider: {e}.\n'
f' input = {requests[0].input}\n'
f' {(provider_bug_report_message(provider, before="") if not e.expected else "")}')
else:
self.logger.warning(
f'Error solving {len(requests)} challenge requests using "{provider.PROVIDER_NAME}" provider: {e}.\n'
f' requests = {requests}\n'
f' {(provider_bug_report_message(provider, before="") if not e.expected else "")}')
else:
self.logger.error(
f'Unexpected error solving {len(requests)} challenge request(s) using "{provider.PROVIDER_NAME}" provider: {e!r}\n'
f' requests = {requests}\n'
f' {provider_bug_report_message(provider, before="")}', cause=e)
def bulk_solve(self, requests: list[JsChallengeRequest]) -> list[tuple[JsChallengeRequest, JsChallengeResponse]]:
"""Solves multiple JS Challenges in bulk, returning a list of responses"""
if not self.providers:
self.logger.trace('No JS Challenge providers registered')
return []
results = []
next_requests = requests[:]
skipped_components = []
for provider in self._get_providers(next_requests):
if not next_requests:
break
self.logger.trace(
f'Attempting to solve {len(next_requests)} challenges using "{provider.PROVIDER_NAME}" provider')
try:
for response in provider.bulk_solve([dataclasses.replace(request) for request in next_requests]):
if not validate_provider_response(response):
self.logger.warning(
f'JS Challenge Provider "{provider.PROVIDER_NAME}" returned an invalid response:'
f' response = {response!r}\n'
f' {provider_bug_report_message(provider, before="")}')
continue
if response.error:
self._handle_error(response.error, provider, [response.request])
continue
if (vr_msg := validate_response(response.response, response.request)) is not True:
self.logger.warning(
f'Invalid JS Challenge response received from "{provider.PROVIDER_NAME}" provider: {vr_msg or ""}\n'
f' response = {response.response}\n'
f' request = {response.request}\n'
f' {provider_bug_report_message(provider, before="")}')
continue
try:
next_requests.remove(response.request)
except ValueError:
self.logger.warning(
f'JS Challenge Provider "{provider.PROVIDER_NAME}" returned a response for an unknown request:\n'
f' request = {response.request}\n'
f' {provider_bug_report_message(provider, before="")}')
continue
results.append((response.request, response.response))
except Exception as e:
if isinstance(e, JsChallengeProviderRejectedRequest) and e._skipped_components:
skipped_components.extend(e._skipped_components)
self._handle_error(e, provider, next_requests)
continue
if skipped_components:
self.__report_skipped_components(skipped_components)
if len(results) != len(requests):
self.logger.trace(
f'Not all JS Challenges were solved, expected {len(requests)} responses, got {len(results)}')
self.logger.trace(f'Unsolved requests: {next_requests}')
else:
self.logger.trace(f'Solved all {len(requests)} requested JS Challenges')
return results
def __report_skipped_components(self, components: list[_SkippedComponent], /):
runtime_components = collections.defaultdict(list)
for component in components:
runtime_components[component.component].append(component.runtime)
for runtimes in runtime_components.values():
runtimes.sort()
description_lookup = {
'ejs:npm': 'NPM package',
'ejs:github': 'challenge solver script',
}
descriptions = [
f'{description_lookup.get(component, component)} ({", ".join(runtimes)})'
for component, runtimes in runtime_components.items()
if runtimes
]
flags = [
f' --remote-components {f"{component} (recommended)" if component == "ejs:github" else f"{component} "}'
for component, runtimes in runtime_components.items()
if runtimes
]
def join_parts(parts, joiner):
if not parts:
return ''
if len(parts) == 1:
return parts[0]
return f'{", ".join(parts[:-1])} {joiner} {parts[-1]}'
        if len(descriptions) == 1:
msg = (
f'Remote component {descriptions[0]} was skipped. '
f'It may be required to solve JS challenges. '
f'You can enable the download with {flags[0]}')
else:
msg = (
f'Remote components {join_parts(descriptions, "and")} were skipped. '
f'These may be required to solve JS challenges. '
f'You can enable these downloads with {join_parts(flags, "or")}, respectively')
self.logger.warning(f'{msg}. For more information and alternatives, refer to {_EJS_WIKI_URL}')
def close(self):
for provider in self.providers.values():
provider.close()
EXTRACTOR_ARG_PREFIX = 'youtubejsc'
def initialize_jsc_director(ie):
assert ie._downloader is not None, 'Downloader not set'
enable_trace = ie._configuration_arg(
'jsc_trace', ['false'], ie_key='youtube', casesense=False)[0] == 'true'
if enable_trace:
log_level = IEContentProviderLogger.LogLevel.TRACE
elif ie.get_param('verbose', False):
log_level = IEContentProviderLogger.LogLevel.DEBUG
else:
log_level = IEContentProviderLogger.LogLevel.INFO
def get_provider_logger_and_settings(provider, logger_key):
logger_prefix = f'{logger_key}:{provider.PROVIDER_NAME}'
extractor_key = f'{EXTRACTOR_ARG_PREFIX}-{provider.PROVIDER_KEY.lower()}'
return (
YoutubeIEContentProviderLogger(ie, logger_prefix, log_level=log_level),
ie.get_param('extractor_args', {}).get(extractor_key, {}))
director = JsChallengeRequestDirector(
logger=YoutubeIEContentProviderLogger(ie, 'jsc', log_level=log_level),
)
ie._downloader.add_close_hook(director.close)
for provider in _jsc_providers.value.values():
logger, settings = get_provider_logger_and_settings(provider, 'jsc')
director.register_provider(provider(ie, logger, settings))
for preference in _jsc_preferences.value:
director.register_preference(preference)
if director.logger.log_level <= director.logger.LogLevel.DEBUG:
# calling is_available() for every JS Challenge provider upfront may have some overhead
director.logger.debug(f'JS Challenge Providers: {provider_display_list(director.providers.values())}')
director.logger.trace(f'Registered {len(director.preferences)} JS Challenge provider preferences')
return director
def validate_provider_response(response: JsChallengeProviderResponse) -> bool:
return (
isinstance(response, JsChallengeProviderResponse)
and isinstance(response.request, JsChallengeRequest)
and (
isinstance(response.response, JsChallengeResponse)
or (response.error is not None and isinstance(response.error, Exception)))
)
def validate_response(response: JsChallengeResponse, request: JsChallengeRequest) -> bool | str:
if not isinstance(response, JsChallengeResponse):
return 'Response is not a JsChallengeResponse'
if request.type == JsChallengeType.N:
return validate_nsig_challenge_output(response.output, request.input)
else:
return validate_sig_challenge_output(response.output, request.input)
def validate_nsig_challenge_output(challenge_output: NChallengeOutput, challenge_input: NChallengeInput) -> bool | str:
if not (
isinstance(challenge_output, NChallengeOutput)
and len(challenge_output.results) == len(challenge_input.challenges)
and all(isinstance(k, str) and isinstance(v, str) for k, v in challenge_output.results.items())
and all(challenge in challenge_output.results for challenge in challenge_input.challenges)
):
return 'Invalid NChallengeOutput'
    # Validate the n results: if a result ends with the input challenge, the JS function returned with an exception.
for challenge, result in challenge_output.results.items():
if result.endswith(challenge):
return f'n result is invalid for {challenge!r}: {result!r}'
return True
def validate_sig_challenge_output(challenge_output: SigChallengeOutput, challenge_input: SigChallengeInput) -> bool:
return (
isinstance(challenge_output, SigChallengeOutput)
and len(challenge_output.results) == len(challenge_input.challenges)
and all(isinstance(k, str) and isinstance(v, str) for k, v in challenge_output.results.items())
and all(challenge in challenge_output.results for challenge in challenge_input.challenges)
) or 'Invalid SigChallengeOutput'

View File

@@ -0,0 +1,4 @@
from yt_dlp.globals import Indirect
_jsc_providers = Indirect({})
_jsc_preferences = Indirect(set())

View File

@@ -0,0 +1,161 @@
"""PUBLIC API"""
from __future__ import annotations
import abc
import dataclasses
import enum
import typing
from yt_dlp.extractor.youtube.jsc._registry import _jsc_preferences, _jsc_providers
from yt_dlp.extractor.youtube.pot._provider import (
IEContentProvider,
IEContentProviderError,
register_preference_generic,
register_provider_generic,
)
from yt_dlp.utils import ExtractorError
__all__ = [
'JsChallengeProvider',
'JsChallengeProviderError',
'JsChallengeProviderRejectedRequest',
'JsChallengeProviderResponse',
'JsChallengeRequest',
'JsChallengeResponse',
'JsChallengeType',
'NChallengeInput',
'NChallengeOutput',
'SigChallengeInput',
'SigChallengeOutput',
'register_preference',
'register_provider',
]
class JsChallengeType(enum.Enum):
N = 'n'
SIG = 'sig'
@dataclasses.dataclass(frozen=True)
class JsChallengeRequest:
type: JsChallengeType
input: NChallengeInput | SigChallengeInput
video_id: str | None = None
@dataclasses.dataclass(frozen=True)
class NChallengeInput:
player_url: str
challenges: list[str] = dataclasses.field(default_factory=list)
@dataclasses.dataclass(frozen=True)
class SigChallengeInput:
player_url: str
challenges: list[str] = dataclasses.field(default_factory=list)
@dataclasses.dataclass(frozen=True)
class NChallengeOutput:
results: dict[str, str] = dataclasses.field(default_factory=dict)
@dataclasses.dataclass(frozen=True)
class SigChallengeOutput:
results: dict[str, str] = dataclasses.field(default_factory=dict)
@dataclasses.dataclass
class JsChallengeProviderResponse:
request: JsChallengeRequest
response: JsChallengeResponse | None = None
error: Exception | None = None
@dataclasses.dataclass
class JsChallengeResponse:
type: JsChallengeType
output: NChallengeOutput | SigChallengeOutput
class JsChallengeProviderRejectedRequest(IEContentProviderError):
"""Reject the JsChallengeRequest (cannot handle the request)"""
def __init__(self, msg=None, expected: bool = False, *, _skipped_components=None):
super().__init__(msg, expected)
self._skipped_components = _skipped_components
class JsChallengeProviderError(IEContentProviderError):
"""An error occurred while solving the challenge"""
class JsChallengeProvider(IEContentProvider, abc.ABC, suffix='JCP'):
# Set to None to disable the check
_SUPPORTED_TYPES: tuple[JsChallengeType] | None = ()
def __validate_request(self, request: JsChallengeRequest):
if not self.is_available():
raise JsChallengeProviderRejectedRequest(f'{self.PROVIDER_NAME} is not available')
# Validate request using built-in settings
if (
self._SUPPORTED_TYPES is not None
and request.type not in self._SUPPORTED_TYPES
):
raise JsChallengeProviderRejectedRequest(
f'JS Challenge type "{request.type}" is not supported by {self.PROVIDER_NAME}')
def bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
"""Solve multiple JS challenges and return the results"""
validated_requests = []
for request in requests:
try:
self.__validate_request(request)
validated_requests.append(request)
except JsChallengeProviderRejectedRequest as e:
yield JsChallengeProviderResponse(request=request, error=e)
continue
yield from self._real_bulk_solve(validated_requests)
@abc.abstractmethod
def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]:
"""Subclasses can override this method to handle bulk solving"""
raise NotImplementedError(f'{self.PROVIDER_NAME} does not implement bulk solving')
def _get_player(self, video_id, player_url):
try:
return self.ie._load_player(
video_id=video_id,
player_url=player_url,
fatal=True,
)
except ExtractorError as e:
raise JsChallengeProviderError(
f'Failed to load player for JS challenge: {e}') from e
def register_provider(provider: type[JsChallengeProvider]):
"""Register a JsChallengeProvider class"""
return register_provider_generic(
provider=provider,
base_class=JsChallengeProvider,
registry=_jsc_providers.value,
)
def register_preference(*providers: type[JsChallengeProvider]) -> typing.Callable[[Preference], Preference]:
"""Register a preference for a JsChallengeProvider class."""
return register_preference_generic(
JsChallengeProvider,
_jsc_preferences.value,
*providers,
)
if typing.TYPE_CHECKING:
Preference = typing.Callable[[JsChallengeProvider, list[JsChallengeRequest]], int]
__all__.append('Preference')
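
As an aside, a minimal provider plugin written against this interface might look like the sketch below. The class name, version string, echoed results and preference value are purely illustrative, the import path is assumed from the registry import above, and it relies on the IEContentProvider base supplying is_available() and an optional PROVIDER_VERSION attribute:

from yt_dlp.extractor.youtube.jsc.provider import (
    JsChallengeProvider, JsChallengeProviderResponse, JsChallengeResponse,
    JsChallengeType, NChallengeOutput, register_preference, register_provider,
)

@register_provider
class ExampleJCP(JsChallengeProvider):  # class name must end with the "JCP" suffix
    PROVIDER_VERSION = '0.1.0'  # assumed optional attribute on the base class
    _SUPPORTED_TYPES = (JsChallengeType.N,)

    def is_available(self):
        return True

    def _real_bulk_solve(self, requests):
        for request in requests:
            # A real provider would run the player JS; this sketch just echoes each challenge back
            results = {challenge: challenge for challenge in request.input.challenges}
            yield JsChallengeProviderResponse(
                request=request,
                response=JsChallengeResponse(type=request.type, output=NChallengeOutput(results=results)))

@register_preference(ExampleJCP)
def example_preference(provider, requests):
    return 50  # higher values are preferred here; the exact scale is an assumption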

View File

@@ -6,6 +6,7 @@ import dataclasses
import datetime as dt
import hashlib
import json
import traceback
import typing
import urllib.parse
from collections.abc import Iterable
@@ -58,9 +59,9 @@ class YoutubeIEContentProviderLogger(IEContentProviderLogger):
if self.log_level <= self.LogLevel.TRACE:
self.__ie.write_debug(self._format_msg('TRACE: ' + message))
def debug(self, message: str):
def debug(self, message: str, *, once=False):
if self.log_level <= self.LogLevel.DEBUG:
self.__ie.write_debug(self._format_msg(message))
self.__ie.write_debug(self._format_msg(message), only_once=once)
def info(self, message: str):
if self.log_level <= self.LogLevel.INFO:
@@ -70,9 +71,11 @@ class YoutubeIEContentProviderLogger(IEContentProviderLogger):
if self.log_level <= self.LogLevel.WARNING:
self.__ie.report_warning(self._format_msg(message), only_once=once)
def error(self, message: str):
def error(self, message: str, cause=None):
if self.log_level <= self.LogLevel.ERROR:
self.__ie._downloader.report_error(self._format_msg(message), is_error=False)
self.__ie._downloader.report_error(
self._format_msg(message), is_error=False,
tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)) if cause else None)
class PoTokenCache:
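
To show what the new cause parameter enables in the logger change above, a hypothetical call site (all surrounding names are placeholders) might look like:

try:
    response = solve_challenges(requests)  # any operation that may raise
except Exception as e:
    # The exception's formatted traceback is forwarded to report_error via tb=
    logger.error(f'Challenge solving failed: {e}', cause=e)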

View File

@@ -36,7 +36,7 @@ class IEContentProviderLogger(abc.ABC):
pass
@abc.abstractmethod
def debug(self, message: str):
def debug(self, message: str, *, once=False):
pass
@abc.abstractmethod
@@ -48,7 +48,7 @@ class IEContentProviderLogger(abc.ABC):
pass
@abc.abstractmethod
def error(self, message: str):
def error(self, message: str, cause=None):
pass
@@ -90,7 +90,7 @@ class IEContentProvider(abc.ABC):
@classproperty
def PROVIDER_KEY(cls) -> str:
assert hasattr(cls, '_PROVIDER_KEY_SUFFIX'), 'Content Provider implementation must define a suffix for the provider key'
assert cls.__name__.endswith(cls._PROVIDER_KEY_SUFFIX), f'PoTokenProvider class names must end with "{cls._PROVIDER_KEY_SUFFIX}"'
assert cls.__name__.endswith(cls._PROVIDER_KEY_SUFFIX), f'Class name must end with "{cls._PROVIDER_KEY_SUFFIX}"'
return cls.__name__[:-len(cls._PROVIDER_KEY_SUFFIX)]
@abc.abstractmethod
@@ -114,10 +114,7 @@ class IEContentProvider(abc.ABC):
@param default The default value to return when the key is not present (default: [])
@param casesense When false, the values are converted to lower case
"""
val = traverse_obj(self.settings, key)
if val is None:
return [] if default is NO_DEFAULT else default
return list(val) if casesense else [x.lower() for x in val]
return configuration_arg(self.settings, key, default=default, casesense=casesense)
class BuiltinIEContentProvider(IEContentProvider, abc.ABC):
@@ -125,6 +122,20 @@ class BuiltinIEContentProvider(IEContentProvider, abc.ABC):
BUG_REPORT_MESSAGE = bug_reports_message(before='')
def configuration_arg(config, key, default=NO_DEFAULT, *, casesense=False):
"""
@returns A list of values for the setting given by "key"
or "default" if no such key is present
@param config The configuration dictionary
@param default The default value to return when the key is not present (default: [])
@param casesense When false, the values are converted to lower case
"""
val = traverse_obj(config, key)
if val is None:
return [] if default is NO_DEFAULT else default
return list(val) if casesense else [x.lower() for x in val]
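
A quick illustration of the extracted helper, using a made-up settings dict:

configuration_arg({'runtime': ['Deno', 'Node']}, 'runtime')           # -> ['deno', 'node']
configuration_arg({'runtime': ['Deno']}, 'runtime', casesense=True)   # -> ['Deno']
configuration_arg({}, 'runtime')                                      # -> [] (NO_DEFAULT fallback)
configuration_arg({}, 'runtime', default=None)                        # -> None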
def register_provider_generic(
provider,
base_class,

View File

@@ -1,3 +1,4 @@
from __future__ import annotations
import os
from collections import defaultdict
@@ -30,3 +31,11 @@ plugin_ies_overrides = Indirect(defaultdict(list))
IN_CLI = Indirect(False)
LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled
WINDOWS_VT_MODE = Indirect(False if os.name == 'nt' else None)
# JS Runtimes
# If adding support for another runtime, register it here so the `js_runtimes` option can accept it.
# The key is the runtime name; the value is a JsRuntime subclass (internal-only) or None
supported_js_runtimes = Indirect({})
# List of remote components supported with --remote-components option
supported_remote_components = Indirect([])
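
As the comment above suggests, wiring in a runtime amounts to adding an entry to this mapping. A hypothetical registration, assuming the module paths and the Indirect(...).value access pattern used by the provider registries, could look like:

from yt_dlp.globals import supported_js_runtimes
from yt_dlp.utils._jsruntime import DenoJsRuntime

supported_js_runtimes.value['deno'] = DenoJsRuntime  # maps the --js-runtimes name to its runtime class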

View File

@@ -456,6 +456,41 @@ def create_parser():
'--no-plugin-dirs',
dest='plugin_dirs', action='store_const', const=[],
help='Clear plugin directories to search, including defaults and those provided by previous --plugin-dirs')
general.add_option(
'--js-runtimes',
metavar='RUNTIME[:PATH]',
dest='js_runtimes',
action='callback',
callback=_list_from_options_callback,
type='str',
callback_kwargs={'delim': None},
default=['deno'],
help=(
'Additional JavaScript runtime to enable, with an optional path to the runtime location. '
'This option can be used multiple times to enable multiple runtimes. '
            'Supported runtimes: deno, node, bun, quickjs. By default, only the "deno" runtime is enabled.'))
general.add_option(
'--no-js-runtimes',
dest='js_runtimes', action='store_const', const=[],
help='Clear JavaScript runtimes to enable, including defaults and those provided by previous --js-runtimes')
general.add_option(
'--remote-components',
metavar='COMPONENT',
dest='remote_components',
action='callback',
callback=_list_from_options_callback,
type='str',
callback_kwargs={'delim': None},
default=[],
help=(
'Remote components to allow yt-dlp to fetch when required. '
'You can use this option multiple times to allow multiple components. '
'Supported values: ejs:npm (external JavaScript components from npm), ejs:github (external JavaScript components from yt-dlp-ejs GitHub). '
'By default, no remote components are allowed.'))
general.add_option(
'--no-remote-components',
dest='remote_components', action='store_const', const=[],
        help='Disallow fetching of all remote components, including any previously allowed by --remote-components or defaults')
general.add_option(
'--flat-playlist',
action='store_const', dest='extract_flat', const='in_playlist', default=False,

101
yt_dlp/utils/_jsruntime.py Normal file
View File

@@ -0,0 +1,101 @@
from __future__ import annotations
import abc
import dataclasses
import functools
from ._utils import _get_exe_version_output, detect_exe_version, int_or_none
# NOT public API
def runtime_version_tuple(v):
# NB: will return (0,) if `v` is an invalid version string
return tuple(int_or_none(x, default=0) for x in v.split('.'))
@dataclasses.dataclass(frozen=True)
class JsRuntimeInfo:
name: str
path: str
version: str
version_tuple: tuple[int, ...]
supported: bool = True
class JsRuntime(abc.ABC):
def __init__(self, path=None):
self._path = path
@functools.cached_property
def info(self) -> JsRuntimeInfo | None:
return self._info()
@abc.abstractmethod
def _info(self) -> JsRuntimeInfo | None:
raise NotImplementedError
class DenoJsRuntime(JsRuntime):
MIN_SUPPORTED_VERSION = (2, 0, 0)
def _info(self):
path = self._path or 'deno'
out = _get_exe_version_output(path, ['--version'])
if not out:
return None
version = detect_exe_version(out, r'^deno (\S+)', 'unknown')
vt = runtime_version_tuple(version)
return JsRuntimeInfo(
name='deno', path=path, version=version, version_tuple=vt,
supported=vt >= self.MIN_SUPPORTED_VERSION)
class BunJsRuntime(JsRuntime):
MIN_SUPPORTED_VERSION = (1, 0, 31)
def _info(self):
path = self._path or 'bun'
out = _get_exe_version_output(path, ['--version'])
if not out:
return None
version = detect_exe_version(out, r'^(\S+)', 'unknown')
vt = runtime_version_tuple(version)
return JsRuntimeInfo(
name='bun', path=path, version=version, version_tuple=vt,
supported=vt >= self.MIN_SUPPORTED_VERSION)
class NodeJsRuntime(JsRuntime):
MIN_SUPPORTED_VERSION = (20, 0, 0)
def _info(self):
path = self._path or 'node'
out = _get_exe_version_output(path, ['--version'])
if not out:
return None
version = detect_exe_version(out, r'^v(\S+)', 'unknown')
vt = runtime_version_tuple(version)
return JsRuntimeInfo(
name='node', path=path, version=version, version_tuple=vt,
supported=vt >= self.MIN_SUPPORTED_VERSION)
class QuickJsRuntime(JsRuntime):
MIN_SUPPORTED_VERSION = (2023, 12, 9)
def _info(self):
path = self._path or 'qjs'
# quickjs does not have --version and --help returns a status code of 1
out = _get_exe_version_output(path, ['--help'], ignore_return_code=True)
if not out:
return None
is_ng = 'QuickJS-ng' in out
version = detect_exe_version(out, r'^QuickJS(?:-ng)?\s+version\s+(\S+)', 'unknown')
vt = runtime_version_tuple(version.replace('-', '.'))
if is_ng:
return JsRuntimeInfo(
name='quickjs-ng', path=path, version=version, version_tuple=vt,
supported=vt > (0,))
return JsRuntimeInfo(
name='quickjs', path=path, version=version, version_tuple=vt,
supported=vt >= self.MIN_SUPPORTED_VERSION)
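
A rough sketch of how a caller might probe these classes for a usable runtime; the probing order and the helper itself are illustrative and not part of this file:

def find_usable_runtime(paths=None):
    paths = paths or {}  # e.g. {'node': '/usr/local/bin/node'}
    for name, cls in (('deno', DenoJsRuntime), ('node', NodeJsRuntime),
                      ('bun', BunJsRuntime), ('quickjs', QuickJsRuntime)):
        info = cls(paths.get(name)).info  # cached property; None if the executable is missing
        if info and info.supported:
            return info
    return None

# e.g. JsRuntimeInfo(name='deno', path='deno', version='2.2.3', version_tuple=(2, 2, 3), supported=True)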

View File

@@ -2150,14 +2150,14 @@ def check_executable(exe, args=[]):
return exe
def _get_exe_version_output(exe, args):
def _get_exe_version_output(exe, args, ignore_return_code=False):
try:
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if yt-dlp is run in the background.
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
stdout, _, ret = Popen.run([encodeArgument(exe), *args], text=True,
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
if ret:
if not ignore_return_code and ret:
return None
except OSError:
return False