diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fc89dbfe05..69f8eb0c8d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -214,7 +214,7 @@ jobs: - name: Build Unix platform-independent binary run: | - make all tar + make all-extra tar - name: Verify --update-to if: vars.UPDATE_TO_VERIFICATION diff --git a/.github/workflows/challenge-tests.yml b/.github/workflows/challenge-tests.yml new file mode 100644 index 0000000000..89895eb07b --- /dev/null +++ b/.github/workflows/challenge-tests.yml @@ -0,0 +1,77 @@ +name: Challenge Tests +on: + push: + paths: + - .github/workflows/challenge-tests.yml + - test/test_jsc/*.py + - yt_dlp/extractor/youtube/jsc/**.js + - yt_dlp/extractor/youtube/jsc/**.py + - yt_dlp/extractor/youtube/pot/**.py + - yt_dlp/utils/_jsruntime.py + pull_request: + paths: + - .github/workflows/challenge-tests.yml + - test/test_jsc/*.py + - yt_dlp/extractor/youtube/jsc/**.js + - yt_dlp/extractor/youtube/jsc/**.py + - yt_dlp/extractor/youtube/pot/**.py + - yt_dlp/utils/_jsruntime.py +permissions: + contents: read + +concurrency: + group: challenge-tests-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + tests: + name: Challenge Tests + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', pypy-3.11] + env: + QJS_VERSION: '2025-04-26' # Earliest version with rope strings + steps: + - uses: actions/checkout@v5 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + - name: Install Deno + uses: denoland/setup-deno@v2 + with: + deno-version: '2.0.0' # minimum supported version + - name: Install Bun + uses: oven-sh/setup-bun@v2 + with: + # minimum supported version is 1.0.31 but earliest available Windows version is 1.1.0 + bun-version: ${{ (matrix.os == 'windows-latest' && '1.1.0') || '1.0.31' }} + - name: Install Node + uses: actions/setup-node@v6 + with: + node-version: '20.0' # minimum supported version + - name: Install QuickJS (Linux) + if: matrix.os == 'ubuntu-latest' + run: | + wget "https://bellard.org/quickjs/binary_releases/quickjs-linux-x86_64-${QJS_VERSION}.zip" -O quickjs.zip + unzip quickjs.zip qjs + sudo install qjs /usr/local/bin/qjs + - name: Install QuickJS (Windows) + if: matrix.os == 'windows-latest' + shell: pwsh + run: | + Invoke-WebRequest "https://bellard.org/quickjs/binary_releases/quickjs-win-x86_64-${Env:QJS_VERSION}.zip" -OutFile quickjs.zip + unzip quickjs.zip + - name: Install test requirements + run: | + python ./devscripts/install_deps.py --print --only-optional-groups --include-group test > requirements.txt + python ./devscripts/install_deps.py --print -c certifi -c requests -c urllib3 -c yt-dlp-ejs >> requirements.txt + python -m pip install -U -r requirements.txt + - name: Run tests + timeout-minutes: 15 + run: | + python -m yt_dlp -v --js-runtimes node --js-runtimes bun --js-runtimes quickjs || true + python ./devscripts/run_tests.py test/test_jsc -k download diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index e813b8f629..ae3dc95e1b 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -7,6 +7,7 @@ on: - test/** - yt_dlp/**.py - '!yt_dlp/extractor/**.py' + - yt_dlp/extractor/youtube/**.py - yt_dlp/extractor/__init__.py - yt_dlp/extractor/common.py - yt_dlp/extractor/extractors.py 
@@ -17,6 +18,7 @@ on: - test/** - yt_dlp/**.py - '!yt_dlp/extractor/**.py' + - yt_dlp/extractor/youtube/**.py - yt_dlp/extractor/__init__.py - yt_dlp/extractor/common.py - yt_dlp/extractor/extractors.py diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b8f1ed78ea..afe1d384b4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -269,9 +269,10 @@ jobs: "[![Master](https://img.shields.io/badge/Master%20builds-lightblue.svg?style=for-the-badge)]" \ "(https://github.com/${MASTER_REPO}/releases/latest \"Master builds\")" >> ./RELEASE_NOTES fi - printf '\n\n%s\n\n%s%s\n\n---\n' \ + printf '\n\n%s\n\n%s%s%s\n\n---\n' \ "#### A description of the various files is in the [README](https://github.com/${REPOSITORY}#release-files)" \ - "The PyInstaller-bundled executables are subject to the licenses described in " \ + "The zipimport Unix executable contains code licensed under ISC and MIT. " \ + "The PyInstaller-bundled executables are subject to these and other licenses, all of which are compiled in " \ "[THIRD_PARTY_LICENSES.txt](https://github.com/${BASE_REPO}/blob/${HEAD_SHA}/THIRD_PARTY_LICENSES.txt)" >> ./RELEASE_NOTES python ./devscripts/make_changelog.py -vv --collapsible >> ./RELEASE_NOTES printf '%s\n\n' '**This is a pre-release build**' >> ./PRERELEASE_NOTES diff --git a/.github/workflows/signature-tests.yml b/.github/workflows/signature-tests.yml deleted file mode 100644 index 1b310db6aa..0000000000 --- a/.github/workflows/signature-tests.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: Signature Tests -on: - push: - paths: - - .github/workflows/signature-tests.yml - - test/test_youtube_signature.py - - yt_dlp/jsinterp.py - pull_request: - paths: - - .github/workflows/signature-tests.yml - - test/test_youtube_signature.py - - yt_dlp/jsinterp.py -permissions: - contents: read - -concurrency: - group: signature-tests-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - tests: - name: Signature Tests - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest] - python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', pypy-3.11] - steps: - - uses: actions/checkout@v5 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - - name: Install test requirements - run: python ./devscripts/install_deps.py --only-optional-groups --include-group test - - name: Run tests - timeout-minutes: 15 - run: | - python3 -m yt_dlp -v || true # Print debug head - python3 ./devscripts/run_tests.py test/test_youtube_signature.py diff --git a/.gitignore b/.gitignore index 40bb34d2aa..af6da639db 100644 --- a/.gitignore +++ b/.gitignore @@ -107,6 +107,7 @@ README.txt test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp test/testdata/thumbnails/foo\ %d\ bar/foo_%d.* +.ejs-* # Binary /youtube-dl @@ -129,3 +130,6 @@ yt-dlp.zip # Plugins ytdlp_plugins/ yt-dlp-plugins + +# Packages +yt_dlp_ejs/ diff --git a/Makefile b/Makefile index 290955d209..88727219b8 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,5 @@ all: lazy-extractors yt-dlp doc pypi-files +all-extra: lazy-extractors yt-dlp-extra doc pypi-files clean: clean-test clean-dist clean-all: clean clean-cache completions: completion-bash completion-fish completion-zsh @@ -15,7 +16,11 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ .PHONY: all clean clean-all clean-test 
clean-dist clean-cache \ completions completion-bash completion-fish completion-zsh \ doc issuetemplates supportedsites ot offlinetest codetest test \ - tar pypi-files lazy-extractors install uninstall + tar pypi-files lazy-extractors install uninstall \ + all-extra yt-dlp-extra current-ejs-version + +.IGNORE: current-ejs-version +.SILENT: current-ejs-version clean-test: rm -rf tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ @@ -25,7 +30,8 @@ clean-test: test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp "test/testdata/thumbnails/foo %d bar/foo_%d."* clean-dist: rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ - yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS + yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS \ + yt-dlp.zip .ejs-* yt_dlp_ejs/ clean-cache: find . \( \ -type d -name ".*_cache" -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ @@ -81,28 +87,49 @@ test: offlinetest: codetest $(PYTHON) -m pytest -Werror -m "not download" -CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort -CODE_FOLDERS != $(CODE_FOLDERS_CMD) -CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD)) -CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done -CODE_FILES != $(CODE_FILES_CMD) -CODE_FILES ?= $(shell $(CODE_FILES_CMD)) -yt-dlp: $(CODE_FILES) +PY_CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's|/__init__\.py||' | grep -v '/__' | sort +PY_CODE_FOLDERS != $(PY_CODE_FOLDERS_CMD) +PY_CODE_FOLDERS ?= $(shell $(PY_CODE_FOLDERS_CMD)) + +PY_CODE_FILES_CMD = for f in $(PY_CODE_FOLDERS) ; do echo "$$f" | sed 's|$$|/*.py|' ; done +PY_CODE_FILES != $(PY_CODE_FILES_CMD) +PY_CODE_FILES ?= $(shell $(PY_CODE_FILES_CMD)) + +JS_CODE_FOLDERS_CMD = find yt_dlp -type f -name '*.js' | sed 's|/[^/]\{1,\}\.js$$||' | uniq +JS_CODE_FOLDERS != $(JS_CODE_FOLDERS_CMD) +JS_CODE_FOLDERS ?= $(shell $(JS_CODE_FOLDERS_CMD)) + +JS_CODE_FILES_CMD = for f in $(JS_CODE_FOLDERS) ; do echo "$$f" | sed 's|$$|/*.js|' ; done +JS_CODE_FILES != $(JS_CODE_FILES_CMD) +JS_CODE_FILES ?= $(shell $(JS_CODE_FILES_CMD)) + +yt-dlp.zip: $(PY_CODE_FILES) $(JS_CODE_FILES) mkdir -p zip - for d in $(CODE_FOLDERS) ; do \ + for d in $(PY_CODE_FOLDERS) ; do \ mkdir -p zip/$$d ;\ cp -pPR $$d/*.py zip/$$d/ ;\ done - (cd zip && touch -t 200001010101 $(CODE_FILES)) - mv zip/yt_dlp/__main__.py zip/ - (cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py) + for d in $(JS_CODE_FOLDERS) ; do \ + mkdir -p zip/$$d ;\ + cp -pPR $$d/*.js zip/$$d/ ;\ + done + (cd zip && touch -t 200001010101 $(PY_CODE_FILES) $(JS_CODE_FILES)) + rm -f zip/yt_dlp/__main__.py + (cd zip && zip -q ../yt-dlp.zip $(PY_CODE_FILES) $(JS_CODE_FILES)) rm -rf zip + +yt-dlp: yt-dlp.zip + mkdir -p zip + cp -pP yt_dlp/__main__.py zip/ + touch -t 200001010101 zip/__main__.py + (cd zip && zip -q ../yt-dlp.zip __main__.py) echo '#!$(PYTHON)' > yt-dlp cat yt-dlp.zip >> yt-dlp rm yt-dlp.zip chmod a+x yt-dlp + rm -rf zip -README.md: $(CODE_FILES) devscripts/make_readme.py +README.md: $(PY_CODE_FILES) devscripts/make_readme.py COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py CONTRIBUTING.md: README.md devscripts/make_contributing.py @@ -127,15 +154,15 @@ yt-dlp.1: README.md devscripts/prepare_manpage.py pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o 
yt-dlp.1 rm -f yt-dlp.1.temp.md -completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in +completions/bash/yt-dlp: $(PY_CODE_FILES) devscripts/bash-completion.in mkdir -p completions/bash $(PYTHON) devscripts/bash-completion.py -completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in +completions/zsh/_yt-dlp: $(PY_CODE_FILES) devscripts/zsh-completion.in mkdir -p completions/zsh $(PYTHON) devscripts/zsh-completion.py -completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in +completions/fish/yt-dlp.fish: $(PY_CODE_FILES) devscripts/fish-completion.in mkdir -p completions/fish $(PYTHON) devscripts/fish-completion.py @@ -172,3 +199,45 @@ CONTRIBUTORS: Changelog.md echo 'Updating $@ from git commit history' ; \ $(PYTHON) devscripts/make_changelog.py -v -c > /dev/null ; \ fi + +# The following EJS_-prefixed variables are auto-generated by devscripts/update_ejs.py +# DO NOT EDIT! +EJS_VERSION = 0.3.0 +EJS_WHEEL_NAME = yt_dlp_ejs-0.3.0-py3-none-any.whl +EJS_WHEEL_HASH = sha256:abbf269fa1674cab7b7b266e51e89e0e60b01a11a0fdf3cd63528683190cdd07 +EJS_PY_FOLDERS = yt_dlp_ejs yt_dlp_ejs/yt yt_dlp_ejs/yt/solver +EJS_PY_FILES = yt_dlp_ejs/__init__.py yt_dlp_ejs/_version.py yt_dlp_ejs/yt/__init__.py yt_dlp_ejs/yt/solver/__init__.py +EJS_JS_FOLDERS = yt_dlp_ejs/yt/solver +EJS_JS_FILES = yt_dlp_ejs/yt/solver/core.min.js yt_dlp_ejs/yt/solver/lib.min.js + +yt-dlp-extra: current-ejs-version .ejs-$(EJS_VERSION) $(EJS_PY_FILES) $(EJS_JS_FILES) yt-dlp.zip + mkdir -p zip + for d in $(EJS_PY_FOLDERS) ; do \ + mkdir -p zip/$$d ;\ + cp -pPR $$d/*.py zip/$$d/ ;\ + done + for d in $(EJS_JS_FOLDERS) ; do \ + mkdir -p zip/$$d ;\ + cp -pPR $$d/*.js zip/$$d/ ;\ + done + (cd zip && touch -t 200001010101 $(EJS_PY_FILES) $(EJS_JS_FILES)) + (cd zip && zip -q ../yt-dlp.zip $(EJS_PY_FILES) $(EJS_JS_FILES)) + cp -pP yt_dlp/__main__.py zip/ + touch -t 200001010101 zip/__main__.py + (cd zip && zip -q ../yt-dlp.zip __main__.py) + echo '#!$(PYTHON)' > yt-dlp + cat yt-dlp.zip >> yt-dlp + rm yt-dlp.zip + chmod a+x yt-dlp + rm -rf zip + +.ejs-$(EJS_VERSION): + @echo Downloading yt-dlp-ejs + @echo "yt-dlp-ejs==$(EJS_VERSION) --hash $(EJS_WHEEL_HASH)" > .ejs-requirements.txt + $(PYTHON) -m pip download -d ./build --no-deps --require-hashes -r .ejs-requirements.txt + unzip -o build/$(EJS_WHEEL_NAME) "yt_dlp_ejs/*" + @touch .ejs-$(EJS_VERSION) + +current-ejs-version: + rm -rf .ejs-* + touch .ejs-$$($(PYTHON) -c 'import sys; sys.path = [""]; from yt_dlp_ejs import version; print(version)' 2>/dev/null) diff --git a/README.md b/README.md index 7b3cd0970d..4e7f442a60 100644 --- a/README.md +++ b/README.md @@ -145,9 +145,11 @@ While yt-dlp is licensed under the [Unlicense](LICENSE), many of the release fil Most notably, the PyInstaller-bundled executables include GPLv3+ licensed code, and as such the combined work is licensed under [GPLv3+](https://www.gnu.org/licenses/gpl-3.0.html). -See [THIRD_PARTY_LICENSES.txt](THIRD_PARTY_LICENSES.txt) for details. +The zipimport Unix executable (`yt-dlp`) contains [ISC](https://github.com/meriyah/meriyah/blob/main/LICENSE.md) licensed code from [`meriyah`](https://github.com/meriyah/meriyah) and [MIT](https://github.com/davidbonnet/astring/blob/main/LICENSE) licensed code from [`astring`](https://github.com/davidbonnet/astring). -The zipimport binary (`yt-dlp`), the source tarball (`yt-dlp.tar.gz`), and the PyPI source distribution & wheel only contain code licensed under the [Unlicense](LICENSE). 
+See [THIRD_PARTY_LICENSES.txt](THIRD_PARTY_LICENSES.txt) for more details. + +The git repository, the source tarball (`yt-dlp.tar.gz`), the PyPI source distribution and the PyPI built distribution (wheel) only contain code licensed under the [Unlicense](LICENSE). @@ -201,7 +203,7 @@ Python versions 3.10+ (CPython) and 3.11+ (PyPy) are supported. Other versions a On Windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp. You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually. --> -While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended +While all the other dependencies are optional, `ffmpeg`, `ffprobe`, `yt-dlp-ejs` and a JavaScript runtime are highly recommended ### Strongly recommended @@ -211,6 +213,10 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly **Important**: What you need is ffmpeg *binary*, **NOT** [the Python package of the same name](https://pypi.org/project/ffmpeg) +* [**yt-dlp-ejs**](https://github.com/yt-dlp/ejs) - Required for deciphering YouTube n/sig values. Licensed under [Unlicense](https://github.com/yt-dlp/ejs/blob/main/LICENSE), bundles [MIT](https://github.com/davidbonnet/astring/blob/main/LICENSE) and [ISC](https://github.com/meriyah/meriyah/blob/main/LICENSE.md) components. + + A JavaScript runtime like [**deno**](https://deno.land) (recommended), [**node.js**](https://nodejs.org), [**bun**](https://bun.sh), or [**QuickJS**](https://bellard.org/quickjs/) is also required to run yt-dlp-ejs. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/EJS). + ### Networking * [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE) * [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) @@ -235,7 +241,7 @@ The following provide support for impersonating browser requests. This may be re ### Misc * [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) -* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) +* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in some extractors where JavaScript needs to be run. No longer used for YouTube. To be deprecated in the near future. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) * [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. 
Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE) * Any external downloader that you want to use with `--downloader` @@ -362,6 +368,26 @@ Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords --no-plugin-dirs Clear plugin directories to search, including defaults and those provided by previous --plugin-dirs + --js-runtimes RUNTIME[:PATH] Additional JavaScript runtime to enable, + with an optional path to the runtime + location. This option can be used multiple + times to enable multiple runtimes. Supported + runtimes: deno, node, bun, quickjs. By + default, only "deno" runtime is enabled. + --no-js-runtimes Clear JavaScript runtimes to enable, + including defaults and those provided by + previous --js-runtimes + --remote-components COMPONENT Remote components to allow yt-dlp to fetch + when required. You can use this option + multiple times to allow multiple components. + Supported values: ejs:npm (external + JavaScript components from npm), ejs:github + (external JavaScript components from yt-dlp- + ejs GitHub). By default, no remote + components are allowed. + --no-remote-components Disallow fetching of all remote components, + including any previously allowed by + --remote-components or defaults. --flat-playlist Do not extract a playlist's URL result entries; some entry metadata may be missing and downloading may be bypassed @@ -1814,7 +1840,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `android_sdkless,tv,web_safari,web` is used. `android_sdkless` is omitted if cookies are passed. If premium cookies are passed, `tv,web_creator,web_safari,web` is used instead. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` +* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. 
By default, `tv,android_sdkless,web` is used. If no JavaScript runtime is available, then `android_sdkless,web_safari,web` is used. If logged-in cookies are passed to yt-dlp, then `tv,web_safari,web` is used for free accounts and `tv,web_creator,web` is used for premium accounts. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details * `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. @@ -1833,6 +1859,10 @@ The following extractors use this feature: * `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default) * `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try fetch a PO Token regardless if the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context) * `playback_wait`: Duration (in seconds) to wait inbetween the extraction and download stages in order to ensure the formats are available. The default is `6` seconds +* `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default) + +#### youtube-ejs +* `jitless`: Run supported JavaScript engines in JIT-less mode. Supported runtimes are `deno`, `node` and `bun`. Provides better security at the cost of performance/speed. Do note that `node` and `bun` are still considered insecure. Either `true` or `false` (default) #### youtubepot-webpo * `bind_to_visitor_id`: Whether to use the Visitor ID instead of Visitor Data for caching WebPO tokens. Either `true` (default) or `false` diff --git a/THIRD_PARTY_LICENSES.txt b/THIRD_PARTY_LICENSES.txt index 1040046541..f7977064a0 100644 --- a/THIRD_PARTY_LICENSES.txt +++ b/THIRD_PARTY_LICENSES.txt @@ -4431,3 +4431,43 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ + + +-------------------------------------------------------------------------------- +Meriyah | ISC +URL: https://github.com/meriyah/meriyah +-------------------------------------------------------------------------------- +ISC License + +Copyright (c) 2019 and later, KFlash and others. + +Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + + +-------------------------------------------------------------------------------- +Astring | MIT +URL: https://github.com/davidbonnet/astring/ +-------------------------------------------------------------------------------- +Copyright (c) 2015, David Bonnet + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/devscripts/generate_third_party_licenses.py b/devscripts/generate_third_party_licenses.py index db615d2e35..322d56f633 100644 --- a/devscripts/generate_third_party_licenses.py +++ b/devscripts/generate_third_party_licenses.py @@ -271,6 +271,19 @@ DEPENDENCIES: list[Dependency] = [ license_url='https://raw.githubusercontent.com/python-websockets/websockets/refs/heads/main/LICENSE', project_url='https://websockets.readthedocs.io/', ), + # Dependencies of yt-dlp-ejs + Dependency( + name='Meriyah', + license='ISC', + license_url='https://raw.githubusercontent.com/meriyah/meriyah/refs/heads/main/LICENSE.md', + project_url='https://github.com/meriyah/meriyah', + ), + Dependency( + name='Astring', + license='MIT', + license_url='https://raw.githubusercontent.com/davidbonnet/astring/refs/heads/main/LICENSE', + project_url='https://github.com/davidbonnet/astring/', + ), ] diff --git a/devscripts/update_ejs.py b/devscripts/update_ejs.py new file mode 100644 index 0000000000..cffb1aa2b4 --- /dev/null +++ b/devscripts/update_ejs.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import contextlib +import io +import json +import hashlib +import pathlib +import urllib.request +import zipfile + + +TEMPLATE = '''\ +# This file is generated by devscripts/update_ejs.py. DO NOT MODIFY! + +VERSION = {version!r} +HASHES = {{ +{hash_mapping} +}} +''' +PREFIX = ' "yt-dlp-ejs==' +BASE_PATH = pathlib.Path(__file__).parent.parent +PYPROJECT_PATH = BASE_PATH / 'pyproject.toml' +PACKAGE_PATH = BASE_PATH / 'yt_dlp/extractor/youtube/jsc/_builtin/vendor' +RELEASE_URL = 'https://api.github.com/repos/yt-dlp/ejs/releases/latest' +ASSETS = { + 'yt.solver.lib.js': False, + 'yt.solver.lib.min.js': False, + 'yt.solver.deno.lib.js': True, + 'yt.solver.bun.lib.js': True, + 'yt.solver.core.min.js': False, + 'yt.solver.core.js': True, +} +MAKEFILE_PATH = BASE_PATH / 'Makefile' + + +def request(url: str): + return contextlib.closing(urllib.request.urlopen(url)) + + +def makefile_variables( + version: str | None = None, + name: str | None = None, + digest: str | None = None, + data: bytes | None = None, + keys_only: bool = False, +) -> dict[str, str | None]: + assert keys_only or all(arg is not None for arg in (version, name, digest, data)) + + return { + 'EJS_VERSION': None if keys_only else version, + 'EJS_WHEEL_NAME': None if keys_only else name, + 'EJS_WHEEL_HASH': None if keys_only else digest, + 'EJS_PY_FOLDERS': None if keys_only else list_wheel_contents(data, 'py', files=False), + 'EJS_PY_FILES': None if keys_only else list_wheel_contents(data, 'py', folders=False), + 'EJS_JS_FOLDERS': None if keys_only else list_wheel_contents(data, 'js', files=False), + 'EJS_JS_FILES': None if keys_only else list_wheel_contents(data, 'js', folders=False), + } + + +def list_wheel_contents( + wheel_data: bytes, + suffix: str | None = None, + folders: bool = True, + files: bool = True, +) -> str: + assert folders or files, 'at least one of "folders" or "files" must be True' + + path_gen = (zinfo.filename for zinfo in zipfile.ZipFile(io.BytesIO(wheel_data)).infolist()) + filtered = filter(lambda path: path.startswith('yt_dlp_ejs/'), path_gen) + if suffix: + filtered = filter(lambda path: path.endswith(f'.{suffix}'), filtered) + + files_list = list(filtered) + if not folders: + return ' '.join(files_list) + + folders_list = list(dict.fromkeys(path.rpartition('/')[0] for path in files_list)) + if not files: + return ' '.join(folders_list) + + return ' '.join(folders_list + files_list) + + +def 
main(): + current_version = None + with PYPROJECT_PATH.open() as file: + for line in file: + if not line.startswith(PREFIX): + continue + current_version, _, _ = line.removeprefix(PREFIX).partition('"') + + if not current_version: + print('yt-dlp-ejs dependency line could not be found') + return + + makefile_info = makefile_variables(keys_only=True) + prefixes = tuple(f'{key} = ' for key in makefile_info) + with MAKEFILE_PATH.open() as file: + for line in file: + if not line.startswith(prefixes): + continue + key, _, val = line.partition(' = ') + makefile_info[key] = val.rstrip() + + with request(RELEASE_URL) as resp: + info = json.load(resp) + + version = info['tag_name'] + if version == current_version: + print(f'yt-dlp-ejs is up to date! ({version})') + return + + print(f'Updating yt-dlp-ejs from {current_version} to {version}') + hashes = [] + wheel_info = {} + for asset in info['assets']: + name = asset['name'] + is_wheel = name.startswith('yt_dlp_ejs-') and name.endswith('.whl') + if not is_wheel and name not in ASSETS: + continue + with request(asset['browser_download_url']) as resp: + data = resp.read() + + # verify digest from github + digest = asset['digest'] + algo, _, expected = digest.partition(':') + hexdigest = hashlib.new(algo, data).hexdigest() + assert hexdigest == expected, f'downloaded attest mismatch ({hexdigest!r} != {expected!r})' + + if is_wheel: + wheel_info = makefile_variables(version, name, digest, data) + continue + + # calculate sha3-512 digest + asset_hash = hashlib.sha3_512(data).hexdigest() + hashes.append(f' {name!r}: {asset_hash!r},') + + if ASSETS[name]: + (PACKAGE_PATH / name).write_bytes(data) + + hash_mapping = '\n'.join(hashes) + for asset_name in ASSETS: + assert asset_name in hash_mapping, f'{asset_name} not found in release' + + assert all(wheel_info.get(key) for key in makefile_info), 'wheel info not found in release' + + (PACKAGE_PATH / '_info.py').write_text(TEMPLATE.format( + version=version, + hash_mapping=hash_mapping, + )) + + content = PYPROJECT_PATH.read_text() + updated = content.replace(PREFIX + current_version, PREFIX + version) + PYPROJECT_PATH.write_text(updated) + + makefile = MAKEFILE_PATH.read_text() + for key in wheel_info: + makefile = makefile.replace(f'{key} = {makefile_info[key]}', f'{key} = {wheel_info[key]}') + MAKEFILE_PATH.write_text(makefile) + + +if __name__ == '__main__': + main() diff --git a/pyproject.toml b/pyproject.toml index 1d6e573791..0f6202ca08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ default = [ "requests>=2.32.2,<3", "urllib3>=2.0.2,<3", "websockets>=13.0", + "yt-dlp-ejs==0.3.0", ] curl-cffi = [ "curl-cffi>=0.5.10,!=0.6.*,!=0.7.*,!=0.8.*,!=0.9.*,<0.14; implementation_name=='cpython'", @@ -122,7 +123,12 @@ artifacts = [ [tool.hatch.build.targets.wheel] packages = ["yt_dlp"] -artifacts = ["/yt_dlp/extractor/lazy_extractors.py"] +artifacts = [ + "/yt_dlp/extractor/lazy_extractors.py", +] +exclude = [ + "/yt_dlp/**/*.md", +] [tool.hatch.build.targets.wheel.shared-data] "completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp" diff --git a/test/test_jsc/conftest.py b/test/test_jsc/conftest.py new file mode 100644 index 0000000000..28d6734122 --- /dev/null +++ b/test/test_jsc/conftest.py @@ -0,0 +1,60 @@ +import re +import pathlib + +import pytest + +import yt_dlp.globals +from yt_dlp import YoutubeDL +from yt_dlp.extractor.common import InfoExtractor + + +_TESTDATA_PATH = pathlib.Path(__file__).parent.parent / 'testdata/sigs' +_player_re = 
re.compile(r'^.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$') _player_id_trans = str.maketrans(dict.fromkeys('/.-', '_')) @pytest.fixture def ie() -> InfoExtractor: runtime_names = yt_dlp.globals.supported_js_runtimes.value ydl = YoutubeDL({'js_runtimes': {key: {} for key in runtime_names}}) ie = ydl.get_info_extractor('Youtube') def _load_player(video_id, player_url, fatal=True): match = _player_re.match(player_url) test_id = match.group('id').translate(_player_id_trans) cached_file = _TESTDATA_PATH / f'player-{test_id}.js' if cached_file.exists(): return cached_file.read_text() if code := ie._download_webpage(player_url, video_id, fatal=fatal): _TESTDATA_PATH.mkdir(exist_ok=True, parents=True) cached_file.write_text(code) return code return None ie._load_player = _load_player return ie class MockLogger: def trace(self, message: str): print(f'trace: {message}') def debug(self, message: str, *, once=False): print(f'debug: {message}') def info(self, message: str): print(f'info: {message}') def warning(self, message: str, *, once=False): print(f'warning: {message}') def error(self, message: str): print(f'error: {message}') @pytest.fixture def logger(): return MockLogger() diff --git a/test/test_jsc/test_ejs_integration.py b/test/test_jsc/test_ejs_integration.py new file mode 100644 index 0000000000..7984810794 --- /dev/null +++ b/test/test_jsc/test_ejs_integration.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import dataclasses +import enum +import importlib.util +import json + +import pytest + +from yt_dlp.extractor.youtube.jsc.provider import ( + JsChallengeRequest, + JsChallengeType, + JsChallengeProviderResponse, + JsChallengeResponse, + NChallengeInput, + NChallengeOutput, + SigChallengeInput, + SigChallengeOutput, +) +from yt_dlp.extractor.youtube.jsc._builtin.bun import BunJCP +from yt_dlp.extractor.youtube.jsc._builtin.deno import DenoJCP +from yt_dlp.extractor.youtube.jsc._builtin.node import NodeJCP +from yt_dlp.extractor.youtube.jsc._builtin.quickjs import QuickJSJCP + + +_has_ejs = bool(importlib.util.find_spec('yt_dlp_ejs')) +pytestmark = pytest.mark.skipif(not _has_ejs, reason='yt-dlp-ejs not available') + + +class Variant(enum.Enum): + main = 'player_ias.vflset/en_US/base.js' + tcc = 'player_ias_tcc.vflset/en_US/base.js' + tce = 'player_ias_tce.vflset/en_US/base.js' + es5 = 'player_es5.vflset/en_US/base.js' + es6 = 'player_es6.vflset/en_US/base.js' + tv = 'tv-player-ias.vflset/tv-player-ias.js' + tv_es6 = 'tv-player-es6.vflset/tv-player-es6.js' + phone = 'player-plasma-ias-phone-en_US.vflset/base.js' + tablet = 'player-plasma-ias-tablet-en_US.vflset/base.js' + + +@dataclasses.dataclass +class Challenge: + player: str + variant: Variant + type: JsChallengeType + values: dict[str, str] = dataclasses.field(default_factory=dict) + + def url(self, /): + return f'https://www.youtube.com/s/player/{self.player}/{self.variant.value}' + + +CHALLENGES: list[Challenge] = [ + Challenge('3d3ba064', Variant.tce, JsChallengeType.N, { + 'ZdZIqFPQK-Ty8wId': 'qmtUsIz04xxiNW', + '4GMrWHyKI5cEvhDO': 'N9gmEX7YhKTSmw', + }), + Challenge('3d3ba064', Variant.tce, JsChallengeType.SIG, { + 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt': + 'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3gqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kNyBf6HPuAuCduh-a7O', + }), + Challenge('5ec65609', Variant.tce, JsChallengeType.N, { + '0eRGgQWJGfT5rFHFj': '4SvMpDQH-vBJCw', + }),
+ Challenge('5ec65609', Variant.tce, JsChallengeType.SIG, { + 'AAJAJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grH0rTMICA1mmDc0HoXgW3CAiAQQ4=CspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ=I': + 'AJfQdSswRQIhAMG5SN7-cAFChdrE7tLA6grI0rTMICA1mmDc0HoXgW3CAiAQQ4HCspfaF_vt82XH5yewvqcuEkvzeTsbRuHssRMyJQ==', + }), + Challenge('6742b2b9', Variant.tce, JsChallengeType.N, { + '_HPB-7GFg1VTkn9u': 'qUAsPryAO_ByYg', + 'K1t_fcB6phzuq2SF': 'Y7PcOt3VE62mog', + }), + Challenge('6742b2b9', Variant.tce, JsChallengeType.SIG, { + 'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJAA': + 'AJfQdSswRAIgMVVvrovTbw6UNh99kPa4D_XQjGT4qYu7S6SHM8EjoCACIEQnz-nKN5RgG6iUTnNJC58csYPSrnS_SzricuUMJZGM', + }), + Challenge('2b83d2e0', Variant.main, JsChallengeType.N, { + '0eRGgQWJGfT5rFHFj': 'euHbygrCMLksxd', + }), + Challenge('2b83d2e0', Variant.main, JsChallengeType.SIG, { + 'MMGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKn-znQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJA': + '-MGZJMUucirzS_SnrSPYsc85CJNnTUi6GgR5NKnMznQEICACojE8MHS6S7uYq4TGjQX_D4aPk99hNU6wbTvorvVVMgIARwsSdQfJ', + }), + Challenge('638ec5c6', Variant.main, JsChallengeType.N, { + 'ZdZIqFPQK-Ty8wId': '1qov8-KM-yH', + }), + Challenge('638ec5c6', Variant.main, JsChallengeType.SIG, { + 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt': + 'MhudCuAuP-6fByOk1_GNXN7gNHHShjyXS2VOgsEItAJz0tipeav0OmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt', + }), +] + +requests: list[JsChallengeRequest] = [] +responses: list[JsChallengeProviderResponse] = [] +for test in CHALLENGES: + input_type, output_type = { + JsChallengeType.N: (NChallengeInput, NChallengeOutput), + JsChallengeType.SIG: (SigChallengeInput, SigChallengeOutput), + }[test.type] + + request = JsChallengeRequest(test.type, input_type(test.url(), list(test.values.keys())), test.player) + requests.append(request) + responses.append(JsChallengeProviderResponse(request, JsChallengeResponse(test.type, output_type(test.values)))) + + +@pytest.fixture(params=[BunJCP, DenoJCP, NodeJCP, QuickJSJCP]) +def jcp(request, ie, logger): + obj = request.param(ie, logger, None) + if not obj.is_available(): + pytest.skip(f'{obj.PROVIDER_NAME} is not available') + obj.is_dev = True + return obj + + +@pytest.mark.download +def test_bulk_requests(jcp): + assert list(jcp.bulk_solve(requests)) == responses + + +@pytest.mark.download +def test_using_cached_player(jcp): + first_player_requests = requests[:3] + player = jcp._get_player(first_player_requests[0].video_id, first_player_requests[0].input.player_url) + initial = json.loads(jcp._run_js_runtime(jcp._construct_stdin(player, False, first_player_requests))) + preprocessed = initial.pop('preprocessed_player') + result = json.loads(jcp._run_js_runtime(jcp._construct_stdin(preprocessed, True, first_player_requests))) + + assert initial == result diff --git a/test/test_jsc/test_provider.py b/test/test_jsc/test_provider.py new file mode 100644 index 0000000000..3342f77546 --- /dev/null +++ b/test/test_jsc/test_provider.py @@ -0,0 +1,194 @@ + +import pytest + +from yt_dlp.extractor.youtube.jsc.provider import ( + JsChallengeProvider, + JsChallengeRequest, + JsChallengeProviderResponse, + JsChallengeProviderRejectedRequest, + JsChallengeType, + JsChallengeResponse, + NChallengeOutput, + NChallengeInput, + JsChallengeProviderError, + register_provider, + register_preference, +) +from yt_dlp.extractor.youtube.pot._provider import IEContentProvider +from yt_dlp.utils import ExtractorError +from 
yt_dlp.extractor.youtube.jsc._registry import _jsc_preferences, _jsc_providers + + +class ExampleJCP(JsChallengeProvider): + PROVIDER_NAME = 'example-provider' + PROVIDER_VERSION = '0.0.1' + BUG_REPORT_LOCATION = 'https://example.com/issues' + + _SUPPORTED_TYPES = [JsChallengeType.N] + + def is_available(self) -> bool: + return True + + def _real_bulk_solve(self, requests): + for request in requests: + results = dict.fromkeys(request.input.challenges, 'example-solution') + response = JsChallengeResponse( + type=request.type, + output=NChallengeOutput(results=results)) + yield JsChallengeProviderResponse(request=request, response=response) + + +PLAYER_URL = 'https://example.com/player.js' + + +class TestJsChallengeProvider: + # note: some test covered in TestPoTokenProvider which shares the same base class + def test_base_type(self): + assert issubclass(JsChallengeProvider, IEContentProvider) + + def test_create_provider_missing_bulk_solve_method(self, ie, logger): + class MissingMethodsJCP(JsChallengeProvider): + def is_available(self) -> bool: + return True + + with pytest.raises(TypeError, match='bulk_solve'): + MissingMethodsJCP(ie=ie, logger=logger, settings={}) + + def test_create_provider_missing_available_method(self, ie, logger): + class MissingMethodsJCP(JsChallengeProvider): + def _real_bulk_solve(self, requests): + raise JsChallengeProviderRejectedRequest('Not implemented') + + with pytest.raises(TypeError, match='is_available'): + MissingMethodsJCP(ie=ie, logger=logger, settings={}) + + def test_barebones_provider(self, ie, logger): + class BarebonesProviderJCP(JsChallengeProvider): + def is_available(self) -> bool: + return True + + def _real_bulk_solve(self, requests): + raise JsChallengeProviderRejectedRequest('Not implemented') + + provider = BarebonesProviderJCP(ie=ie, logger=logger, settings={}) + assert provider.PROVIDER_NAME == 'BarebonesProvider' + assert provider.PROVIDER_KEY == 'BarebonesProvider' + assert provider.PROVIDER_VERSION == '0.0.0' + assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .' 
+ + def test_example_provider_success(self, ie, logger): + provider = ExampleJCP(ie=ie, logger=logger, settings={}) + + request = JsChallengeRequest( + type=JsChallengeType.N, + input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge'])) + + request_two = JsChallengeRequest( + type=JsChallengeType.N, + input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge-2'])) + + responses = list(provider.bulk_solve([request, request_two])) + assert len(responses) == 2 + assert all(isinstance(r, JsChallengeProviderResponse) for r in responses) + assert responses == [ + JsChallengeProviderResponse( + request=request, + response=JsChallengeResponse( + type=JsChallengeType.N, + output=NChallengeOutput(results={'example-challenge': 'example-solution'}), + ), + ), + JsChallengeProviderResponse( + request=request_two, + response=JsChallengeResponse( + type=JsChallengeType.N, + output=NChallengeOutput(results={'example-challenge-2': 'example-solution'}), + ), + ), + ] + + def test_provider_unsupported_challenge_type(self, ie, logger): + provider = ExampleJCP(ie=ie, logger=logger, settings={}) + request_supported = JsChallengeRequest( + type=JsChallengeType.N, + input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge'])) + request_unsupported = JsChallengeRequest( + type=JsChallengeType.SIG, + input=NChallengeInput(player_url=PLAYER_URL, challenges=['example-challenge'])) + responses = list(provider.bulk_solve([request_supported, request_unsupported, request_supported])) + assert len(responses) == 3 + # Requests are validated first before continuing to _real_bulk_solve + assert isinstance(responses[0], JsChallengeProviderResponse) + assert isinstance(responses[0].error, JsChallengeProviderRejectedRequest) + assert responses[0].request is request_unsupported + assert str(responses[0].error) == 'JS Challenge type "JsChallengeType.SIG" is not supported by example-provider' + + assert responses[1:] == [ + JsChallengeProviderResponse( + request=request_supported, + response=JsChallengeResponse( + type=JsChallengeType.N, + output=NChallengeOutput(results={'example-challenge': 'example-solution'}), + ), + ), + JsChallengeProviderResponse( + request=request_supported, + response=JsChallengeResponse( + type=JsChallengeType.N, + output=NChallengeOutput(results={'example-challenge': 'example-solution'}), + ), + ), + ] + + def test_provider_get_player(self, ie, logger): + ie._load_player = lambda video_id, player_url, fatal: (video_id, player_url, fatal) + provider = ExampleJCP(ie=ie, logger=logger, settings={}) + assert provider._get_player('video123', PLAYER_URL) == ('video123', PLAYER_URL, True) + + def test_provider_get_player_error(self, ie, logger): + def raise_error(video_id, player_url, fatal): + raise ExtractorError('Failed to load player') + + ie._load_player = raise_error + provider = ExampleJCP(ie=ie, logger=logger, settings={}) + with pytest.raises(JsChallengeProviderError, match='Failed to load player for JS challenge'): + provider._get_player('video123', PLAYER_URL) + + def test_require_class_end_with_suffix(self, ie, logger): + class InvalidSuffix(JsChallengeProvider): + PROVIDER_NAME = 'invalid-suffix' + + def _real_bulk_solve(self, requests): + raise JsChallengeProviderRejectedRequest('Not implemented') + + def is_available(self) -> bool: + return True + + provider = InvalidSuffix(ie=ie, logger=logger, settings={}) + + with pytest.raises(AssertionError): + provider.PROVIDER_KEY # noqa: B018 + + +def test_register_provider(ie): + + 
@register_provider + class UnavailableProviderJCP(JsChallengeProvider): + def is_available(self) -> bool: + return False + + def _real_bulk_solve(self, requests): + raise JsChallengeProviderRejectedRequest('Not implemented') + + assert _jsc_providers.value.get('UnavailableProvider') == UnavailableProviderJCP + _jsc_providers.value.pop('UnavailableProvider') + + +def test_register_preference(ie): + before = len(_jsc_preferences.value) + + @register_preference(ExampleJCP) + def unavailable_preference(*args, **kwargs): + return 1 + + assert len(_jsc_preferences.value) == before + 1 diff --git a/test/test_pot/test_pot_framework.py b/test/test_pot/test_pot_framework.py index d2de1dd290..fae6c80027 100644 --- a/test/test_pot/test_pot_framework.py +++ b/test/test_pot/test_pot_framework.py @@ -1,6 +1,6 @@ import pytest -from yt_dlp.extractor.youtube.pot._provider import IEContentProvider +from yt_dlp.extractor.youtube.pot._provider import IEContentProvider, configuration_arg from yt_dlp.cookies import YoutubeDLCookieJar from yt_dlp.utils.networking import HTTPHeaderDict from yt_dlp.extractor.youtube.pot.provider import ( @@ -627,3 +627,13 @@ def test_logger_log_level(logger): assert logger.LogLevel('debuG') == logger.LogLevel.DEBUG assert logger.LogLevel(10) == logger.LogLevel.DEBUG assert logger.LogLevel('UNKNOWN') == logger.LogLevel.INFO + + +def test_configuration_arg(): + config = {'abc': ['123D'], 'xyz': ['456a', '789B']} + + assert configuration_arg(config, 'abc') == ['123d'] + assert configuration_arg(config, 'abc', default=['default']) == ['123d'] + assert configuration_arg(config, 'ABC', default=['default']) == ['default'] + assert configuration_arg(config, 'abc', casesense=True) == ['123D'] + assert configuration_arg(config, 'xyz', casesense=False) == ['456a', '789b'] diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py deleted file mode 100644 index 2e9c974db2..0000000000 --- a/test/test_youtube_signature.py +++ /dev/null @@ -1,504 +0,0 @@ -#!/usr/bin/env python3 - -# Allow direct execution -import os -import sys -import unittest - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - - -import contextlib -import re -import string -import urllib.request - -from test.helper import FakeYDL, is_download_test -from yt_dlp.extractor import YoutubeIE -from yt_dlp.jsinterp import JSInterpreter - -_SIG_TESTS = [ - ( - 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', - 86, - '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', - ), - ( - 'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', - 85, - '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', - ), - ( - 'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', - 90, - ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', - ), - ( - 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', - 84, - 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', - ), - ( - 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', - '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', - 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', - ), - ( - 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', - 84, - '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>', - ), - ( - 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', - 
83, - '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F', - ), - ( - 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js', - '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', - '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B', - ), - ( - 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', - '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', - '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', - ), - ( - 'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', - ), - ( - 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - 'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - ), - ( - 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - '0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q', - ), - ( - 'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - 'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1', - ), - ( - 'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - '0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - ), - ( - 'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - '0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - ), - ( - 'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - 'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', - ), - ( - 'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - 'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', - ), - ( - 'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw', - ), - ( - 'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js', - 
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw', - ), - ( - 'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw', - ), - ( - 'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw', - ), - ( - 'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - 'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0', - ), - ( - 'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - 'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0', - ), - ( - 'https://www.youtube.com/s/player/e12fbea4/player_ias.vflset/en_US/base.js', - 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt', - 'JC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-a', - ), - ( - 'https://www.youtube.com/s/player/010fbc8d/player_es5.vflset/en_US/base.js', - 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt', - 'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit2zJAsIEggOVaSXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-', - ), - ( - 'https://www.youtube.com/s/player/010fbc8d/player_es6.vflset/en_US/base.js', - 'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt', - 'ttJC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit2zJAsIEggOVaSXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-', - ), - ( - 'https://www.youtube.com/s/player/5ec65609/player_ias_tcc.vflset/en_US/base.js', - 'AAJAJfQdSswRAIgNSN0GDUcHnCIXkKcF61yLBgDHiX1sUhOJdY4_GxunRYCIDeYNYP_16mQTPm5f1OVq3oV1ijUNYPjP4iUSMAjO9bZ', - 'AJfQdSswRAIgNSN0GDUcHnCIXkKcF61ZLBgDHiX1sUhOJdY4_GxunRYCIDyYNYP_16mQTPm5f1OVq3oV1ijUNYPjP4iUSMAjO9be', - ), -] - -_NSIG_TESTS = [ - ( - 'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js', - 'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg', - ), - ( - 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', - 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', - ), - ( - 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js', - 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN', - ), - ( - 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js', - 'oBo2h5euWy6osrUt', '3DIBbn3qdQ', - ), - ( - 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js', - 'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q', - ), - ( - 'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js', - 'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw', - ), - ( - 
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js', - 'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw', - ), - ( - 'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js', - 'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA', - ), - ( - 'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js', - 'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA', - ), - ( - 'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', - 'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw', - ), - ( - 'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js', - 'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg', - ), - ( - 'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js', - 'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw', - ), - ( - 'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js', - '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw', - ), - ( - 'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js', - '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ', - ), - ( - 'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js', - 'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg', - ), - ( - 'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js', - 'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA', - ), - ( - 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js', - 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ', - ), - ( - 'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js', - 'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w', - ), - ( - 'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js', - 'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A', - ), - ( - 'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js', - 'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw', - ), - ( - 'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js', - 'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w', - ), - ( - 'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js', - '1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A', - ), - ( - 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js', - '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ', - ), - ( - 'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js', - '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg', - ), - ( - 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js', - 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A', - ), - ( - 'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js', - 'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl', - ), - ( - 'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js', - '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw', - ), - ( - 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js', - 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw', - ), - ( - 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', - 'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ', - ), - ( - 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js', - 'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP', - ), - ( - 'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js', - 'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg', - ), - ( - 'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js', - 'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ', - ), - ( - 'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js', - 'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg', - ), - ( - 
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js', - 'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v', - ), - ( - 'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js', - 'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww', - ), - ( - 'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js', - '-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg', - ), - ( - 'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', - 'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA', - ), - ( - 'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js', - 'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg', - ), - ( - 'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js', - 'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A', - ), - ( - 'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js', - 'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A', - ), - ( - 'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js', - 'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4', - ), - ( - 'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js', - 'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4', - ), - ( - 'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js', - 'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4', - ), - ( - 'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js', - 'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE', - ), - ( - 'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js', - 'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE', - ), - ( - 'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js', - 'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h', - ), - ( - 'https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js', - 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u', - ), - ( - 'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js', - 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u', - ), - ( - 'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js', - 'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq', - ), - ( - 'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js', - 'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A', - ), - ( - 'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js', - 'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A', - ), - ( - 'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js', - 'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy', - ), - ( - 'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js', - 'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf', - ), - ( - 'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js', - 'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa', - ), - ( - 'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js', - 'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF', - ), - ( - 'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js', - 'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD', - ), - ( - 'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js', - 'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba', - ), - ( - 'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js', - 'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg', - ), - ( - 'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js', - 'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA', - ), - ( - 
'https://www.youtube.com/s/player/010fbc8d/player_es5.vflset/en_US/base.js', - '0hlOAlqjFszVvF4Z', 'R-H23bZGAsRFTg', - ), - ( - 'https://www.youtube.com/s/player/010fbc8d/player_es6.vflset/en_US/base.js', - '0hlOAlqjFszVvF4Z', 'R-H23bZGAsRFTg', - ), - ( - 'https://www.youtube.com/s/player/5ec65609/player_ias_tcc.vflset/en_US/base.js', - '6l5CTNx4AzIqH4MXM', 'NupToduxHBew1g', - ), -] - - -@is_download_test -class TestPlayerInfo(unittest.TestCase): - def test_youtube_extract_player_info(self): - PLAYER_URLS = ( - ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'), - ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'), - ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'), - ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'), - ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'), - ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'), - ('https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js', 'e7567ecf'), - ('https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', '643afba4'), - # obsolete - ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'), - ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'), - ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'), - ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'), - ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'), - ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'), - ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'), - ) - for player_url, expected_player_id in PLAYER_URLS: - player_id = YoutubeIE._extract_player_info(player_url) - self.assertEqual(player_id, expected_player_id) - - -@is_download_test -class TestSignature(unittest.TestCase): - def setUp(self): - TEST_DIR = os.path.dirname(os.path.abspath(__file__)) - self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs') - if not os.path.exists(self.TESTDATA_DIR): - os.mkdir(self.TESTDATA_DIR) - - def tearDown(self): - with contextlib.suppress(OSError): - for f in os.listdir(self.TESTDATA_DIR): - os.remove(f) - - -def t_factory(name, sig_func, url_pattern): - def make_tfunc(url, sig_input, expected_sig): - m = url_pattern.match(url) - assert m, f'{url!r} should follow URL format' - test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id')) - - def test_func(self): - basename = f'player-{test_id}.js' - fn = os.path.join(self.TESTDATA_DIR, basename) - - if not os.path.exists(fn): - urllib.request.urlretrieve(url, fn) - with open(fn, encoding='utf-8') as testf: - jscode = testf.read() - self.assertEqual(sig_func(jscode, sig_input, url), expected_sig) - - test_func.__name__ = f'test_{name}_js_{test_id}' - setattr(TestSignature, test_func.__name__, test_func) - return make_tfunc - - -def signature(jscode, sig_input, player_url): - func = YoutubeIE(FakeYDL())._parse_sig_js(jscode, player_url) - src_sig = ( - str(string.printable[:sig_input]) - if isinstance(sig_input, int) else sig_input) - return func(src_sig) - - -def n_sig(jscode, sig_input, player_url): - ie = YoutubeIE(FakeYDL()) - funcname = ie._extract_n_function_name(jscode, 
player_url=player_url) - jsi = JSInterpreter(jscode) - func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname), jscode, player_url)) - return func([sig_input]) - - -make_sig_test = t_factory( - 'signature', signature, - re.compile(r'''(?x) - .+(?: - /player/(?P[a-zA-Z0-9_/.-]+)| - /html5player-(?:en_US-)?(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)? - )\.js$''')) -for test_spec in _SIG_TESTS: - make_sig_test(*test_spec) - -make_nsig_test = t_factory( - 'nsig', n_sig, re.compile(r'.+/player/(?P[a-zA-Z0-9_/.-]+)\.js$')) -for test_spec in _NSIG_TESTS: - make_nsig_test(*test_spec) - - -if __name__ == '__main__': - unittest.main() diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index ef42ba68e3..539b10fe29 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -42,6 +42,8 @@ from .globals import ( plugin_pps, all_plugins_loaded, plugin_dirs, + supported_js_runtimes, + supported_remote_components, ) from .minicurses import format_text from .networking import HEADRequest, Request, RequestDirector @@ -533,6 +535,18 @@ class YoutubeDL: See "EXTRACTOR ARGUMENTS" for details. Argument values must always be a list of string(s). E.g. {'youtube': {'skip': ['dash', 'hls']}} + js_runtimes: A dictionary of JavaScript runtime keys (in lower case) to enable + and a dictionary of additional configuration for the runtime. + Currently supported runtimes are 'deno', 'node', 'bun', and 'quickjs'. + If None, the default runtime of "deno" will be enabled. + The runtime configuration dictionary can have the following keys: + - path: Path to the executable (optional) + E.g. {'deno': {'path': '/path/to/deno'} + remote_components: A list of remote components that are allowed to be fetched when required. + Supported components: + - ejs:npm (external JavaScript components from npm) + - ejs:github (external JavaScript components from yt-dlp-ejs GitHub) + By default, no remote components are allowed to be fetched. mark_watched: Mark videos watched (even with --simulate). Only for YouTube The following options are deprecated and may be removed in the future: @@ -717,6 +731,13 @@ class YoutubeDL: else: raise + # Note: this must be after plugins are loaded + self.params['js_runtimes'] = self.params.get('js_runtimes', {'deno': {}}) + self._clean_js_runtimes(self.params['js_runtimes']) + + self.params['remote_components'] = set(self.params.get('remote_components', ())) + self._clean_remote_components(self.params['remote_components']) + self.params['compat_opts'] = set(self.params.get('compat_opts', ())) self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers')) self._load_cookies(self.params['http_headers'].get('Cookie')) # compat @@ -829,6 +850,36 @@ class YoutubeDL: self.archive = preload_download_archive(self.params.get('download_archive')) + def _clean_js_runtimes(self, runtimes): + if not ( + isinstance(runtimes, dict) + and all(isinstance(k, str) and (v is None or isinstance(v, dict)) for k, v in runtimes.items()) + ): + raise ValueError('Invalid js_runtimes format, expected a dict of {runtime: {config}}') + + if unsupported_runtimes := runtimes.keys() - supported_js_runtimes.value.keys(): + self.report_warning( + f'Ignoring unsupported JavaScript runtime(s): {", ".join(unsupported_runtimes)}.' 
+ f' Supported runtimes: {", ".join(supported_js_runtimes.value.keys())}.') + for rt in unsupported_runtimes: + runtimes.pop(rt) + + def _clean_remote_components(self, remote_components: set): + if unsupported_remote_components := set(remote_components) - set(supported_remote_components.value): + self.report_warning( + f'Ignoring unsupported remote component(s): {", ".join(unsupported_remote_components)}.' + f' Supported remote components: {", ".join(supported_remote_components.value)}.') + for rt in unsupported_remote_components: + remote_components.remove(rt) + + @functools.cached_property + def _js_runtimes(self): + runtimes = {} + for name, config in self.params.get('js_runtimes', {}).items(): + runtime_cls = supported_js_runtimes.value.get(name) + runtimes[name] = runtime_cls(path=config.get('path')) if runtime_cls else None + return runtimes + def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ @@ -4064,6 +4115,18 @@ class YoutubeDL: join_nonempty(*get_package_info(m)) for m in available_dependencies.values() })) or 'none')) + if not self.params.get('js_runtimes'): + write_debug('JS runtimes: none (disabled)') + else: + write_debug('JS runtimes: %s' % (', '.join(sorted( + f'{name} (unknown)' if runtime is None + else join_nonempty( + runtime.info.name, + runtime.info.version + (' (unsupported)' if runtime.info.supported is False else ''), + ) + for name, runtime in self._js_runtimes.items() if runtime is None or runtime.info is not None + )) or 'none')) + write_debug(f'Proxy map: {self.proxies}') write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 8aee126030..2f6ba47832 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -61,8 +61,15 @@ from .utils import ( shell_quote, variadic, write_string, + ) from .utils._utils import _UnsafeExtensionError +from .utils._jsruntime import ( + BunJsRuntime as _BunJsRuntime, + DenoJsRuntime as _DenoJsRuntime, + NodeJsRuntime as _NodeJsRuntime, + QuickJsRuntime as _QuickJsRuntime, +) from .YoutubeDL import YoutubeDL @@ -773,6 +780,10 @@ def parse_options(argv=None): else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS) else None) + js_runtimes = { + runtime.lower(): {'path': path} for runtime, path in ( + [*arg.split(':', 1), None][:2] for arg in opts.js_runtimes)} + return ParsedOptions(parser, opts, urls, { 'usenetrc': opts.usenetrc, 'netrc_location': opts.netrc_location, @@ -940,6 +951,8 @@ def parse_options(argv=None): 'geo_bypass_country': opts.geo_bypass_country, 'geo_bypass_ip_block': opts.geo_bypass_ip_block, 'useid': opts.useid or None, + 'js_runtimes': js_runtimes, + 'remote_components': opts.remote_components, 'warn_when_outdated': opts.update_self is None, '_warnings': warnings, '_deprecation_warnings': deprecation_warnings, @@ -1081,6 +1094,16 @@ def main(argv=None): from .extractor import gen_extractors, list_extractors +# Register JS runtimes and remote components +from .globals import supported_js_runtimes, supported_remote_components +supported_js_runtimes.value['deno'] = _DenoJsRuntime +supported_js_runtimes.value['node'] = _NodeJsRuntime +supported_js_runtimes.value['bun'] = _BunJsRuntime +supported_js_runtimes.value['quickjs'] = _QuickJsRuntime + +supported_remote_components.value.append('ejs:github') +supported_remote_components.value.append('ejs:npm') + __all__ = [ 'YoutubeDL', 'gen_extractors', diff --git 
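As a usage sketch only (not part of the patch): the two new YoutubeDL params documented above can be enabled when embedding as shown below. The quickjs path and the comprehension wrapper name are placeholders; the option shapes follow the docstring and the parse_options comprehension introduced in this diff.

from yt_dlp import YoutubeDL

params = {
    # Runtime keys are lower-case; each value is a config dict that may
    # carry an optional 'path' to the executable.
    'js_runtimes': {
        'deno': {},
        'quickjs': {'path': '/usr/local/bin/qjs'},  # placeholder path
    },
    # Opt in to fetching the external JS solver components from GitHub
    'remote_components': ['ejs:github'],
}
ydl = YoutubeDL(params)  # then ydl.download([...]) as usual


# The CLI side (parse_options above) maps each 'runtime[:path]' argument
# onto the same structure; this standalone helper mirrors that comprehension:
def parse_js_runtimes(args):
    return {
        runtime.lower(): {'path': path}
        for runtime, path in ([*arg.split(':', 1), None][:2] for arg in args)
    }

assert parse_js_runtimes(['deno', 'QuickJS:/usr/local/bin/qjs']) == {
    'deno': {'path': None}, 'quickjs': {'path': '/usr/local/bin/qjs'}}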
a/yt_dlp/__pyinstaller/hook-yt_dlp.py b/yt_dlp/__pyinstaller/hook-yt_dlp.py index 8e7f42f596..0c4bf7d63b 100644 --- a/yt_dlp/__pyinstaller/hook-yt_dlp.py +++ b/yt_dlp/__pyinstaller/hook-yt_dlp.py @@ -34,3 +34,4 @@ print(f'Adding imports: {hiddenimports}') excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle'] datas = collect_data_files('curl_cffi', includes=['cacert.pem']) +datas += collect_data_files('yt_dlp_ejs', includes=['**/*.js']) diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py index 0d58da2bd5..cf2bcfb37e 100644 --- a/yt_dlp/dependencies/__init__.py +++ b/yt_dlp/dependencies/__init__.py @@ -81,6 +81,12 @@ except ImportError: from . import Cryptodome +try: + import yt_dlp_ejs +except ImportError: + yt_dlp_ejs = None + + all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')} available_dependencies = {k: v for k, v in all_dependencies.items() if v} diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index 133b069f22..062301b5ff 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -387,7 +387,8 @@ def _fix_embedded_ytcfg(ytcfg): def build_innertube_clients(): - BASE_CLIENTS = ('ios', 'web', 'tv', 'mweb', 'android') + # From highest to lowest priority + BASE_CLIENTS = ('tv', 'web', 'mweb', 'android', 'ios') priority = qualities(BASE_CLIENTS[::-1]) for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): @@ -409,9 +410,6 @@ def build_innertube_clients(): if variant == 'embedded': _fix_embedded_ytcfg(ytcfg) - ytcfg['priority'] -= 2 - elif variant: - ytcfg['priority'] -= 3 build_innertube_clients() diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 9d03254b85..226859a75f 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -4,9 +4,7 @@ import collections import datetime as dt import functools import itertools -import json import math -import os.path import random import re import sys @@ -26,10 +24,11 @@ from ._base import ( _split_innertube_client, short_client_name, ) +from .jsc._builtin.ejs import _EJS_WIKI_URL +from .jsc._director import initialize_jsc_director +from .jsc.provider import JsChallengeRequest, JsChallengeType, NChallengeInput, SigChallengeInput from .pot._director import initialize_pot_director from .pot.provider import PoTokenContext, PoTokenRequest -from ..openload import PhantomJSwrapper -from ...jsinterp import JSInterpreter, LocalNameSpace from ...networking.exceptions import HTTPError from ...utils import ( NO_DEFAULT, @@ -39,13 +38,11 @@ from ...utils import ( clean_html, datetime_from_str, filesize_from_tbr, - filter_dict, float_or_none, format_field, get_first, int_or_none, join_nonempty, - js_to_json, mimetype2ext, orderedSet, parse_codecs, @@ -148,10 +145,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'\b(?Pvfl[a-zA-Z0-9_-]+)\b.*?\.js$', ) _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt') - _DEFAULT_CLIENTS = ('android_sdkless', 'tv', 'web_safari', 'web') + _DEFAULT_CLIENTS = ('tv', 'android_sdkless', 'web') + _DEFAULT_JSLESS_CLIENTS = ('android_sdkless', 'web_safari', 'web') _DEFAULT_AUTHED_CLIENTS = ('tv', 'web_safari', 'web') # Premium does not require POT (except for subtitles) - _DEFAULT_PREMIUM_CLIENTS = ('tv', 'web_creator', 'web_safari', 'web') + _DEFAULT_PREMIUM_CLIENTS = ('tv', 'web_creator', 'web') _GEO_BYPASS = False @@ -1720,8 +1718,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 
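The reordered BASE_CLIENTS tuple above is documented as "highest to lowest priority" and is reversed before being handed to qualities(). A small sketch, assuming yt_dlp.utils.qualities keeps its current behaviour (q(x) is the index of x in the supplied list, -1 if absent), shows why the reversal gives the first-listed clients the highest scores:

from yt_dlp.utils import qualities

BASE_CLIENTS = ('tv', 'web', 'mweb', 'android', 'ios')  # highest to lowest priority
priority = qualities(BASE_CLIENTS[::-1])

print({client: priority(client) for client in BASE_CLIENTS})
# -> {'tv': 4, 'web': 3, 'mweb': 2, 'android': 1, 'ios': 0}; bigger is better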
'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js', } _INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()} - _NSIG_FUNC_CACHE_ID = 'nsig func' - _DUMMY_STRING = 'dlp_wins' @classmethod def suitable(cls, url): @@ -1741,6 +1737,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _real_initialize(self): super()._real_initialize() self._pot_director = initialize_pot_director(self) + self._jsc_director = initialize_jsc_director(self) def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live): lock = threading.Lock() @@ -1758,7 +1755,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): microformats = traverse_obj( prs, (..., 'microformat', 'playerMicroformatRenderer'), expected_type=dict) - _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url) + _, live_status, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url) is_live = live_status == 'is_live' start_time = time.time() @@ -2006,10 +2003,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): variant = re.sub(r'[^a-zA-Z0-9]', '_', remove_end(player_path, '.js')) return f'{player_id}-{variant}' - def _signature_cache_id(self, example_sig): - """ Return a string representation of a signature """ - return '.'.join(str(len(part)) for part in example_sig.split('.')) - @classmethod def _extract_player_info(cls, player_url): for player_re in cls._PLAYER_INFO_RE: @@ -2031,53 +2024,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._code_cache[player_js_key] = code return self._code_cache.get(player_js_key) - def _extract_signature_function(self, video_id, player_url, example_sig): - # Read from filesystem cache - func_id = join_nonempty( - self._player_js_cache_key(player_url), self._signature_cache_id(example_sig)) - assert os.path.basename(func_id) == func_id + def _sig_spec_cache_id(self, player_url, spec_id): + return join_nonempty(self._player_js_cache_key(player_url), str(spec_id)) - self.write_debug(f'Extracting signature function {func_id}') - cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.07.21'), None + def _load_sig_spec_from_cache(self, spec_cache_id): + # This is almost identical to _load_player_data_from_cache + # I hate it + if spec_cache_id in self._player_cache: + return self._player_cache[spec_cache_id] + spec = self.cache.load('youtube-sigfuncs', spec_cache_id, min_ver='2025.07.21') + if spec: + self._player_cache[spec_cache_id] = spec + return spec - if not cache_spec: - code = self._load_player(video_id, player_url) - if code: - res = self._parse_sig_js(code, player_url) - test_string = ''.join(map(chr, range(len(example_sig)))) - cache_spec = [ord(c) for c in res(test_string)] - self.cache.store('youtube-sigfuncs', func_id, cache_spec) + def _store_sig_spec_to_cache(self, spec_cache_id, spec): + if spec_cache_id not in self._player_cache: + self._player_cache[spec_cache_id] = spec + self.cache.store('youtube-sigfuncs', spec_cache_id, spec) - return lambda s: ''.join(s[i] for i in cache_spec) + def _load_player_data_from_cache(self, name, player_url): + cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url)) - def _parse_sig_js(self, jscode, player_url): - # Examples where `sig` is funcname: - # sig=function(a){a=a.split(""); ... 
;return a.join("")}; - # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a}; - # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))} - # sig=function(J){J=J.split(""); ... ;return J.join("")}; - # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J}; - # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))} - funcname = self._search_regex( - (r'\b(?P[a-zA-Z0-9_$]+)&&\((?P=var)=(?P[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)', - r'(?P[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)', - r'(?:\b|[^a-zA-Z0-9_$])(?P[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?', - # Old patterns - r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', - r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', - r'\bm=(?P[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)', - # Obsolete patterns - r'("|\')signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', - r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), - jscode, 'Initial JS player signature function name', group='sig') + if data := self._player_cache.get(cache_id): + return data - varname, global_list = self._interpret_player_js_global_var(jscode, player_url) - jsi = JSInterpreter(jscode) - initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list})) - return lambda s: initial_function([s]) + data = self.cache.load(*cache_id, min_ver='2025.07.21') + if data: + self._player_cache[cache_id] = data + + return data def _cached(self, func, *cache_id): def inner(*args, **kwargs): @@ -2095,246 +2070,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return ret return inner - def _load_player_data_from_cache(self, name, player_url): - cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url)) - - if data := self._player_cache.get(cache_id): - return data - - data = self.cache.load(*cache_id, min_ver='2025.07.21') - if data: - self._player_cache[cache_id] = data - - return data - def _store_player_data_to_cache(self, name, player_url, data): cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url)) if cache_id not in self._player_cache: self.cache.store(*cache_id, data) self._player_cache[cache_id] = data - def _decrypt_signature(self, s, video_id, player_url): - """Turn the encrypted s field into a working signature""" - extract_sig = self._cached( - self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s)) - func = extract_sig(video_id, player_url, s) - return func(s) - - def _decrypt_nsig(self, s, video_id, player_url): - """Turn the encrypted n field into a working signature""" - if player_url is None: - raise ExtractorError('Cannot decrypt nsig without player_url') - player_url = urljoin('https://www.youtube.com', player_url) - - try: - jsi, _, func_code = self._extract_n_function_code(video_id, player_url) - except ExtractorError as e: - raise ExtractorError('Unable to extract nsig function code', cause=e) - - try: - extract_nsig = 
self._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url) - ret = extract_nsig(jsi, func_code)(s) - except JSInterpreter.Exception as e: - try: - jsi = PhantomJSwrapper(self, timeout=5000) - except ExtractorError: - raise e - self.report_warning( - f'Native nsig extraction failed: Trying with PhantomJS\n' - f' n = {s} ; player = {player_url}', video_id) - self.write_debug(e, only_once=True) - - args, func_body = func_code - ret = jsi.execute( - f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));', - video_id=video_id, note='Executing signature code').strip() - - self.write_debug(f'Decrypted nsig {s} => {ret}') - # Only cache nsig func JS code to disk if successful, and only once - self._store_player_data_to_cache('nsig', player_url, func_code) - return ret - - def _extract_n_function_name(self, jscode, player_url=None): - varname, global_list = self._interpret_player_js_global_var(jscode, player_url) - if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('-_w8_'), any)): - pattern = r'''(?x) - \{\s*return\s+%s\[%d\]\s*\+\s*(?P[a-zA-Z0-9_$]+)\s*\} - ''' % (re.escape(varname), global_list.index(debug_str)) - if match := re.search(pattern, jscode): - pattern = r'''(?x) - \{\s*\)%s\(\s* - (?: - (?P[a-zA-Z0-9_$]+)\s*noitcnuf\s* - |noitcnuf\s*=\s*(?P[a-zA-Z0-9_$]+)(?:\s+rav)? - )[;\n] - ''' % re.escape(match.group('argname')[::-1]) - if match := re.search(pattern, jscode[match.start()::-1]): - a, b = match.group('funcname_a', 'funcname_b') - return (a or b)[::-1] - self.write_debug(join_nonempty( - 'Initial search was unable to find nsig function name', - player_url and f' player = {player_url}', delim='\n'), only_once=True) - - # Examples (with placeholders nfunc, narray, idx): - # * .get("n"))&&(b=nfunc(b) - # * .get("n"))&&(b=narray[idx](b) - # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c) - # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") - # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("") - # * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") - # * J.J="";J.url="";J.Z&&(R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}}; - funcname, idx = self._search_regex( - r'''(?x) - (?: - \.get\("n"\)\)&&\(b=| - (?: - b=String\.fromCharCode\(110\)| - (?P[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\] - ) - (?: - ,[a-zA-Z0-9_$]+\(a\))?,c=a\. 
- (?: - get\(b\)| - [a-zA-Z0-9_$]+\[b\]\|\|null - )\)&&\(c=| - \b(?P[a-zA-Z0-9_$]+)= - )(?P[a-zA-Z0-9_$]+)(?:\[(?P\d+)\])?\([a-zA-Z]\) - (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))''', - jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None)) - if not funcname: - self.report_warning(join_nonempty( - 'Falling back to generic n function search', - player_url and f' player = {player_url}', delim='\n'), only_once=True) - return self._search_regex( - r'''(?xs) - ;\s*(?P[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\) - \s*\{(?:(?!};).)+?return\s*(?P["'])[\w-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+''', - jscode, 'Initial JS player n function name', group='name') - elif not idx: - return funcname - - return json.loads(js_to_json(self._search_regex( - rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode, - f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)] - - def _interpret_player_js_global_var(self, jscode, player_url): - """Returns tuple of: variable name string, variable value list""" - extract_global_var = self._cached(self._search_regex, 'js global array', player_url) - varcode, varname, varvalue = extract_global_var( - r'''(?x) - (?P["\'])use\s+strict(?P=q1);\s* - (?P - var\s+(?P[a-zA-Z0-9_$]+)\s*=\s* - (?P - (?P["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2) - \.split\((?P["\'])(?:(?!(?P=q3)).)+(?P=q3)\) - |\[\s*(?:(?P["\'])(?:(?!(?P=q4)).|\\.)*(?P=q4)\s*,?\s*)+\] - ) - )[;,] - ''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None)) - if not varcode: - self.write_debug(join_nonempty( - 'No global array variable found in player JS', - player_url and f' player = {player_url}', delim='\n'), only_once=True) - return None, None - - jsi = JSInterpreter(varcode) - interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url) - return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10) - - def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url): - # Fixup global array - varname, global_list = self._interpret_player_js_global_var(jscode, player_url) - if varname and global_list: - nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}' - else: - varname = self._DUMMY_STRING - global_list = [] - - # Fixup typeof check - undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+' - fixed_code = re.sub( - fr'''(?x) - ;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?: - (["\'])undefined\1| - {re.escape(varname)}\[{undefined_idx}\] - )\s*\)\s*return\s+{re.escape(argnames[0])}; - ''', ';', nsig_code) - if fixed_code == nsig_code: - self.write_debug(join_nonempty( - 'No typeof statement found in nsig function code', - player_url and f' player = {player_url}', delim='\n'), only_once=True) - - # Fixup global funcs - jsi = JSInterpreter(fixed_code) - cache_id = (self._NSIG_FUNC_CACHE_ID, player_url) - try: - self._cached( - self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING) - except JSInterpreter.Exception: - self._player_cache.pop(cache_id, None) - - global_funcnames = jsi._undefined_varnames - debug_names = [] - jsi = JSInterpreter(jscode) - for func_name in global_funcnames: - try: - func_args, func_code = jsi.extract_function_code(func_name) - fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}' - debug_names.append(func_name) - except Exception: - self.report_warning(join_nonempty( - f'Unable to extract global nsig 
function {func_name} from player JS', - player_url and f' player = {player_url}', delim='\n'), only_once=True) - - if debug_names: - self.write_debug(f'Extracted global nsig functions: {", ".join(debug_names)}') - - return argnames, fixed_code - - def _extract_n_function_code(self, video_id, player_url): - player_id = self._extract_player_info(player_url) - func_code = self._load_player_data_from_cache('nsig', player_url) - jscode = func_code or self._load_player(video_id, player_url) - jsi = JSInterpreter(jscode) - - if func_code: - return jsi, player_id, func_code - - func_name = self._extract_n_function_name(jscode, player_url=player_url) - - # XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions - func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url) - - return jsi, player_id, func_code - - def _extract_n_function_from_code(self, jsi, func_code): - func = jsi.extract_function_from_code(*func_code) - - def extract_nsig(s): - try: - ret = func([s]) - except JSInterpreter.Exception: - raise - except Exception as e: - raise JSInterpreter.Exception(traceback.format_exc(), cause=e) - - if ret.startswith('enhanced_except_') or ret.endswith(s): - raise JSInterpreter.Exception('Signature function returned an exception') - return ret - - return extract_nsig - def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False): """ Extract signatureTimestamp (sts) Required to tell API what sig/player version is in use. """ + CACHE_ENABLED = False # TODO: enable when preprocessed player JS cache is solved/enabled + player_sts_override = self._get_player_js_version()[0] if player_sts_override: return int(player_sts_override) - if sts := traverse_obj(ytcfg, ('STS', {int_or_none})): + sts = traverse_obj(ytcfg, ('STS', {int_or_none})) + if sts: return sts if not player_url: @@ -2344,15 +2098,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.report_warning(error_msg) return None - sts = self._load_player_data_from_cache('sts', player_url) - if sts: + if CACHE_ENABLED and (sts := self._load_player_data_from_cache('sts', player_url)): return sts if code := self._load_player(video_id, player_url, fatal=fatal): sts = int_or_none(self._search_regex( r'(?:signatureTimestamp|sts)\s*:\s*(?P[0-9]{5})', code, 'JS player signature timestamp', group='sts', fatal=fatal)) - if sts: + if CACHE_ENABLED and sts: self._store_player_data_to_cache('sts', player_url, sts) return sts @@ -3020,9 +2773,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_requested_clients(self, url, smuggled_data, is_premium_subscriber): requested_clients = [] excluded_clients = [] + js_runtime_available = any(p.is_available() for p in self._jsc_director.providers.values()) default_clients = ( self._DEFAULT_PREMIUM_CLIENTS if is_premium_subscriber else self._DEFAULT_AUTHED_CLIENTS if self.is_authenticated + else self._DEFAULT_JSLESS_CLIENTS if not js_runtime_available else self._DEFAULT_CLIENTS ) allowed_clients = sorted( @@ -3039,6 +2794,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.report_warning(f'Skipping unsupported client "{client}"') else: requested_clients.append(client) + + if not (requested_clients or excluded_clients) and default_clients == self._DEFAULT_JSLESS_CLIENTS: + self.report_warning( + f'No supported JavaScript runtime could be found. YouTube extraction without ' + f'a JS runtime has been deprecated, and some formats may be missing. ' + f'See {_EJS_WIKI_URL} for details on installing one. 
To silence this warning, ' + f'you can use --extractor-args "youtube:player_client=default"', only_once=True) + if not requested_clients: requested_clients.extend(default_clients) for excluded_client in excluded_clients: @@ -3173,12 +2936,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber + sd[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})): f[STREAMING_DATA_CLIENT_NAME] = client f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func f[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber f[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token) - f[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp if deprioritize_pr: deprioritized_prs.append(pr) else: @@ -3258,12 +3021,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: self.report_warning(msg, only_once=True) - def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): + def _extract_formats_and_subtitles(self, video_id, player_responses, player_url, live_status, duration): CHUNK_SIZE = 10 << 20 PREFERRED_LANG_VALUE = 10 original_language = None itags, stream_ids = collections.defaultdict(set), [] itag_qualities, res_qualities = {}, {0: None} + subtitles = {} q = qualities([ # Normally tiny is the smallest video-only formats. But # audio-only formats with unknown quality may get tagged as tiny @@ -3271,7 +3035,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres', ]) - streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...)) format_types = self._configuration_arg('formats') all_formats = 'duplicate' in format_types if self._configuration_arg('include_duplicate_formats'): @@ -3279,6 +3042,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. 
' 'Use formats=duplicate extractor argument instead') + def solve_sig(s, spec): + return ''.join(s[i] for i in spec) + def build_fragments(f): return LazyList({ 'url': update_url_query(f['url'], { @@ -3298,279 +3064,360 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # For handling potential pre-playback required waiting period playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6) - for fmt in streaming_formats: - client_name = fmt[STREAMING_DATA_CLIENT_NAME] - available_at = fmt[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait - if fmt.get('targetDurationSec'): + for pr in player_responses: + streaming_data = traverse_obj(pr, 'streamingData') + if not streaming_data: continue + fetch_po_token_func = streaming_data[STREAMING_DATA_FETCH_GVS_PO_TOKEN] + is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] + player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] + client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME) + available_at = streaming_data[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait + streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...)) - itag = str_or_none(fmt.get('itag')) - audio_track = fmt.get('audioTrack') or {} - stream_id = (itag, audio_track.get('id'), fmt.get('isDrc')) - if not all_formats: - if stream_id in stream_ids: - continue + def get_stream_id(fmt_stream): + return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc') - quality = fmt.get('quality') - height = int_or_none(fmt.get('height')) - if quality == 'tiny' or not quality: - quality = fmt.get('audioQuality', '').lower() or quality - # The 3gp format (17) in android client has a quality of "small", - # but is actually worse than other formats - if itag == '17': - quality = 'tiny' - if quality: - if itag: - itag_qualities[itag] = quality - if height: - res_qualities[height] = quality + def process_format_stream(fmt_stream, proto, missing_pot): + nonlocal original_language + itag = str_or_none(fmt_stream.get('itag')) + audio_track = fmt_stream.get('audioTrack') or {} + quality = fmt_stream.get('quality') + height = int_or_none(fmt_stream.get('height')) + if quality == 'tiny' or not quality: + quality = fmt_stream.get('audioQuality', '').lower() or quality + # The 3gp format (17) in android client has a quality of "small", + # but is actually worse than other formats + if itag == '17': + quality = 'tiny' + if quality: + if itag: + itag_qualities[itag] = quality + if height: + res_qualities[height] = quality - display_name = audio_track.get('displayName') or '' - is_original = 'original' in display_name.lower() - is_descriptive = 'descriptive' in display_name.lower() - is_default = audio_track.get('audioIsDefault') - language_code = audio_track.get('id', '').split('.')[0] - if language_code and (is_original or (is_default and not original_language)): - original_language = language_code + display_name = audio_track.get('displayName') or '' + is_original = 'original' in display_name.lower() + is_descriptive = 'descriptive' in display_name.lower() + is_default = audio_track.get('audioIsDefault') + language_code = audio_track.get('id', '').split('.')[0] + if language_code and (is_original or (is_default and not original_language)): + original_language = language_code - has_drm = bool(fmt.get('drmFamilies')) + has_drm = bool(fmt_stream.get('drmFamilies')) - # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment - # (adding 
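To make the sig-spec caching concrete: solve_sig() above only re-applies a precomputed index list, and (as in both the removed _extract_signature_function and the new bulk path further below) a spec is derived once per signature length by running the player's transform over a probe string of unique code points. This only works under the assumption, also made by the cache, that the transform merely rearranges or drops characters of its input. A toy sketch with a fake transform standing in for the real player function:

def derive_spec(sig_transform, sig_length):
    probe = ''.join(map(chr, range(sig_length)))   # chr(0), chr(1), ... - all distinct
    return [ord(c) for c in sig_transform(probe)]  # each output char encodes its source index

def solve_sig(s, spec):
    return ''.join(s[i] for i in spec)

fake_transform = lambda s: s[::-1][2:]   # hypothetical: reverse, then drop two chars
spec = derive_spec(fake_transform, 8)    # -> [5, 4, 3, 2, 1, 0]
assert solve_sig('ABCDEFGH', spec) == fake_transform('ABCDEFGH') == 'FEDCBA'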
`&sq=0` to the URL) and parsing emsg box to determine the - # number of fragment that would subsequently requested with (`&sq=N`) - if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF' and not has_drm: - continue - - if has_drm: - msg = f'Some {client_name} client https formats have been skipped as they are DRM protected. ' - if client_name == 'tv': - msg += ( - f'{"Your account" if self.is_authenticated else "The current session"} may have ' - f'an experiment that applies DRM to all videos on the tv client. ' - f'See https://github.com/yt-dlp/yt-dlp/issues/12563 for more details.' - ) - self.report_warning(msg, video_id, only_once=True) - - fmt_url = fmt.get('url') - if not fmt_url: - sc = urllib.parse.parse_qs(fmt.get('signatureCipher')) - fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0])) - encrypted_sig = try_get(sc, lambda x: x['s'][0]) - if not all((sc, fmt_url, player_url, encrypted_sig)): - msg = f'Some {client_name} client https formats have been skipped as they are missing a url. ' - if client_name in ('web', 'web_safari'): - msg += 'YouTube is forcing SABR streaming for this client. ' - else: + if has_drm: + msg = f'Some {client_name} client {proto} formats have been skipped as they are DRM protected. ' + if client_name == 'tv': msg += ( - f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for ' - f'{"your account" if self.is_authenticated else "the current session"}. ' + f'{"Your account" if self.is_authenticated else "The current session"} may have ' + f'an experiment that applies DRM to all videos on the tv client. ' + f'See https://github.com/yt-dlp/yt-dlp/issues/12563 for more details.' ) - msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details' self.report_warning(msg, video_id, only_once=True) - continue - try: - fmt_url += '&{}={}'.format( - traverse_obj(sc, ('sp', -1)) or 'signature', - self._decrypt_signature(encrypted_sig, video_id, player_url), - ) - except ExtractorError as e: + + tbr = float_or_none(fmt_stream.get('averageBitrate') or fmt_stream.get('bitrate'), 1000) + format_duration = traverse_obj(fmt_stream, ('approxDurationMs', {float_or_none(scale=1000)})) + # Some formats may have much smaller duration than others (possibly damaged during encoding) + # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 + # Make sure to avoid false positives with small duration differences. + # E.g. __2ABJjxzNo, ySuUZEjARPY + is_damaged = try_call(lambda: format_duration < duration // 2) + if is_damaged: self.report_warning( - f'Signature extraction failed: Some formats may be missing\n' - f' player = {player_url}\n' - f' {bug_reports_message(before="")}', - video_id=video_id, only_once=True) - self.write_debug( - f'{video_id}: Signature extraction failure info:\n' - f' encrypted sig = {encrypted_sig}\n' - f' player = {player_url}') - self.write_debug(e, only_once=True) - continue + f'Some {client_name} client {proto} formats are possibly damaged. 
They will be deprioritized', video_id, only_once=True) - query = parse_qs(fmt_url) - if query.get('n'): - try: - decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0]) - fmt_url = update_url_query(fmt_url, { - 'n': decrypt_nsig(query['n'][0], video_id, player_url), - }) - except ExtractorError as e: - if player_url: - self.report_warning( - f'nsig extraction failed: Some formats may be missing\n' - f' n = {query["n"][0]} ; player = {player_url}\n' - f' {bug_reports_message(before="")}', - video_id=video_id, only_once=True) - self.write_debug(e, only_once=True) - else: - self.report_warning( - 'Cannot decrypt nsig without player_url: Some formats may be missing', - video_id=video_id, only_once=True) - continue + if missing_pot and 'missing_pot' not in self._configuration_arg('formats'): + self._report_pot_format_skipped(video_id, client_name, proto) + return None - tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) - format_duration = traverse_obj(fmt, ('approxDurationMs', {float_or_none(scale=1000)})) - # Some formats may have much smaller duration than others (possibly damaged during encoding) - # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 - # Make sure to avoid false positives with small duration differences. - # E.g. __2ABJjxzNo, ySuUZEjARPY - is_damaged = try_call(lambda: format_duration < duration // 2) - if is_damaged: - self.report_warning( - 'Some formats are possibly damaged. They will be deprioritized', video_id, only_once=True) - - fetch_po_token_func = fmt[STREAMING_DATA_FETCH_GVS_PO_TOKEN] - pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS] - - require_po_token = ( - itag not in ['18'] - and gvs_pot_required( - pot_policy, fmt[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER], - fmt[STREAMING_DATA_PLAYER_TOKEN_PROVIDED])) - - po_token = ( - gvs_pots.get(client_name) - or fetch_po_token_func(required=require_po_token or pot_policy.recommended)) - - if po_token: - fmt_url = update_url_query(fmt_url, {'pot': po_token}) - if client_name not in gvs_pots: - gvs_pots[client_name] = po_token - - if not po_token and require_po_token and 'missing_pot' not in self._configuration_arg('formats'): - self._report_pot_format_skipped(video_id, client_name, 'https') - continue - - name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or '' - fps = int_or_none(fmt.get('fps')) or 0 - dct = { - 'asr': int_or_none(fmt.get('audioSampleRate')), - 'filesize': int_or_none(fmt.get('contentLength')), - 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}', - 'format_note': join_nonempty( - join_nonempty(display_name, is_default and ' (default)', delim=''), - name, fmt.get('isDrc') and 'DRC', - try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), - try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), - is_damaged and 'DAMAGED', require_po_token and not po_token and 'MISSING POT', - (self.get_param('verbose') or all_formats) and short_client_name(client_name), - delim=', '), - # Format 22 is likely to be damaged. 
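Since format_note above is assembled almost entirely with join_nonempty, a one-line reminder of its behaviour (falsy pieces are dropped, the rest joined with delim) may help when reading the long call; the sample values are illustrative:

from yt_dlp.utils import join_nonempty

print(join_nonempty('Original (default)', '1080p60', None, False, 'DRC', delim=', '))
# -> 'Original (default), 1080p60, DRC'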
See https://github.com/yt-dlp/yt-dlp/issues/3372 - 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0), - 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1 - 'audio_channels': fmt.get('audioChannels'), - 'height': height, - 'quality': q(quality) - bool(fmt.get('isDrc')) / 2, - 'has_drm': has_drm, - 'tbr': tbr, - 'filesize_approx': filesize_from_tbr(tbr, format_duration), - 'url': fmt_url, - 'width': int_or_none(fmt.get('width')), - 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None, - 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1, - # Strictly de-prioritize damaged and 3gp formats - 'preference': -10 if is_damaged else -2 if itag == '17' else None, - } - mime_mobj = re.match( - r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '') - if mime_mobj: - dct['ext'] = mimetype2ext(mime_mobj.group(1)) - dct.update(parse_codecs(mime_mobj.group(2))) - if itag: - itags[itag].add(('https', dct.get('language'))) - stream_ids.append(stream_id) - single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec')) - if single_stream and dct.get('ext'): - dct['container'] = dct['ext'] + '_dash' - - # For handling potential pre-playback required waiting period - if live_status not in ('is_live', 'post_live'): - dct['available_at'] = available_at - - if (all_formats or 'dashy' in format_types) and dct['filesize']: - yield { - **dct, - 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'], - 'protocol': 'http_dash_segments', - 'fragments': build_fragments(dct), + name = fmt_stream.get('qualityLabel') or quality.replace('audio_quality_', '') or '' + fps = int_or_none(fmt_stream.get('fps')) or 0 + dct = { + 'asr': int_or_none(fmt_stream.get('audioSampleRate')), + 'filesize': int_or_none(fmt_stream.get('contentLength')), + 'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}', + 'format_note': join_nonempty( + join_nonempty(display_name, is_default and ' (default)', delim=''), + name, fmt_stream.get('isDrc') and 'DRC', + try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), + try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), + is_damaged and 'DAMAGED', missing_pot and 'MISSING POT', + (self.get_param('verbose') or all_formats) and short_client_name(client_name), + delim=', '), + # Format 22 is likely to be damaged. 
See https://github.com/yt-dlp/yt-dlp/issues/3372 + 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0), + 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1 + 'audio_channels': fmt_stream.get('audioChannels'), + 'height': height, + 'quality': q(quality) - bool(fmt_stream.get('isDrc')) / 2, + 'has_drm': has_drm, + 'tbr': tbr, + 'filesize_approx': filesize_from_tbr(tbr, format_duration), + 'width': int_or_none(fmt_stream.get('width')), + 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None, + 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1, + # Strictly de-prioritize damaged and 3gp formats + 'preference': -10 if is_damaged else -2 if itag == '17' else None, } - if all_formats or 'dashy' not in format_types: - dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE} - yield dct + mime_mobj = re.match( + r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt_stream.get('mimeType') or '') + if mime_mobj: + dct['ext'] = mimetype2ext(mime_mobj.group(1)) + dct.update(parse_codecs(mime_mobj.group(2))) - needs_live_processing = self._needs_live_processing(live_status, duration) - skip_bad_formats = 'incomplete' not in format_types + single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec')) + if single_stream and dct.get('ext'): + dct['container'] = dct['ext'] + '_dash' - skip_manifests = set(self._configuration_arg('skip')) - if (needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway - or (needs_live_processing and skip_bad_formats)): - skip_manifests.add('hls') - if skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live': - skip_manifests.add('dash') + return dct - def process_manifest_format(f, proto, client_name, itag, missing_pot): - key = (proto, f.get('language')) - if not all_formats and key in itags[itag]: - return False + def process_https_formats(): + proto = 'https' + https_fmts = [] + for fmt_stream in streaming_formats: + if fmt_stream.get('targetDurationSec'): + continue - # For handling potential pre-playback required waiting period - if live_status not in ('is_live', 'post_live'): - f['available_at'] = available_at + # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment + # (adding `&sq=0` to the URL) and parsing emsg box to determine the + # number of fragment that would subsequently requested with (`&sq=N`) + if fmt_stream.get('type') == 'FORMAT_STREAM_TYPE_OTF' and not bool(fmt_stream.get('drmFamilies')): + continue - if f.get('source_preference') is None: - f['source_preference'] = -1 + stream_id = get_stream_id(fmt_stream) + if not all_formats: + if stream_id in stream_ids: + continue - # Deprioritize since its pre-merged m3u8 formats may have lower quality audio streams - if client_name == 'web_safari' and proto == 'hls' and live_status != 'is_live': - f['source_preference'] -= 1 + pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS] - if missing_pot: - f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ') - f['source_preference'] -= 20 + require_po_token = ( + stream_id[0] not in ['18'] + and gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided)) - itags[itag].add(key) + po_token = ( + gvs_pots.get(client_name) + or fetch_po_token_func(required=require_po_token or pot_policy.recommended)) + if po_token: + if client_name not 
in gvs_pots: + gvs_pots[client_name] = po_token - if itag and all_formats: - f['format_id'] = f'{itag}-{proto}' - elif any(p != proto for p, _ in itags[itag]): - f['format_id'] = f'{itag}-{proto}' - elif itag: - f['format_id'] = itag + fmt_url = fmt_stream.get('url') + encrypted_sig, sc = None, None + if not fmt_url: + sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher')) + fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0])) + encrypted_sig = try_get(sc, lambda x: x['s'][0]) + if not all((sc, fmt_url, player_url, encrypted_sig)): + msg = f'Some {client_name} client https formats have been skipped as they are missing a url. ' + if client_name in ('web', 'web_safari'): + msg += 'YouTube is forcing SABR streaming for this client. ' + else: + msg += ( + f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for ' + f'{"your account" if self.is_authenticated else "the current session"}. ' + ) + msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details' + self.report_warning(msg, video_id, only_once=True) + continue - if original_language and f.get('language') == original_language: - f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ') - f['language_preference'] = PREFERRED_LANG_VALUE + fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token) + if not fmt: + continue - if itag in ('616', '235'): - f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ') - f['source_preference'] += 100 + # signature + # Attempt to load sig spec from cache + if encrypted_sig: + spec_cache_id = self._sig_spec_cache_id(player_url, len(encrypted_sig)) + spec = self._load_sig_spec_from_cache(spec_cache_id) + if spec: + self.write_debug(f'Using cached signature function {spec_cache_id}', only_once=True) + fmt_url += '&{}={}'.format(traverse_obj(sc, ('sp', -1)) or 'signature', + solve_sig(encrypted_sig, spec)) + else: + fmt['_jsc_s_challenge'] = encrypted_sig + fmt['_jsc_s_sc'] = sc - f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1)) - if f['quality'] == -1 and f.get('height'): - f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))]) - if self.get_param('verbose') or all_formats: - f['format_note'] = join_nonempty( - f.get('format_note'), short_client_name(client_name), delim=', ') - if f.get('fps') and f['fps'] <= 1: - del f['fps'] + # n challenge + query = parse_qs(fmt_url) + if query.get('n'): + n_challenge = query['n'][0] + if n_challenge in self._player_cache: + fmt_url = update_url_query(fmt_url, {'n': self._player_cache[n_challenge]}) + else: + fmt['_jsc_n_challenge'] = n_challenge - if proto == 'hls' and f.get('has_drm'): - f['has_drm'] = 'maybe' - f['source_preference'] -= 5 - return True + if po_token: + fmt_url = update_url_query(fmt_url, {'pot': po_token}) - subtitles = {} - for sd in streaming_data: - client_name = sd[STREAMING_DATA_CLIENT_NAME] - fetch_pot_func = sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN] - is_premium_subscriber = sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] - has_player_token = sd[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] + fmt['url'] = fmt_url - hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl') + if stream_id[0]: + itags[stream_id[0]].add((proto, fmt.get('language'))) + stream_ids.append(stream_id) + + # For handling potential pre-playback required waiting period + if live_status not in ('is_live', 'post_live'): + fmt['available_at'] = available_at + + if 
(all_formats or 'dashy' in format_types) and fmt['filesize']: + https_fmts.append({ + **fmt, + 'format_id': f'{fmt["format_id"]}-dashy' if all_formats else fmt['format_id'], + 'protocol': 'http_dash_segments', + 'fragments': build_fragments(fmt), + }) + if all_formats or 'dashy' not in format_types: + fmt['downloader_options'] = {'http_chunk_size': CHUNK_SIZE} + https_fmts.append(fmt) + + # Bulk process sig/n handling + # Retrieve all JSC Sig and n requests for this player response in one go + n_challenges = {} + s_challenges = {} + for fmt in https_fmts: + # This will de-duplicate requests + n_challenge = fmt.pop('_jsc_n_challenge', None) + if n_challenge is not None: + n_challenges.setdefault(n_challenge, []).append(fmt) + + s_challenge = fmt.pop('_jsc_s_challenge', None) + if s_challenge is not None: + s_challenges.setdefault(len(s_challenge), {}).setdefault(s_challenge, []).append(fmt) + + challenge_requests = [] + if n_challenges: + challenge_requests.append(JsChallengeRequest( + type=JsChallengeType.N, + video_id=video_id, + input=NChallengeInput(challenges=list(n_challenges.keys()), player_url=player_url))) + if s_challenges: + challenge_requests.append(JsChallengeRequest( + type=JsChallengeType.SIG, + video_id=video_id, + input=SigChallengeInput(challenges=[''.join(map(chr, range(spec_id))) for spec_id in s_challenges], player_url=player_url))) + + if challenge_requests: + for _challenge_request, challenge_response in self._jsc_director.bulk_solve(challenge_requests): + if challenge_response.type == JsChallengeType.SIG: + for challenge, result in challenge_response.output.results.items(): + spec_id = len(challenge) + spec = [ord(c) for c in result] + self._store_sig_spec_to_cache(self._sig_spec_cache_id(player_url, spec_id), spec) + s_challenge_data = s_challenges.pop(spec_id, {}) + if not s_challenge_data: + continue + for s_challenge, fmts in s_challenge_data.items(): + solved_challenge = solve_sig(s_challenge, spec) + for fmt in fmts: + sc = fmt.pop('_jsc_s_sc') + fmt['url'] += '&{}={}'.format( + traverse_obj(sc, ('sp', -1)) or 'signature', + solved_challenge) + + elif challenge_response.type == JsChallengeType.N: + for challenge, result in challenge_response.output.results.items(): + fmts = n_challenges.pop(challenge, []) + for fmt in fmts: + self._player_cache[challenge] = result + fmt['url'] = update_url_query(fmt['url'], {'n': result}) + + # Raise warning if any challenge requests remain + # Depending on type of challenge request + + help_message = ( + 'Ensure you have a supported JavaScript runtime and ' + 'challenge solver script distribution installed. ' + 'Review any warnings presented before this message. ' + f'For more details, refer to {_EJS_WIKI_URL}') + + if s_challenges: + self.report_warning( + f'Signature solving failed: Some formats may be missing. {help_message}', + video_id=video_id, only_once=True) + if n_challenges: + self.report_warning( + f'n challenge solving failed: Some formats may be missing. 
{help_message}', + video_id=video_id, only_once=True) + + for cfmts in list(s_challenges.values()) + list(n_challenges.values()): + for fmt in cfmts: + if fmt in https_fmts: + https_fmts.remove(fmt) + + yield from https_fmts + + yield from process_https_formats() + + needs_live_processing = self._needs_live_processing(live_status, duration) + skip_bad_formats = 'incomplete' not in format_types + + skip_manifests = set(self._configuration_arg('skip')) + if (needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway + or (needs_live_processing and skip_bad_formats)): + skip_manifests.add('hls') + + if skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live': + skip_manifests.add('dash') + + def process_manifest_format(f, proto, client_name, itag, missing_pot): + key = (proto, f.get('language')) + if not all_formats and key in itags[itag]: + return False + + # For handling potential pre-playback required waiting period + if live_status not in ('is_live', 'post_live'): + f['available_at'] = available_at + + if f.get('source_preference') is None: + f['source_preference'] = -1 + + # Deprioritize since its pre-merged m3u8 formats may have lower quality audio streams + if client_name == 'web_safari' and proto == 'hls' and live_status != 'is_live': + f['source_preference'] -= 1 + + if missing_pot: + f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ') + f['source_preference'] -= 20 + + itags[itag].add(key) + + if itag and all_formats: + f['format_id'] = f'{itag}-{proto}' + elif any(p != proto for p, _ in itags[itag]): + f['format_id'] = f'{itag}-{proto}' + elif itag: + f['format_id'] = itag + + if original_language and f.get('language') == original_language: + f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ') + f['language_preference'] = PREFERRED_LANG_VALUE + + if itag in ('616', '235'): + f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ') + f['source_preference'] += 100 + + f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1)) + if f['quality'] == -1 and f.get('height'): + f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))]) + if self.get_param('verbose') or all_formats: + f['format_note'] = join_nonempty( + f.get('format_note'), short_client_name(client_name), delim=', ') + if f.get('fps') and f['fps'] <= 1: + del f['fps'] + + if proto == 'hls' and f.get('has_drm'): + f['has_drm'] = 'maybe' + f['source_preference'] -= 5 + return True + + hls_manifest_url = 'hls' not in skip_manifests and streaming_data.get('hlsManifestUrl') if hls_manifest_url: pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg( client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS] - require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token) - po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended)) + require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided) + po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended)) if po_token: hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}' if client_name not in gvs_pots: @@ -3590,12 +3437,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'/itag/(\d+)', f['url'], 'itag', default=None), require_po_token and not po_token): yield f - dash_manifest_url = 'dash' not in skip_manifests and 
sd.get('dashManifestUrl') + dash_manifest_url = 'dash' not in skip_manifests and streaming_data.get('dashManifestUrl') if dash_manifest_url: pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg( client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.DASH] - require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token) - po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended)) + require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, player_token_provided) + po_token = gvs_pots.get(client_name, fetch_po_token_func(required=require_po_token or pot_policy.recommended)) if po_token: dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}' if client_name not in gvs_pots: @@ -3615,7 +3462,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) if needs_live_processing: f['is_from_start'] = True - yield f yield subtitles @@ -3688,14 +3534,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else 'was_live' if live_content else 'not_live' if False in (is_live, live_content) else None) - streaming_data = traverse_obj(player_responses, (..., 'streamingData')) - *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration) + *formats, subtitles = self._extract_formats_and_subtitles(video_id, player_responses, player_url, live_status, duration) if all(f.get('has_drm') for f in formats): # If there are no formats that definitely don't have DRM, all have DRM for f in formats: f['has_drm'] = True - return live_broadcast_details, live_status, streaming_data, formats, subtitles + return live_broadcast_details, live_status, formats, subtitles def _download_initial_data(self, video_id, webpage, webpage_client, webpage_ytcfg): initial_data = None @@ -3855,8 +3700,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): or int_or_none(get_first(microformats, 'lengthSeconds')) or parse_duration(search_meta('duration')) or None) - live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \ + live_broadcast_details, live_status, formats, automatic_captions = \ self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration) + streaming_data = traverse_obj(player_responses, (..., 'streamingData')) if live_status == 'post_live': self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode') diff --git a/yt_dlp/extractor/youtube/jsc/README.md b/yt_dlp/extractor/youtube/jsc/README.md new file mode 100644 index 0000000000..1bd7a3ff8a --- /dev/null +++ b/yt_dlp/extractor/youtube/jsc/README.md @@ -0,0 +1,132 @@ +# YoutubeIE JS Challenge Provider Framework + +As part of the YouTube extractor, we have a framework for solving n/sig JS Challenges programmatically. This can be used by plugins. + +> [!TIP] +> If publishing a JS Challenge Provider plugin to GitHub, add the [yt-dlp-jsc-provider](https://github.com/topics/yt-dlp-jsc-provider) topic to your repository to help users find it. + + +## Public APIs + +- `yt_dlp.extractor.youtube.jsc.provider` + +Everything else is **internal-only** and no guarantees are made about the API stability. + +> [!WARNING] +> We will try our best to maintain stability with the public APIs. +> However, due to the nature of extractors and YouTube, we may need to remove or change APIs in the future. +> If you are using these APIs outside yt-dlp plugins, please account for this by importing them safely. 
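+
+For example, code using these APIs can guard against them being moved or removed in a future version (a minimal, illustrative sketch):
+
+```python
+try:
+    from yt_dlp.extractor.youtube.jsc.provider import JsChallengeProvider
+except ImportError:
+    # This yt-dlp version does not expose the JS Challenge Provider framework
+    JsChallengeProvider = None  # fall back or disable the integration
+```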
+ +## JS Challenge Provider + +`yt_dlp.extractor.youtube.jsc.provider` + +```python +from yt_dlp.extractor.youtube.jsc.provider import ( + register_provider, + register_preference, + JsChallengeProvider, + JsChallengeRequest, + JsChallengeResponse, + JsChallengeProviderError, + JsChallengeProviderRejectedRequest, + JsChallengeType, + JsChallengeProviderResponse, + NChallengeOutput, +) +from yt_dlp.utils import traverse_obj, Popen +import json +import subprocess +import typing + +@register_provider +class MyJsChallengeProviderJCP(JsChallengeProvider): # Provider class name must end with "JCP" + PROVIDER_VERSION = '0.2.1' + # Define a unique display name for the provider + PROVIDER_NAME = 'my-provider' + BUG_REPORT_LOCATION = 'https://issues.example.com/report' + + # Set supported challenge types. + # If None, the provider will handle all types. + _SUPPORTED_TYPES = [JsChallengeType.N] + + def is_available(self) -> bool: + """ + Check if the provider is available (e.g. all required dependencies are available) + This is used to determine if the provider should be used and to provide debug information. + + IMPORTANT: This method SHOULD NOT make any network requests or perform any expensive operations. + + Since this is called multiple times, we recommend caching the result. + """ + return True + + def close(self): + # Optional close hook, called when YoutubeDL is closed. + pass + + def _real_bulk_solve(self, requests: list[JsChallengeRequest]) -> typing.Generator[JsChallengeProviderResponse, None, None]: + # ℹ️ If you need to do additional validation on the requests. + # Raise yt_dlp.extractor.youtube.jsc.provider.JsChallengeProviderRejectedRequest if the request is not supported. + if len("something") > 255: + raise JsChallengeProviderRejectedRequest('Challenges longer than 255 are not supported', expected=True) + + + # ℹ️ Settings are pulled from extractor args passed to yt-dlp with the key `youtubejsc-`. + # For this example, the extractor arg would be: + # `--extractor-args "youtubejsc-myjschallengeprovider:bin_path=/path/to/bin"` + bin_path = self._configuration_arg( + 'bin_path', default=['/path/to/bin'])[0] + + # See below for logging guidelines + self.logger.trace(f'Using bin path: {bin_path}') + + for request in requests: + # You can use the _get_player method to get the player JS code if needed. + # This shares the same caching as the YouTube extractor, so it will not make unnecessary requests. + player_js = self._get_player(request.video_id, request.input.player_url) + cmd = f'{bin_path} {request.input.challenges} {player_js}' + self.logger.info(f'Executing command: {cmd}') + stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE) + if ret != 0: + # ℹ️ If there is an error, raise JsChallengeProviderError. + # The request will be sent to the next provider if there is one. + # You can specify whether it is expected or not. If it is unexpected, + # the log will include a link to the bug report location (BUG_REPORT_LOCATION). + + # raise JsChallengeProviderError(f'Command returned error code {ret}', expected=False) + + # You can also only fail this specific request by returning a JsChallengeProviderResponse with the error. + # This will allow other requests to be processed by this provider. 
+ yield JsChallengeProviderResponse( + request=request, + error=JsChallengeProviderError(f'Command returned error code {ret}', expected=False) + ) + + yield JsChallengeProviderResponse( + request=request, + response=JsChallengeResponse( + type=JsChallengeType.N, + output=NChallengeOutput(results=traverse_obj(json.loads(stdout))), + )) + + +# If there are multiple JS Challenge Providers that can handle the same JsChallengeRequest(s), +# you can define a preference function to increase/decrease the priority of providers. + +@register_preference(MyJsChallengeProviderJCP) +def my_provider_preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int: + return 50 +``` + +## Logging Guidelines + +- Use the `self.logger` object to log messages. +- When making HTTP requests or any other time-expensive operation, use `self.logger.info` to log a message to standard non-verbose output. + - This lets users know what is happening when a time-expensive operation is taking place. +- Technical information such as a command being executed should be logged to `self.logger.debug`. +- Use `self.logger.trace` for very detailed information that is only useful for debugging, to avoid cluttering the debug log. + +## Debugging + +- Use `-v --extractor-args "youtube:jsc_trace=true"` to enable JS Challenge debug output. diff --git a/yt_dlp/extractor/youtube/jsc/__init__.py b/yt_dlp/extractor/youtube/jsc/__init__.py new file mode 100644 index 0000000000..b0a0f037d0 --- /dev/null +++ b/yt_dlp/extractor/youtube/jsc/__init__.py @@ -0,0 +1,5 @@ +# Trigger import of built-in providers +from ._builtin.bun import BunJCP as _BunJCP  # noqa: F401 +from ._builtin.deno import DenoJCP as _DenoJCP  # noqa: F401 +from ._builtin.node import NodeJCP as _NodeJCP  # noqa: F401 +from ._builtin.quickjs import QuickJSJCP as _QuickJSJCP  # noqa: F401 diff --git a/yt_dlp/extractor/youtube/jsc/_builtin/__init__.py b/yt_dlp/extractor/youtube/jsc/_builtin/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/yt_dlp/extractor/youtube/jsc/_builtin/bun.py b/yt_dlp/extractor/youtube/jsc/_builtin/bun.py new file mode 100644 index 0000000000..0a247ba971 --- /dev/null +++ b/yt_dlp/extractor/youtube/jsc/_builtin/bun.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +import os +import re +import shlex +import subprocess +import urllib.parse + +from yt_dlp.extractor.youtube.jsc._builtin.ejs import ( + _EJS_WIKI_URL, + EJSBaseJCP, + Script, + ScriptSource, + ScriptType, + ScriptVariant, +) +from yt_dlp.extractor.youtube.jsc._builtin.vendor import load_script +from yt_dlp.extractor.youtube.jsc.provider import ( + JsChallengeProvider, + JsChallengeProviderError, + JsChallengeRequest, + register_preference, + register_provider, +) +from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider +from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message +from yt_dlp.utils import Popen +from yt_dlp.utils.networking import HTTPHeaderDict, clean_proxies + +# KNOWN ISSUES: +# - If node_modules is present and includes a requested lib, the version we request is ignored +# and whatever is installed in node_modules is used. +# - No way to ignore existing node_modules, lock files, etc. +# - No sandboxing options available +# - Cannot detect if npm packages are cached without potentially downloading them. +# `--no-install` appears to disable the cache.
+# - npm auto-install may fail with an integrity error when using HTTP proxies +# - npm auto-install HTTP proxy support may be limited on older Bun versions + + +@register_provider +class BunJCP(EJSBaseJCP, BuiltinIEContentProvider): + PROVIDER_NAME = 'bun' + JS_RUNTIME_NAME = 'bun' + BUN_NPM_LIB_FILENAME = 'yt.solver.bun.lib.js' + SUPPORTED_PROXY_SCHEMES = ['http', 'https'] + + def _iter_script_sources(self): + yield from super()._iter_script_sources() + yield ScriptSource.BUILTIN, self._bun_npm_source + + def _bun_npm_source(self, script_type: ScriptType, /): + if script_type != ScriptType.LIB: + return None + if 'ejs:npm' not in self.ie.get_param('remote_components', []): + return self._skip_component('ejs:npm') + + # Check to see if the environment proxies are compatible with Bun npm source + if unsupported_scheme := self._check_env_proxies(self._get_env_options()): + self.logger.warning( + f'Bun NPM package downloads only support HTTP/HTTPS proxies; skipping remote NPM package downloads. ' + f'Provide another distribution of the challenge solver script or use ' + f'another JS runtime that supports "{unsupported_scheme}" proxies. ' + f'For more information and alternatives, refer to {_EJS_WIKI_URL}') + return None + + # Bun-specific lib scripts that use Bun autoimport + # https://bun.com/docs/runtime/autoimport + error_hook = lambda e: self.logger.warning( + f'Failed to read bun challenge solver lib script: {e}{provider_bug_report_message(self)}') + code = load_script( + self.BUN_NPM_LIB_FILENAME, error_hook=error_hook) + if code: + return Script(script_type, ScriptVariant.BUN_NPM, ScriptSource.BUILTIN, self._SCRIPT_VERSION, code) + return None + + def _check_env_proxies(self, env): + # check that the schemes of both HTTP_PROXY and HTTPS_PROXY are supported + for key in ('HTTP_PROXY', 'HTTPS_PROXY'): + proxy = env.get(key) + if not proxy: + continue + scheme = urllib.parse.urlparse(proxy).scheme.lower() + if scheme not in self.SUPPORTED_PROXY_SCHEMES: + return scheme + return None + + def _get_env_options(self) -> dict[str, str]: + options = os.environ.copy()  # pass through existing bun env vars + request_proxies = self.ie._downloader.proxies.copy() + clean_proxies(request_proxies, HTTPHeaderDict()) + + # Apply 'all' proxy first, then allow per-scheme overrides + if request_proxies.get('all') is not None: + options['HTTP_PROXY'] = options['HTTPS_PROXY'] = request_proxies['all'] + for key, env in (('http', 'HTTP_PROXY'), ('https', 'HTTPS_PROXY')): + val = request_proxies.get(key) + if val is not None: + options[env] = val + if self.ie.get_param('nocheckcertificate'): + options['NODE_TLS_REJECT_UNAUTHORIZED'] = '0' + + # Prevent segfault: + options.pop('JSC_useJIT', None) + if self.ejs_setting('jitless', ['false']) != ['false']: + options['BUN_JSC_useJIT'] = '0' + + return options + + def _run_js_runtime(self, stdin: str, /) -> str: + # https://bun.com/docs/cli/run + options = ['--no-addons', '--prefer-offline'] + if self._lib_script.variant == ScriptVariant.BUN_NPM: + # Enable auto-install even if node_modules is present + options.append('--install=fallback') + else: + options.append('--no-install') + cmd = [self.runtime_info.path, '--bun', 'run', *options, '-'] + self.logger.debug(f'Running bun: {shlex.join(cmd)}') + + with Popen( + cmd, + text=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self._get_env_options(), + ) as proc: + stdout, stderr = proc.communicate_or_kill(stdin) + stderr = self._clean_stderr(stderr) + if proc.returncode or
stderr: + msg = f'Error running bun process (returncode: {proc.returncode})' + if stderr: + msg = f'{msg}: {stderr.strip()}' + raise JsChallengeProviderError(msg) + return stdout + + def _clean_stderr(self, stderr): + return '\n'.join( + line for line in stderr.splitlines() + if not re.match(r'^Bun v\d+\.\d+\.\d+ \([\w\s]+\)$', line)) + + +@register_preference(BunJCP) +def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int: + return 800 diff --git a/yt_dlp/extractor/youtube/jsc/_builtin/deno.py b/yt_dlp/extractor/youtube/jsc/_builtin/deno.py new file mode 100644 index 0000000000..0c718db6d3 --- /dev/null +++ b/yt_dlp/extractor/youtube/jsc/_builtin/deno.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +import os +import re +import shlex +import subprocess + +from yt_dlp.extractor.youtube.jsc._builtin.ejs import ( + EJSBaseJCP, + Script, + ScriptSource, + ScriptType, + ScriptVariant, +) +from yt_dlp.extractor.youtube.jsc._builtin.vendor import load_script +from yt_dlp.extractor.youtube.jsc.provider import ( + JsChallengeProvider, + JsChallengeProviderError, + JsChallengeRequest, + register_preference, + register_provider, +) +from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider +from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message +from yt_dlp.utils import Popen, remove_terminal_sequences +from yt_dlp.utils.networking import HTTPHeaderDict, clean_proxies + + +@register_provider +class DenoJCP(EJSBaseJCP, BuiltinIEContentProvider): + PROVIDER_NAME = 'deno' + JS_RUNTIME_NAME = 'deno' + + _DENO_BASE_OPTIONS = ['--no-prompt', '--no-remote', '--no-lock', '--node-modules-dir=none', '--no-config'] + DENO_NPM_LIB_FILENAME = 'yt.solver.deno.lib.js' + _NPM_PACKAGES_CACHED = False + + def _iter_script_sources(self): + yield from super()._iter_script_sources() + yield ScriptSource.BUILTIN, self._deno_npm_source + + def _deno_npm_source(self, script_type: ScriptType, /): + if script_type != ScriptType.LIB: + return None + # Deno-specific lib scripts that use Deno NPM imports + error_hook = lambda e: self.logger.warning( + f'Failed to read deno challenge solver lib script: {e}{provider_bug_report_message(self)}') + code = load_script( + self.DENO_NPM_LIB_FILENAME, error_hook=error_hook) + if not code: + return None + if 'ejs:npm' not in self.ie.get_param('remote_components', []): + # We may still be able to continue if the npm packages are available/cached + self._NPM_PACKAGES_CACHED = self._npm_packages_cached(code) + if not self._NPM_PACKAGES_CACHED: + return self._skip_component('ejs:npm') + return Script(script_type, ScriptVariant.DENO_NPM, ScriptSource.BUILTIN, self._SCRIPT_VERSION, code) + + def _npm_packages_cached(self, stdin: str) -> bool: + # Check if npm packages are cached, so we can run without --remote-components ejs:npm + self.logger.debug('Checking if npm packages are cached') + try: + self._run_deno(stdin, [*self._DENO_BASE_OPTIONS, '--cached-only']) + except JsChallengeProviderError as e: + self.logger.trace(f'Deno npm packages not cached: {e}') + return False + return True + + def _run_js_runtime(self, stdin: str, /) -> str: + options = [*self._DENO_BASE_OPTIONS] + if self._lib_script.variant == ScriptVariant.DENO_NPM and self._NPM_PACKAGES_CACHED: + options.append('--cached-only') + elif self._lib_script.variant != ScriptVariant.DENO_NPM: + options.append('--no-npm') + options.append('--cached-only') + if self.ie.get_param('nocheckcertificate'): + 
options.append('--unsafely-ignore-certificate-errors') + # XXX: Convert this extractor-arg into a general option if/when a JSI framework is implemented + if self.ejs_setting('jitless', ['false']) != ['false']: + options.append('--v8-flags=--jitless') + return self._run_deno(stdin, options) + + def _get_env_options(self) -> dict[str, str]: + options = os.environ.copy() # pass through existing deno env vars + request_proxies = self.ie._downloader.proxies.copy() + clean_proxies(request_proxies, HTTPHeaderDict()) + # Apply 'all' proxy first, then allow per-scheme overrides + if 'all' in request_proxies and request_proxies['all'] is not None: + options['HTTP_PROXY'] = options['HTTPS_PROXY'] = request_proxies['all'] + for key, env in (('http', 'HTTP_PROXY'), ('https', 'HTTPS_PROXY'), ('no', 'NO_PROXY')): + if key in request_proxies and request_proxies[key] is not None: + options[env] = request_proxies[key] + return options + + def _run_deno(self, stdin, options) -> str: + cmd = [self.runtime_info.path, 'run', *options, '-'] + self.logger.debug(f'Running deno: {shlex.join(cmd)}') + with Popen( + cmd, + text=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self._get_env_options(), + ) as proc: + stdout, stderr = proc.communicate_or_kill(stdin) + stderr = self._clean_stderr(stderr) + if proc.returncode or stderr: + msg = f'Error running deno process (returncode: {proc.returncode})' + if stderr: + msg = f'{msg}: {stderr.strip()}' + raise JsChallengeProviderError(msg) + return stdout + + def _clean_stderr(self, stderr): + return '\n'.join( + line for line in stderr.splitlines() + if not ( + re.match(r'^Download\s+https\S+$', remove_terminal_sequences(line)) + or re.match(r'DANGER: TLS certificate validation is disabled for all hostnames', remove_terminal_sequences(line)))) + + +@register_preference(DenoJCP) +def preference(provider: JsChallengeProvider, requests: list[JsChallengeRequest]) -> int: + return 1000 diff --git a/yt_dlp/extractor/youtube/jsc/_builtin/ejs.py b/yt_dlp/extractor/youtube/jsc/_builtin/ejs.py new file mode 100644 index 0000000000..52d7ecf170 --- /dev/null +++ b/yt_dlp/extractor/youtube/jsc/_builtin/ejs.py @@ -0,0 +1,326 @@ +from __future__ import annotations + +import collections +import dataclasses +import enum +import functools +import hashlib +import json + +from yt_dlp.dependencies import yt_dlp_ejs as _has_ejs +from yt_dlp.extractor.youtube.jsc._builtin import vendor +from yt_dlp.extractor.youtube.jsc.provider import ( + JsChallengeProvider, + JsChallengeProviderError, + JsChallengeProviderRejectedRequest, + JsChallengeProviderResponse, + JsChallengeResponse, + JsChallengeType, + NChallengeOutput, + SigChallengeOutput, +) +from yt_dlp.extractor.youtube.pot._provider import configuration_arg +from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message +from yt_dlp.utils._jsruntime import JsRuntimeInfo + +if _has_ejs: + import yt_dlp_ejs.yt.solver + +TYPE_CHECKING = False +if TYPE_CHECKING: + from collections.abc import Callable, Generator + + from yt_dlp.extractor.youtube.jsc.provider import JsChallengeRequest + +_EJS_WIKI_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/EJS' + + +class ScriptType(enum.Enum): + LIB = 'lib' + CORE = 'core' + + +class ScriptVariant(enum.Enum): + UNKNOWN = 'unknown' + MINIFIED = 'minified' + UNMINIFIED = 'unminified' + DENO_NPM = 'deno_npm' + BUN_NPM = 'bun_npm' + + +class ScriptSource(enum.Enum): + PYPACKAGE = 'python package' # PyPI, PyInstaller exe, zipimport binary, etc + CACHE = 'cache' 
# GitHub release assets (cached) + WEB = 'web' # GitHub release assets (downloaded) + BUILTIN = 'builtin' # vendored (full core script; import-only lib script + NPM cache) + + +@dataclasses.dataclass +class Script: + type: ScriptType + variant: ScriptVariant + source: ScriptSource + version: str + code: str + + @functools.cached_property + def hash(self, /) -> str: + return hashlib.sha3_512(self.code.encode()).hexdigest() + + def __str__(self, /): + return f'