Mirror of https://github.com/yt-dlp/yt-dlp.git
Synced 2026-01-11 09:21:22 +00:00

Compare commits: master...copilot/ad (1 commit)
Commit: 21ec21c936
.github/actionlint.yml (vendored) · 1 change

@@ -1,4 +1,5 @@
 config-variables:
+- KEEP_CACHE_WARM
 - PUSH_VERSION_COMMIT
 - UPDATE_TO_VERIFICATION
 - PYPI_PROJECT
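The single added line matters because actionlint treats any `${{ vars.NAME }}` reference as an error unless the name is declared under `config-variables`, so the new cache-warmer workflow's `vars.KEEP_CACHE_WARM` gate has to be registered here. A minimal sketch of the resulting config:

```yaml
# .github/actionlint.yml: declares the repository variables that workflows
# may reference; actionlint flags any ${{ vars.* }} name not listed here.
config-variables:
- KEEP_CACHE_WARM        # added: gates the new cache-warmer workflow
- PUSH_VERSION_COMMIT
- UPDATE_TO_VERIFICATION
- PYPI_PROJECT
```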
.github/workflows/build.yml (vendored) · 138 changes
@@ -74,11 +74,11 @@ on:
       default: true
       type: boolean
 
-permissions: {}
+permissions:
+  contents: read
 
 jobs:
   process:
     name: Process
     runs-on: ubuntu-latest
     outputs:
       origin: ${{ steps.process_inputs.outputs.origin }}
@@ -146,6 +146,7 @@ jobs:
                 'runner': 'ubuntu-24.04-arm',
                 'qemu_platform': 'linux/arm/v7',
                 'onefile': False,
+                'cache_requirements': True,
                 'update_to': 'yt-dlp/yt-dlp@2023.03.04',
             }],
             'musllinux': [{
@@ -174,6 +175,7 @@ jobs:
             exe.setdefault('qemu_platform', None)
             exe.setdefault('onefile', True)
             exe.setdefault('onedir', True)
+            exe.setdefault('cache_requirements', False)
             exe.setdefault('python_version', os.environ['PYTHON_VERSION'])
             exe.setdefault('update_to', os.environ['UPDATE_TO'])
             if not any(INPUTS.get(key) for key in EXE_MAP):
@@ -184,11 +186,8 @@ jobs:
             f.write(f'matrix={json.dumps(matrix)}')
 
   unix:
-    name: unix
-    needs: [process]
+    needs: process
     if: inputs.unix
-    permissions:
-      contents: read
     runs-on: ubuntu-latest
     env:
       CHANNEL: ${{ inputs.channel }}
@@ -197,12 +196,11 @@ jobs:
       UPDATE_TO: yt-dlp/yt-dlp@2025.09.05
 
     steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
+      - uses: actions/checkout@v5
        with:
          fetch-depth: 0  # Needed for changelog
-          persist-credentials: false
 
-      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
+      - uses: actions/setup-python@v6
        with:
          python-version: "3.10"
 
@@ -231,7 +229,7 @@ jobs:
          [[ "${version}" != "${downgraded_version}" ]]
 
      - name: Upload artifacts
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # v6.0.0
+        uses: actions/upload-artifact@v4
        with:
          name: build-bin-${{ github.job }}
          path: |
@@ -241,10 +239,8 @@ jobs:
 
   linux:
     name: ${{ matrix.os }} (${{ matrix.arch }})
-    needs: [process]
     if: inputs.linux || inputs.linux_armv7l || inputs.musllinux
-    permissions:
-      contents: read
+    needs: process
     runs-on: ${{ matrix.runner }}
     strategy:
       fail-fast: false
@@ -261,16 +257,26 @@ jobs:
       SKIP_ONEFILE_BUILD: ${{ (!matrix.onefile && '1') || '' }}
 
     steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
-        with:
-          persist-credentials: false
+      - uses: actions/checkout@v5
+
+      - name: Cache requirements
+        if: matrix.cache_requirements
+        id: cache-venv
+        uses: actions/cache@v4
+        env:
+          SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1
+        with:
+          path: |
+            venv
+          key: cache-reqs-${{ matrix.os }}_${{ matrix.arch }}-${{ github.ref }}-${{ needs.process.outputs.timestamp }}
+          restore-keys: |
+            cache-reqs-${{ matrix.os }}_${{ matrix.arch }}-${{ github.ref }}-
+            cache-reqs-${{ matrix.os }}_${{ matrix.arch }}-
 
      - name: Set up QEMU
        if: matrix.qemu_platform
-        uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130  # v3.7.0
+        uses: docker/setup-qemu-action@v3
        with:
-          image: tonistiigi/binfmt:qemu-v10.0.4-56@sha256:30cc9a4d03765acac9be2ed0afc23af1ad018aed2c28ea4be8c2eb9afe03fbd1
-          cache-image: false
          platforms: ${{ matrix.qemu_platform }}
 
      - name: Build executable
@@ -294,7 +300,7 @@ jobs:
          docker compose up --build --exit-code-from "${SERVICE}" "${SERVICE}"
 
      - name: Upload artifacts
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # v6.0.0
+        uses: actions/upload-artifact@v4
        with:
          name: build-bin-${{ matrix.os }}_${{ matrix.arch }}
          path: |
@@ -302,8 +308,7 @@ jobs:
          compression-level: 0
 
   macos:
-    name: macos
-    needs: [process]
+    needs: process
     if: inputs.macos
     permissions:
       contents: read
@@ -315,12 +320,22 @@ jobs:
       UPDATE_TO: yt-dlp/yt-dlp@2025.09.05
 
     steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
-        with:
-          persist-credentials: false
-
+      - uses: actions/checkout@v5
      # NB: Building universal2 does not work with python from actions/setup-python
 
+      - name: Cache requirements
+        id: cache-venv
+        uses: actions/cache@v4
+        env:
+          SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1
+        with:
+          path: |
+            ~/yt-dlp-build-venv
+          key: cache-reqs-${{ github.job }}-${{ github.ref }}-${{ needs.process.outputs.timestamp }}
+          restore-keys: |
+            cache-reqs-${{ github.job }}-${{ github.ref }}-
+            cache-reqs-${{ github.job }}-
 
      - name: Install Requirements
        run: |
          brew install coreutils
@@ -328,14 +343,14 @@ jobs:
          brew uninstall --ignore-dependencies python3
          python3 -m venv ~/yt-dlp-build-venv
          source ~/yt-dlp-build-venv/bin/activate
-          python3 devscripts/install_deps.py --omit-default --include-extra build
-          python3 devscripts/install_deps.py --print --include-extra pyinstaller > requirements.txt
+          python3 devscripts/install_deps.py --only-optional-groups --include-group build
+          python3 devscripts/install_deps.py --print --include-group pyinstaller > requirements.txt
          # We need to ignore wheels otherwise we break universal2 builds
          python3 -m pip install -U --no-binary :all: -r requirements.txt
          # We need to fuse our own universal2 wheels for curl_cffi
          python3 -m pip install -U 'delocate==0.11.0'
          mkdir curl_cffi_whls curl_cffi_universal2
-          python3 devscripts/install_deps.py --print --omit-default --include-extra curl-cffi > requirements.txt
+          python3 devscripts/install_deps.py --print --only-optional-groups --include-group curl-cffi > requirements.txt
          for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
            python3 -m pip download \
              --only-binary=:all: \
@@ -384,7 +399,7 @@ jobs:
          [[ "$version" != "$downgraded_version" ]]
 
      - name: Upload artifacts
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # v6.0.0
+        uses: actions/upload-artifact@v4
        with:
          name: build-bin-${{ github.job }}
          path: |
@@ -394,7 +409,7 @@ jobs:
 
   windows:
     name: windows (${{ matrix.arch }})
-    needs: [process]
+    needs: process
     if: inputs.windows
     permissions:
       contents: read
@@ -407,23 +422,23 @@ jobs:
            runner: windows-2025
            python_version: '3.10'
            platform_tag: win_amd64
-            pyi_version: '6.17.0'
-            pyi_tag: '2025.11.29.054325'
-            pyi_hash: e28cc13e4ad0cc74330d832202806d0c1976e9165da6047309348ca663c0ed3d
+            pyi_version: '6.16.0'
+            pyi_tag: '2025.09.13.221251'
+            pyi_hash: b6496c7630c3afe66900cfa824e8234a8c2e2c81704bd7facd79586abc76c0e5
          - arch: 'x86'
            runner: windows-2025
            python_version: '3.10'
            platform_tag: win32
-            pyi_version: '6.17.0'
-            pyi_tag: '2025.11.29.054325'
-            pyi_hash: c00f600c17de3bdd589f043f60ab64fc34fcba6dd902ad973af9c8afc74f80d1
+            pyi_version: '6.16.0'
+            pyi_tag: '2025.09.13.221251'
+            pyi_hash: 2d881843580efdc54f3523507fc6d9c5b6051ee49c743a6d9b7003ac5758c226
          - arch: 'arm64'
            runner: windows-11-arm
            python_version: '3.13'  # arm64 only has Python >= 3.11 available
            platform_tag: win_arm64
-            pyi_version: '6.17.0'
-            pyi_tag: '2025.11.29.054325'
-            pyi_hash: a2033b18b4f7bc6108b5fd76a92c6c1de0a12ec4fe98a23396a9f978cb4b7d7b
+            pyi_version: '6.16.0'
+            pyi_tag: '2025.09.13.221251'
+            pyi_hash: 4250c9085e34a95c898f3ee2f764914fc36ec59f0d97c28e6a75fcf21f7b144f
     env:
       CHANNEL: ${{ inputs.channel }}
       ORIGIN: ${{ needs.process.outputs.origin }}
@@ -435,15 +450,26 @@ jobs:
       PYI_WHEEL: pyinstaller-${{ matrix.pyi_version }}-py3-none-${{ matrix.platform_tag }}.whl
 
     steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
+      - uses: actions/checkout@v5
+      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python_version }}
          architecture: ${{ matrix.arch }}
 
+      - name: Cache requirements
+        id: cache-venv
+        if: matrix.arch == 'arm64'
+        uses: actions/cache@v4
+        env:
+          SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1
+        with:
+          path: |
+            /yt-dlp-build-venv
+          key: ${{ env.BASE_CACHE_KEY }}-${{ github.ref }}-${{ needs.process.outputs.timestamp }}
+          restore-keys: |
+            ${{ env.BASE_CACHE_KEY }}-${{ github.ref }}-
+            ${{ env.BASE_CACHE_KEY }}-
+
      - name: Install Requirements
        env:
          ARCH: ${{ matrix.arch }}
@@ -451,8 +477,6 @@ jobs:
          PYI_HASH: ${{ matrix.pyi_hash }}
        shell: pwsh
        run: |
-          $ErrorActionPreference = "Stop"
-          $PSNativeCommandUseErrorActionPreference = $true
          python -m venv /yt-dlp-build-venv
          /yt-dlp-build-venv/Scripts/Activate.ps1
          python -m pip install -U pip
@@ -460,26 +484,22 @@ jobs:
          mkdir /pyi-wheels
          python -m pip download -d /pyi-wheels --no-deps --require-hashes "pyinstaller@${Env:PYI_URL}#sha256=${Env:PYI_HASH}"
          python -m pip install --force-reinstall -U "/pyi-wheels/${Env:PYI_WHEEL}"
-          python devscripts/install_deps.py --omit-default --include-extra build
+          python devscripts/install_deps.py --only-optional-groups --include-group build
          if ("${Env:ARCH}" -eq "x86") {
            python devscripts/install_deps.py
          } else {
-            python devscripts/install_deps.py --include-extra curl-cffi
+            python devscripts/install_deps.py --include-group curl-cffi
          }
 
      - name: Prepare
        shell: pwsh
        run: |
-          $ErrorActionPreference = "Stop"
-          $PSNativeCommandUseErrorActionPreference = $true
          python devscripts/update-version.py -c "${Env:CHANNEL}" -r "${Env:ORIGIN}" "${Env:VERSION}"
          python devscripts/make_lazy_extractors.py
 
      - name: Build
        shell: pwsh
        run: |
-          $ErrorActionPreference = "Stop"
-          $PSNativeCommandUseErrorActionPreference = $true
          /yt-dlp-build-venv/Scripts/Activate.ps1
          python -m bundle.pyinstaller
          python -m bundle.pyinstaller --onedir
@@ -489,8 +509,6 @@ jobs:
        if: vars.UPDATE_TO_VERIFICATION
        shell: pwsh
        run: |
-          $ErrorActionPreference = "Stop"
-          $PSNativeCommandUseErrorActionPreference = $true
          $name = "yt-dlp${Env:SUFFIX}"
          Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe"
          $version = & "./dist/${name}.exe" --version
@@ -501,7 +519,7 @@ jobs:
          }
 
      - name: Upload artifacts
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # v6.0.0
+        uses: actions/upload-artifact@v4
        with:
          name: build-bin-${{ github.job }}-${{ matrix.arch }}
          path: |
@@ -510,25 +528,23 @@ jobs:
          compression-level: 0
 
   meta_files:
-    name: Metadata files
-    if: always() && !cancelled()
     needs:
      - process
      - unix
      - linux
      - macos
      - windows
+    if: always() && !failure() && !cancelled()
     runs-on: ubuntu-latest
     steps:
      - name: Download artifacts
-        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131  # v7.0.0
+        uses: actions/download-artifact@v5
        with:
          path: artifact
          pattern: build-bin-*
          merge-multiple: true
 
      - name: Make SHA2-SUMS files
        shell: bash
        run: |
          cd ./artifact/
          # make sure SHA sums are also printed to stdout
@@ -584,13 +600,13 @@ jobs:
          GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
        if: env.GPG_SIGNING_KEY
        run: |
-          gpg --batch --import <<< "${GPG_SIGNING_KEY}"
+          gpg --batch --import <<< "${{ secrets.GPG_SIGNING_KEY }}"
          for signfile in ./SHA*SUMS; do
            gpg --batch --detach-sign "$signfile"
          done
 
      - name: Upload artifacts
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # v6.0.0
+        uses: actions/upload-artifact@v4
        with:
          name: build-${{ github.job }}
          path: |
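The cache steps added across the linux, macos and windows jobs all follow the same key scheme: the exact `key` ends in a per-run timestamp, so a fresh run will practically never hit it directly; the `restore-keys` prefixes then restore the newest existing cache for the same branch, or failing that for the same target, and the job re-saves under the fresh key on completion. A minimal sketch with illustrative (hypothetical) key values:

```yaml
# Sketch of the restore-keys fallback used by the new cache steps.
# The key components here are illustrative stand-ins, not literal values.
- uses: actions/cache@v4
  with:
    path: venv
    # exact match (same target, same branch, same timestamp) rarely hits
    key: cache-reqs-linux_x86_64-refs/heads/master-20260111092122
    restore-keys: |
      # prefix match, newest first: same branch...
      cache-reqs-linux_x86_64-refs/heads/master-
      # ...then any cache for this os/arch
      cache-reqs-linux_x86_64-
```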
.github/workflows/cache-warmer.yml (vendored, new file) · 23 changes

@@ -0,0 +1,23 @@
+name: Keep cache warm
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 22 1,6,11,16,21,27 * *'
+
+jobs:
+  build:
+    if: |
+      vars.KEEP_CACHE_WARM || github.event_name == 'workflow_dispatch'
+    uses: ./.github/workflows/build.yml
+    with:
+      version: '999999'
+      channel: stable
+      origin: ${{ github.repository }}
+      unix: false
+      linux: false
+      linux_armv7l: true
+      musllinux: false
+      macos: true
+      windows: true
+    permissions:
+      contents: read
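The schedule is the load-bearing part of this new workflow: GitHub evicts caches that have not been accessed for 7 days, so touching them at most every six days keeps the requirement caches from the build jobs permanently warm. Reading the cron expression:

```yaml
# Cron fields: minute hour day-of-month month day-of-week.
# '0 22 1,6,11,16,21,27 * *' fires at 22:00 UTC on the 1st, 6th, 11th,
# 16th, 21st and 27th of each month, never more than six days apart,
# which stays inside GitHub's 7-day unused-cache eviction window.
schedule:
  - cron: '0 22 1,6,11,16,21,27 * *'
```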
.github/workflows/challenge-tests.yml (vendored) · 26 changes

@@ -16,8 +16,8 @@ on:
      - yt_dlp/extractor/youtube/jsc/**.py
      - yt_dlp/extractor/youtube/pot/**.py
      - yt_dlp/utils/_jsruntime.py
 
-permissions: {}
+permissions:
+  contents: read
 
 concurrency:
   group: challenge-tests-${{ github.event.pull_request.number || github.ref }}
@@ -26,8 +26,6 @@ concurrency:
 jobs:
   tests:
     name: Challenge Tests
-    permissions:
-      contents: read
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -37,30 +35,26 @@ jobs:
     env:
       QJS_VERSION: '2025-04-26'  # Earliest version with rope strings
     steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
-        with:
-          persist-credentials: false
+      - uses: actions/checkout@v5
      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
+        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Deno
-        uses: denoland/setup-deno@e95548e56dfa95d4e1a28d6f422fafe75c4c26fb  # v2.0.3
+        uses: denoland/setup-deno@v2
        with:
          deno-version: '2.0.0'  # minimum supported version
      - name: Install Bun
-        uses: oven-sh/setup-bun@735343b667d3e6f658f44d0eca948eb6282f2b76  # v2.0.2
+        uses: oven-sh/setup-bun@v2
        with:
          # minimum supported version is 1.0.31 but earliest available Windows version is 1.1.0
          bun-version: ${{ (matrix.os == 'windows-latest' && '1.1.0') || '1.0.31' }}
          no-cache: true
      - name: Install Node
-        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f  # v6.1.0
+        uses: actions/setup-node@v6
        with:
          node-version: '20.0'  # minimum supported version
      - name: Install QuickJS (Linux)
        if: matrix.os == 'ubuntu-latest'
        shell: bash
        run: |
          wget "https://bellard.org/quickjs/binary_releases/quickjs-linux-x86_64-${QJS_VERSION}.zip" -O quickjs.zip
          unzip quickjs.zip qjs
@@ -69,19 +63,15 @@ jobs:
        if: matrix.os == 'windows-latest'
        shell: pwsh
        run: |
-          $ErrorActionPreference = "Stop"
-          $PSNativeCommandUseErrorActionPreference = $true
          Invoke-WebRequest "https://bellard.org/quickjs/binary_releases/quickjs-win-x86_64-${Env:QJS_VERSION}.zip" -OutFile quickjs.zip
          unzip quickjs.zip
      - name: Install test requirements
-        shell: bash
        run: |
-          python ./devscripts/install_deps.py --print --omit-default --include-extra test > requirements.txt
+          python ./devscripts/install_deps.py --print --only-optional-groups --include-group test > requirements.txt
          python ./devscripts/install_deps.py --print -c certifi -c requests -c urllib3 -c yt-dlp-ejs >> requirements.txt
          python -m pip install -U -r requirements.txt
      - name: Run tests
        timeout-minutes: 15
-        shell: bash
        run: |
          python -m yt_dlp -v --js-runtimes node --js-runtimes bun --js-runtimes quickjs || true
          python ./devscripts/run_tests.py test/test_jsc -k download
.github/workflows/codeql.yml (vendored) · 52 changes

@@ -2,46 +2,64 @@ name: "CodeQL"
 
 on:
   push:
-    branches: [ 'master' ]
+    branches: [ 'master', 'gh-pages', 'release' ]
   pull_request:
     # The branches below must be a subset of the branches above
     branches: [ 'master' ]
   schedule:
     - cron: '59 11 * * 5'
 
-permissions: {}
-
-concurrency:
-  group: codeql-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
-
 jobs:
   analyze:
-    name: Analyze (${{ matrix.language }})
+    name: Analyze
     runs-on: ubuntu-latest
     permissions:
-      actions: read  # Needed by github/codeql-action if repository is private
+      actions: read
       contents: read
-      security-events: write  # Needed to use github/codeql-action with Github Advanced Security
+      security-events: write
 
     strategy:
      fail-fast: false
      matrix:
-        language: [ 'actions', 'javascript-typescript', 'python' ]
+        language: [ 'python' ]
+        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+        # Use only 'java' to analyze code written in Java, Kotlin or both
+        # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
+        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
 
    steps:
    - name: Checkout repository
-      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
-      with:
-        persist-credentials: false
+      uses: actions/checkout@v5
 
    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
-      uses: github/codeql-action/init@5d4e8d1aca955e8d8589aabd499c5cae939e33c7  # v4.31.9
+      uses: github/codeql-action/init@v3
      with:
        languages: ${{ matrix.language }}
-        build-mode: none
+        # If you wish to specify custom queries, you can do so here or in a config file.
+        # By default, queries listed here will override any specified in a config file.
+        # Prefix the list here with "+" to use these queries and those in the config file.
+
+        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+        # queries: security-extended,security-and-quality
+
+
+    # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
+    # If this step fails, then you should remove it and run the build manually (see below)
+    - name: Autobuild
+      uses: github/codeql-action/autobuild@v3
+
+    # ℹ️ Command-line programs to run using the OS shell.
+    # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+
+    # If the Autobuild fails above, remove it and uncomment the following three lines.
+    # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
+
+    # - run: |
+    #     echo "Run, Build Application using script"
+    #     ./location_of_script_within_repo/buildscript.sh
 
    - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@5d4e8d1aca955e8d8589aabd499c5cae939e33c7  # v4.31.9
+      uses: github/codeql-action/analyze@v3
      with:
        category: "/language:${{matrix.language}}"
.github/workflows/core.yml (vendored) · 13 changes

@@ -22,8 +22,8 @@ on:
      - yt_dlp/extractor/__init__.py
      - yt_dlp/extractor/common.py
      - yt_dlp/extractor/extractors.py
 
-permissions: {}
+permissions:
+  contents: read
 
 concurrency:
   group: core-${{ github.event.pull_request.number || github.ref }}
@@ -33,8 +33,6 @@ jobs:
   tests:
     name: Core Tests
     if: "!contains(github.event.head_commit.message, 'ci skip')"
-    permissions:
-      contents: read
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -57,16 +55,15 @@ jobs:
          - os: windows-latest
            python-version: pypy-3.11
     steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
+      - uses: actions/checkout@v5
        with:
          fetch-depth: 0
-          persist-credentials: false
      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
+        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install test requirements
-        run: python ./devscripts/install_deps.py --include-extra test --include-extra curl-cffi
+        run: python ./devscripts/install_deps.py --include-group test --include-group curl-cffi
      - name: Run tests
        timeout-minutes: 15
        continue-on-error: False
.github/workflows/download.yml (vendored, new file) · 48 changes

@@ -0,0 +1,48 @@
+name: Download Tests
+on: [push, pull_request]
+permissions:
+  contents: read
+
+jobs:
+  quick:
+    name: Quick Download Tests
+    if: "contains(github.event.head_commit.message, 'ci run dl')"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.10'
+      - name: Install test requirements
+        run: python ./devscripts/install_deps.py --include-group dev
+      - name: Run tests
+        continue-on-error: true
+        run: python ./devscripts/run_tests.py download
+
+  full:
+    name: Full Download Tests
+    if: "contains(github.event.head_commit.message, 'ci run dl all')"
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: true
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ['3.11', '3.12', '3.13', '3.14', pypy-3.11]
+        include:
+          # atleast one of each CPython/PyPy tests must be in windows
+          - os: windows-latest
+            python-version: '3.10'
+          - os: windows-latest
+            python-version: pypy-3.11
+    steps:
+      - uses: actions/checkout@v5
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install test requirements
+        run: python ./devscripts/install_deps.py --include-group dev
+      - name: Run tests
+        continue-on-error: true
+        run: python ./devscripts/run_tests.py download
.github/workflows/issue-lockdown.yml (vendored) · 5 changes

@@ -3,14 +3,13 @@ on:
   issues:
     types: [opened]
 
-permissions: {}
+permissions:
+  issues: write
 
 jobs:
   lockdown:
     name: Issue Lockdown
     if: vars.ISSUE_LOCKDOWN
-    permissions:
-      issues: write  # Needed to lock issues
     runs-on: ubuntu-latest
     steps:
      - name: "Lock new issue"
.github/workflows/quick-test.yml (vendored) · 31 changes

@@ -1,51 +1,37 @@
 name: Quick Test
 on: [push, pull_request]
 
-permissions: {}
-
 concurrency:
   group: quick-test-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+permissions:
+  contents: read
 
 jobs:
   tests:
     name: Core Test
     if: "!contains(github.event.head_commit.message, 'ci skip all')"
-    permissions:
-      contents: read
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
-        with:
-          persist-credentials: false
+      - uses: actions/checkout@v5
      - name: Set up Python 3.10
-        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
+        uses: actions/setup-python@v6
        with:
          python-version: '3.10'
      - name: Install test requirements
-        shell: bash
-        run: python ./devscripts/install_deps.py --omit-default --include-extra test
+        run: python ./devscripts/install_deps.py --only-optional-groups --include-group test
      - name: Run tests
        timeout-minutes: 15
-        shell: bash
        run: |
          python3 -m yt_dlp -v || true
          python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core
   check:
     name: Code check
     if: "!contains(github.event.head_commit.message, 'ci skip all')"
-    permissions:
-      contents: read
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
-        with:
-          persist-credentials: false
-      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
+      - uses: actions/checkout@v5
+      - uses: actions/setup-python@v6
        with:
          python-version: '3.10'
      - name: Install dev dependencies
-        run: python ./devscripts/install_deps.py --omit-default --include-extra static-analysis
+        run: python ./devscripts/install_deps.py --only-optional-groups --include-group static-analysis
      - name: Make lazy extractors
        run: python ./devscripts/make_lazy_extractors.py
      - name: Run ruff
@@ -53,5 +39,4 @@ jobs:
      - name: Run autopep8
        run: autopep8 --diff .
      - name: Check file mode
-        shell: bash
        run: git ls-files --format="%(objectmode) %(path)" yt_dlp/ | ( ! grep -v "^100644" )
.github/workflows/release-master.yml (vendored) · 24 changes

@@ -14,39 +14,35 @@ on:
      - ".github/workflows/release-master.yml"
 concurrency:
   group: release-master
 
-permissions: {}
+permissions:
+  contents: read
 
 jobs:
   release:
     name: Publish Github release
     if: vars.BUILD_MASTER
-    permissions:
-      contents: write  # May be needed to publish release
-      id-token: write  # Needed for trusted publishing
     uses: ./.github/workflows/release.yml
     with:
       prerelease: true
       source: ${{ (github.repository != 'yt-dlp/yt-dlp' && vars.MASTER_ARCHIVE_REPO) || 'master' }}
       target: 'master'
-    secrets:
-      ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }}
-      GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
+    permissions:
+      contents: write
+      id-token: write  # mandatory for trusted publishing
+    secrets: inherit
 
   publish_pypi:
     name: Publish to PyPI
     needs: [release]
     if: vars.MASTER_PYPI_PROJECT
-    permissions:
-      id-token: write  # Needed for trusted publishing
     runs-on: ubuntu-latest
+    permissions:
+      id-token: write  # mandatory for trusted publishing
     steps:
      - name: Download artifacts
-        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131  # v7.0.0
+        uses: actions/download-artifact@v5
        with:
          path: dist
          name: build-pypi
      - name: Publish to PyPI
-        uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e  # v1.13.0
+        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
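The `release` job above shows the two ways a caller can hand secrets to a reusable workflow. Master enumerates exactly the two secrets that `release.yml` declares; `secrets: inherit` on this branch instead forwards every secret the calling workflow can see, which is simpler but broader than the callee needs. Side by side:

```yaml
# Explicit forwarding (master): only the named secrets cross the
# caller/callee boundary.
jobs:
  release:
    uses: ./.github/workflows/release.yml
    secrets:
      ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }}
      GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}

# Blanket forwarding (this branch): every repo/org secret the caller can
# access becomes visible to the reusable workflow.
#    secrets: inherit
```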
.github/workflows/release-nightly.yml (vendored) · 66 changes

@@ -2,43 +2,21 @@ name: Release (nightly)
 on:
   schedule:
     - cron: '23 23 * * *'
   workflow_dispatch:
 
-permissions: {}
+permissions:
+  contents: read
 
 jobs:
   check_nightly:
     name: Check for new commits
     if: vars.BUILD_NIGHTLY
-    permissions:
-      contents: read
     runs-on: ubuntu-latest
     outputs:
       commit: ${{ steps.check_for_new_commits.outputs.commit }}
     steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
+      - uses: actions/checkout@v5
        with:
          fetch-depth: 0
-          persist-credentials: false
-
-      - name: Retrieve HEAD commit hash
-        id: head
-        shell: bash
-        run: echo "head=$(git rev-parse HEAD)" | tee -a "${GITHUB_OUTPUT}"
-
-      - name: Cache nightly commit hash
-        uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb  # v5.0.1
-        env:
-          SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1
-        with:
-          path: .nightly_commit_hash
-          key: release-nightly-${{ steps.head.outputs.head }}
-          restore-keys: |
-            release-nightly-
 
      - name: Check for new commits
        id: check_for_new_commits
        shell: bash
        run: |
          relevant_files=(
            "yt_dlp/*.py"
@@ -52,54 +30,34 @@ jobs:
            ".github/workflows/release.yml"
            ".github/workflows/release-nightly.yml"
          )
-          if [[ -f .nightly_commit_hash ]]; then
-            limit_args=(
-              "$(cat .nightly_commit_hash)..HEAD"
-            )
-          else
-            limit_args=(
-              --since="24 hours ago"
-            )
-          fi
-          echo "commit=$(git log --format=%H -1 "${limit_args[@]}" -- "${relevant_files[@]}")" | tee -a "${GITHUB_OUTPUT}"
-
-      - name: Record new nightly commit hash
-        env:
-          HEAD: ${{ steps.head.outputs.head }}
-        shell: bash
-        run: echo "${HEAD}" | tee .nightly_commit_hash
+          echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT"
 
   release:
     name: Publish Github release
     needs: [check_nightly]
     if: ${{ needs.check_nightly.outputs.commit }}
-    permissions:
-      contents: write  # May be needed to publish release
-      id-token: write  # Needed for trusted publishing
     uses: ./.github/workflows/release.yml
     with:
       prerelease: true
       source: ${{ (github.repository != 'yt-dlp/yt-dlp' && vars.NIGHTLY_ARCHIVE_REPO) || 'nightly' }}
       target: 'nightly'
-    secrets:
-      ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }}
-      GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
+    permissions:
+      contents: write
+      id-token: write  # mandatory for trusted publishing
+    secrets: inherit
 
   publish_pypi:
     name: Publish to PyPI
     needs: [release]
     if: vars.NIGHTLY_PYPI_PROJECT
-    permissions:
-      id-token: write  # Needed for trusted publishing
     runs-on: ubuntu-latest
+    permissions:
+      id-token: write  # mandatory for trusted publishing
    steps:
      - name: Download artifacts
-        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131  # v7.0.0
+        uses: actions/download-artifact@v5
        with:
          path: dist
          name: build-pypi
 
      - name: Publish to PyPI
-        uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e  # v1.13.0
+        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
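The hunks above are easier to follow with the two detection strategies laid out whole. Master persists the last-built commit in a cached `.nightly_commit_hash` and asks git for anything newer, falling back to a 24-hour window only when no previous hash could be restored; this branch reverts to the bare time-window check, which misses commits whenever a scheduled run is skipped or delayed. The master-side logic, condensed from the deleted lines:

```yaml
- name: Check for new commits
  id: check_for_new_commits
  shell: bash
  run: |
    # relevant_files is the pathspec array defined earlier in this step
    if [[ -f .nightly_commit_hash ]]; then
      # cache hit: pick up everything since the last nightly's commit
      limit_args=("$(cat .nightly_commit_hash)..HEAD")
    else
      # cold cache: fall back to the fixed 24-hour window
      limit_args=(--since="24 hours ago")
    fi
    echo "commit=$(git log --format=%H -1 "${limit_args[@]}" -- "${relevant_files[@]}")" | tee -a "${GITHUB_OUTPUT}"
```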
.github/workflows/release.yml (vendored) · 61 changes

@@ -22,11 +22,6 @@ on:
        required: false
        default: true
        type: boolean
-  secrets:
-    ARCHIVE_REPO_TOKEN:
-      required: false
-    GPG_SIGNING_KEY:
-      required: false
   workflow_dispatch:
     inputs:
       source:
@@ -61,30 +56,30 @@ on:
        default: false
        type: boolean
 
-permissions: {}
+permissions:
+  contents: read
 
 jobs:
   prepare:
     name: Prepare
     permissions:
-      contents: write  # Needed to git-push the release commit
+      contents: write
     runs-on: ubuntu-latest
     outputs:
       channel: ${{ steps.setup_variables.outputs.channel }}
       version: ${{ steps.setup_variables.outputs.version }}
       target_repo: ${{ steps.setup_variables.outputs.target_repo }}
+      target_repo_token: ${{ steps.setup_variables.outputs.target_repo_token }}
       target_tag: ${{ steps.setup_variables.outputs.target_tag }}
       pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
       pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
       head_sha: ${{ steps.get_target.outputs.head_sha }}
 
    steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
+      - uses: actions/checkout@v5
        with:
          fetch-depth: 0
-          persist-credentials: true  # Needed to git-push the release commit
 
-      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
+      - uses: actions/setup-python@v6
        with:
          python-version: "3.10"  # Keep this in sync with test-workflows.yml
 
@@ -109,6 +104,8 @@ jobs:
          TARGET_PYPI_SUFFIX: ${{ vars[format('{0}_pypi_suffix', steps.process_inputs.outputs.target_repo)] }}
          SOURCE_ARCHIVE_REPO: ${{ vars[format('{0}_archive_repo', steps.process_inputs.outputs.source_repo)] }}
          TARGET_ARCHIVE_REPO: ${{ vars[format('{0}_archive_repo', steps.process_inputs.outputs.target_repo)] }}
+          HAS_SOURCE_ARCHIVE_REPO_TOKEN: ${{ !!secrets[format('{0}_archive_repo_token', steps.process_inputs.outputs.source_repo)] }}
+          HAS_TARGET_ARCHIVE_REPO_TOKEN: ${{ !!secrets[format('{0}_archive_repo_token', steps.process_inputs.outputs.target_repo)] }}
          HAS_ARCHIVE_REPO_TOKEN: ${{ !!secrets.ARCHIVE_REPO_TOKEN }}
        run: |
          python -m devscripts.setup_variables
@@ -153,41 +150,37 @@ jobs:
        run: git push origin "${GITHUB_EVENT_REF}"
 
   build:
     name: Build
-    needs: [prepare]
-    permissions:
-      contents: read
+    needs: prepare
     uses: ./.github/workflows/build.yml
     with:
       version: ${{ needs.prepare.outputs.version }}
       channel: ${{ needs.prepare.outputs.channel }}
       origin: ${{ needs.prepare.outputs.target_repo }}
       linux_armv7l: ${{ inputs.linux_armv7l }}
+    permissions:
+      contents: read
     secrets:
       GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
 
   publish_pypi:
     name: Publish to PyPI
     needs: [prepare, build]
     if: ${{ needs.prepare.outputs.pypi_project }}
-    permissions:
-      contents: read
-      id-token: write  # Needed for trusted publishing
     runs-on: ubuntu-latest
+    permissions:
+      id-token: write  # mandatory for trusted publishing
 
    steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
+      - uses: actions/checkout@v5
        with:
-          fetch-depth: 0  # Needed for changelog
-          persist-credentials: false
-      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
+          fetch-depth: 0
+      - uses: actions/setup-python@v6
        with:
          python-version: "3.10"
 
      - name: Install Requirements
        run: |
          sudo apt -y install pandoc man
-          python devscripts/install_deps.py --omit-default --include-extra build
+          python devscripts/install_deps.py --only-optional-groups --include-group build
 
      - name: Prepare
        env:
@@ -215,8 +208,8 @@ jobs:
          python -m build --no-isolation .
 
      - name: Upload artifacts
-        if: github.event.workflow != '.github/workflows/release.yml'  # Reusable workflow_call
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # v6.0.0
+        if: github.event_name != 'workflow_dispatch'
+        uses: actions/upload-artifact@v4
        with:
          name: build-pypi
          path: |
@@ -224,16 +217,15 @@ jobs:
          compression-level: 0
 
      - name: Publish to PyPI
-        if: github.event.workflow == '.github/workflows/release.yml'  # Direct workflow_dispatch
-        uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e  # v1.13.0
+        if: github.event_name == 'workflow_dispatch'
+        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true
 
   publish:
     name: Publish Github release
     needs: [prepare, build]
     permissions:
-      contents: write  # Needed by gh to publish release to Github
+      contents: write
     runs-on: ubuntu-latest
     env:
       TARGET_REPO: ${{ needs.prepare.outputs.target_repo }}
@@ -241,16 +233,15 @@ jobs:
       VERSION: ${{ needs.prepare.outputs.version }}
       HEAD_SHA: ${{ needs.prepare.outputs.head_sha }}
    steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
+      - uses: actions/checkout@v5
        with:
          fetch-depth: 0
-          persist-credentials: false
-      - uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131  # v7.0.0
+      - uses: actions/download-artifact@v5
        with:
          path: artifact
          pattern: build-*
          merge-multiple: true
-      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
+      - uses: actions/setup-python@v6
        with:
          python-version: "3.10"
 
@@ -291,7 +282,7 @@ jobs:
 
      - name: Publish to archive repo
        env:
-          GH_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }}
+          GH_TOKEN: ${{ secrets[needs.prepare.outputs.target_repo_token] }}
          GH_REPO: ${{ needs.prepare.outputs.target_repo }}
          TITLE_PREFIX: ${{ startswith(env.TARGET_REPO, 'yt-dlp/') && 'yt-dlp ' || '' }}
          TITLE: ${{ inputs.target != env.TARGET_REPO && inputs.target || needs.prepare.outputs.channel }}
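A pattern worth noting across these hunks (the `GH_TOKEN` entry above, and the GPG import back in build.yml): master routes `${{ ... }}` expressions through `env:` rather than splicing them into `run:` bodies. Template expansion happens before the shell ever parses the script, so an inlined expression whose value contains shell metacharacters is injected verbatim; routed through `env:`, the value only reaches the script as an ordinary environment variable. A minimal sketch:

```yaml
- name: Import GPG key
  env:
    # expansion happens here, outside the shell script
    GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
  run: |
    # the shell only ever sees a plain environment variable
    gpg --batch --import <<< "${GPG_SIGNING_KEY}"
    # by contrast, writing ${{ secrets.GPG_SIGNING_KEY }} inline here would
    # splice the raw value into the script text before execution
```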
.github/workflows/sanitize-comment.yml (vendored) · 7 changes

@@ -4,15 +4,14 @@ on:
   issue_comment:
     types: [created, edited]
 
-permissions: {}
+permissions:
+  issues: write
 
 jobs:
   sanitize-comment:
     name: Sanitize comment
     if: vars.SANITIZE_COMMENT && !github.event.issue.pull_request
-    permissions:
-      issues: write  # Needed by yt-dlp/sanitize-comment to edit comments
     runs-on: ubuntu-latest
     steps:
      - name: Sanitize comment
-        uses: yt-dlp/sanitize-comment@4536c691101b89f5373d50fe8a7980cae146346b  # v1.0.0
+        uses: yt-dlp/sanitize-comment@v1
.github/workflows/test-workflows.yml (vendored) · 45 changes

@@ -1,54 +1,40 @@
 name: Test and lint workflows
 on:
   push:
     branches: [master]
     paths:
      - .github/*.yml
      - .github/workflows/*
-      - bundle/docker/linux/*.sh
      - devscripts/setup_variables.py
      - devscripts/setup_variables_tests.py
      - devscripts/utils.py
   pull_request:
     branches: [master]
     paths:
      - .github/*.yml
      - .github/workflows/*
-      - bundle/docker/linux/*.sh
      - devscripts/setup_variables.py
      - devscripts/setup_variables_tests.py
      - devscripts/utils.py
 
-permissions: {}
-
 concurrency:
   group: test-workflows-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: ${{ github.event_name == 'pull_request' }}
 
+permissions:
+  contents: read
 env:
-  ACTIONLINT_VERSION: "1.7.9"
-  ACTIONLINT_SHA256SUM: 233b280d05e100837f4af1433c7b40a5dcb306e3aa68fb4f17f8a7f45a7df7b4
+  ACTIONLINT_VERSION: "1.7.8"
+  ACTIONLINT_SHA256SUM: be92c2652ab7b6d08425428797ceabeb16e31a781c07bc388456b4e592f3e36a
   ACTIONLINT_REPO: https://github.com/rhysd/actionlint
 
 jobs:
   check:
     name: Check workflows
-    permissions:
-      contents: read
     runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
-        with:
-          persist-credentials: false
-      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
+      - uses: actions/checkout@v5
+      - uses: actions/setup-python@v6
        with:
          python-version: "3.10"  # Keep this in sync with release.yml's prepare job
      - name: Install requirements
        env:
          ACTIONLINT_TARBALL: ${{ format('actionlint_{0}_linux_amd64.tar.gz', env.ACTIONLINT_VERSION) }}
        shell: bash
        run: |
-          python -m devscripts.install_deps --omit-default --include-extra test
+          python -m devscripts.install_deps --only-optional-groups --include-group test
          sudo apt -y install shellcheck
          python -m pip install -U pyflakes
          curl -LO "${ACTIONLINT_REPO}/releases/download/v${ACTIONLINT_VERSION}/${ACTIONLINT_TARBALL}"
@@ -64,20 +50,3 @@ jobs:
      - name: Test GHA devscripts
        run: |
          pytest -Werror --tb=short --color=yes devscripts/setup_variables_tests.py
-
-  zizmor:
-    name: Run zizmor
-    permissions:
-      contents: read
-      actions: read  # Needed by zizmorcore/zizmor-action if repository is private
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
-        with:
-          persist-credentials: false
-      - name: Run zizmor
-        uses: zizmorcore/zizmor-action@e639db99335bc9038abc0e066dfcd72e23d26fb4  # v0.3.0
-        with:
-          advanced-security: false
-          persona: pedantic
-          version: v1.19.0
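Note how the actionlint version and SHA256 pins travel together: the `Install requirements` step shown above ends at the `curl -LO` download, and the actual digest check falls outside this excerpt, so the following is an assumed sketch of how such a pin is typically enforced:

```yaml
- name: Verify and unpack actionlint  # assumed sketch; step not shown above
  shell: bash
  run: |
    curl -LO "${ACTIONLINT_REPO}/releases/download/v${ACTIONLINT_VERSION}/${ACTIONLINT_TARBALL}"
    # sha256sum -c expects "<digest>  <filename>" (two spaces) and
    # exits non-zero on mismatch, failing the job before the tool is used
    echo "${ACTIONLINT_SHA256SUM}  ${ACTIONLINT_TARBALL}" | sha256sum -c -
    tar -xzf "${ACTIONLINT_TARBALL}" actionlint
```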
.github/zizmor.yml (vendored, deleted) · 15 changes

@@ -1,15 +0,0 @@
-rules:
-  concurrency-limits:
-    ignore:
-      - build.yml  # Can only be triggered by maintainers or cronjob
-      - issue-lockdown.yml  # It *should* run for *every* new issue
-      - release-nightly.yml  # Can only be triggered by once-daily cronjob
-      - release.yml  # Can only be triggered by maintainers or cronjob
-      - sanitize-comment.yml  # It *should* run for *every* new comment/edit
-  obfuscation:
-    ignore:
-      - release.yml  # Not actual obfuscation
-  unpinned-uses:
-    config:
-      policies:
-        "*": hash-pin
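The deleted `unpinned-uses` policy is what the zizmor job (also removed from test-workflows.yml above) enforced: with `"*": hash-pin`, every `uses:` reference must name an immutable commit SHA rather than a movable tag, which is exactly the property the `@v5`-style reversions throughout this diff give up:

```yaml
# What the "*": hash-pin policy accepts vs. flags: a tag can be re-pointed
# after review, a full commit SHA cannot.
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1 (accepted)
- uses: actions/checkout@v5  # flagged as unpinned
```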
@@ -177,7 +177,7 @@ While it is strongly recommended to use `hatch` for yt-dlp development, if you a
 
 ```shell
 # To only install development dependencies:
-$ python -m devscripts.install_deps --include-extra dev
+$ python -m devscripts.install_deps --include-group dev
 
 # Or, for an editable install plus dev dependencies:
 $ python -m pip install -e ".[default,dev]"
@@ -763,7 +763,7 @@ Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dl
 
 Use `url_or_none` for safe URL processing.
 
-Use `traverse_obj` and `try_call` (supersedes `dict_get` and `try_get`) for safe metadata extraction from parsed JSON.
+Use `traverse_obj` and `try_call` (superseeds `dict_get` and `try_get`) for safe metadata extraction from parsed JSON.
 
 Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
CONTRIBUTORS · 11 changes

@@ -828,18 +828,9 @@ krystophny
 matyb08
 pha1n0q
 PierceLBrooks
+sepro
 TheQWERTYCodr
 thomasmllt
 w4grfw
 WeidiDeng
 Zer0spectrum
-0xvd
-1bnBattuta
-beliote
-darkstar
-Haytam001
-mrFlamel
-oxyzenQ
-putridambassador121
-RezSat
-WhatAmISupposedToPutHere
Changelog.md · 60 changes

@@ -4,64 +4,6 @@
 # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
 -->
 
-### 2025.12.08
-
-#### Core changes
-- [Respect `PATHEXT` when locating JS runtime on Windows](https://github.com/yt-dlp/yt-dlp/commit/e564b4a8080cff48fa0c28f20272c05085ee6130) ([#15117](https://github.com/yt-dlp/yt-dlp/issues/15117)) by [Grub4K](https://github.com/Grub4K)
-- **cookies**: [Fix `--cookies-from-browser` for new installs of Firefox 147+](https://github.com/yt-dlp/yt-dlp/commit/fa16dc5241ac1552074feee48e1c2605dc36d352) ([#15215](https://github.com/yt-dlp/yt-dlp/issues/15215)) by [bashonly](https://github.com/bashonly), [mbway](https://github.com/mbway)
-
-#### Extractor changes
-- **agalega**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3cb5e4db54d44fe82d4eee94ae2f37cbce2e7dfc) ([#15105](https://github.com/yt-dlp/yt-dlp/issues/15105)) by [putridambassador121](https://github.com/putridambassador121)
-- **alibaba**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c70b57c03e0c25767a5166620798297a2a4878fb) ([#15253](https://github.com/yt-dlp/yt-dlp/issues/15253)) by [seproDev](https://github.com/seproDev)
-- **bitmovin**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/45a3b42bb917e99b0b5c155c272ebf4a82a5bf66) ([#15064](https://github.com/yt-dlp/yt-dlp/issues/15064)) by [seproDev](https://github.com/seproDev)
-- **digiteka**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/6842620d56e4c4e6affb90c2f8dff8a36dee852c) ([#14903](https://github.com/yt-dlp/yt-dlp/issues/14903)) by [beliote](https://github.com/beliote)
-- **fc2**: live: [Raise appropriate error when stream is offline](https://github.com/yt-dlp/yt-dlp/commit/4433b3a217c9f430dc057643bfd7b6769eff4a45) ([#15180](https://github.com/yt-dlp/yt-dlp/issues/15180)) by [Zer0spectrum](https://github.com/Zer0spectrum)
-- **floatplane**: [Add subtitle support](https://github.com/yt-dlp/yt-dlp/commit/b333ef1b3f961e292a8bf7052c54b54c81587a17) ([#15069](https://github.com/yt-dlp/yt-dlp/issues/15069)) by [seproDev](https://github.com/seproDev)
-- **jtbc**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/947e7883406e5ea43687d6e4ff721cc0162c9664) ([#15047](https://github.com/yt-dlp/yt-dlp/issues/15047)) by [seproDev](https://github.com/seproDev)
-- **loom**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/36b29bb3532e008a2aaf3d36d1c6fc3944137930) ([#15236](https://github.com/yt-dlp/yt-dlp/issues/15236)) by [bashonly](https://github.com/bashonly)
-- **mave**: channel: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5f66ac71f6637f768cd251509b0a932d0ce56427) ([#14915](https://github.com/yt-dlp/yt-dlp/issues/14915)) by [anlar](https://github.com/anlar)
-- **medaltv**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/a4c72acc462668a938827370bd77084a1cd4733b) ([#15103](https://github.com/yt-dlp/yt-dlp/issues/15103)) by [seproDev](https://github.com/seproDev)
-- **netapp**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/20f83f208eae863250b35e2761adad88e91d85a1) ([#15122](https://github.com/yt-dlp/yt-dlp/issues/15122)) by [darkstar](https://github.com/darkstar)
-- **nhk**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/12d411722a3d7a0382d1d230a904ecd4e20298b6) ([#14528](https://github.com/yt-dlp/yt-dlp/issues/14528)) by [garret1317](https://github.com/garret1317)
-- **nowcanal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/4e680db1505dafb93313b1d42ffcd3f230fcc92a) ([#14584](https://github.com/yt-dlp/yt-dlp/issues/14584)) by [pferreir](https://github.com/pferreir)
-- **patreon**: campaign: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/023e4db9afe0630c608621846856a1ca876d8bab) ([#15108](https://github.com/yt-dlp/yt-dlp/issues/15108)) by [thomasmllt](https://github.com/thomasmllt)
-- **rinsefm**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/d6aa8c235d2e7d9374f79ec73af23a3859c76bea) ([#15020](https://github.com/yt-dlp/yt-dlp/issues/15020)) by [1bnBattuta](https://github.com/1bnBattuta), [seproDev](https://github.com/seproDev)
-- **s4c**: [Fix geo-restricted content](https://github.com/yt-dlp/yt-dlp/commit/26c2545b87e2b22f134d1f567ed4d4b0b91c3253) ([#15196](https://github.com/yt-dlp/yt-dlp/issues/15196)) by [seproDev](https://github.com/seproDev)
-- **soundcloudplaylist**: [Support new API URLs](https://github.com/yt-dlp/yt-dlp/commit/1dd84b9d1c33e50de49866b0d93c2596897ce506) ([#15071](https://github.com/yt-dlp/yt-dlp/issues/15071)) by [seproDev](https://github.com/seproDev)
-- **sporteurope**: [Support new domain](https://github.com/yt-dlp/yt-dlp/commit/025191fea655ac879ca6dc68df358c26456a6e46) ([#15251](https://github.com/yt-dlp/yt-dlp/issues/15251)) by [bashonly](https://github.com/bashonly)
-- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2c9f0c3456057aff0631d9ea6d3eda70ffd8aabe) ([#15113](https://github.com/yt-dlp/yt-dlp/issues/15113)) by [bashonly](https://github.com/bashonly)
-- **thechosen**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/854fded114f3b7b33693c2d3418575d04014aa4b) ([#14183](https://github.com/yt-dlp/yt-dlp/issues/14183)) by [mrFlamel](https://github.com/mrFlamel)
-- **thisoldhouse**: [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/9daba4f442139ee2537746398afc5ac30b51c28c) ([#15097](https://github.com/yt-dlp/yt-dlp/issues/15097)) by [bashonly](https://github.com/bashonly)
-- **tubitv**: series: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2a777ecbd598de19a4c691ba1f790ccbec9cdbc4) ([#15018](https://github.com/yt-dlp/yt-dlp/issues/15018)) by [Zer0spectrum](https://github.com/Zer0spectrum)
-- **urplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c2e7e9cdb2261adde01048d161914b156a3bad51) ([#15120](https://github.com/yt-dlp/yt-dlp/issues/15120)) by [seproDev](https://github.com/seproDev)
-- **web.archive**: youtube: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7ec6b9bc40ee8a21b11cce83a09a07a37014062e) ([#15234](https://github.com/yt-dlp/yt-dlp/issues/15234)) by [seproDev](https://github.com/seproDev)
-- **wistiachannel**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0c696239ef418776ac6ba20284bd2f3976a011b4) ([#14218](https://github.com/yt-dlp/yt-dlp/issues/14218)) by [Sojiroh](https://github.com/Sojiroh)
-- **xhamster**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/29e257037862f3b2ad65e6e8d2972f9ed89389e3) ([#15252](https://github.com/yt-dlp/yt-dlp/issues/15252)) by [0xvd](https://github.com/0xvd)
-- **yfanefa**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/af285016d2b14c4445109283e7c590b31542de88) ([#15032](https://github.com/yt-dlp/yt-dlp/issues/15032)) by [Haytam001](https://github.com/Haytam001)
-- **youtube**
-    - [Add `use_ad_playback_context` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/f7acf3c1f42cc474927ecc452205d7877af36731) ([#15220](https://github.com/yt-dlp/yt-dlp/issues/15220)) by [WhatAmISupposedToPutHere](https://github.com/WhatAmISupposedToPutHere)
-    - [Allow `ejs` patch version to differ](https://github.com/yt-dlp/yt-dlp/commit/7bd79d92965fe9f84d7e1720eb6bb10fa9a10c77) ([#15263](https://github.com/yt-dlp/yt-dlp/issues/15263)) by [Grub4K](https://github.com/Grub4K)
-    - [Detect "super resolution" AI-upscaled formats](https://github.com/yt-dlp/yt-dlp/commit/4cb5e191efeebc3679f89c3c8ac819bcd511bb1f) ([#15050](https://github.com/yt-dlp/yt-dlp/issues/15050)) by [bashonly](https://github.com/bashonly)
-    - [Determine wait time from player response](https://github.com/yt-dlp/yt-dlp/commit/715af0c636b2b33fb3df1eb2ee37eac8262d43ac) ([#14646](https://github.com/yt-dlp/yt-dlp/issues/14646)) by [bashonly](https://github.com/bashonly), [WhatAmISupposedToPutHere](https://github.com/WhatAmISupposedToPutHere)
-    - [Extract all automatic caption languages](https://github.com/yt-dlp/yt-dlp/commit/419776ecf57269efb13095386a19ddc75c1f11b2) ([#15156](https://github.com/yt-dlp/yt-dlp/issues/15156)) by [bashonly](https://github.com/bashonly)
-    - [Improve message when no JS runtime is found](https://github.com/yt-dlp/yt-dlp/commit/1d43fa5af883f96af902a29544fc766f5e97fce6) ([#15266](https://github.com/yt-dlp/yt-dlp/issues/15266)) by [bashonly](https://github.com/bashonly)
-    - [Update ejs to 0.3.2](https://github.com/yt-dlp/yt-dlp/commit/0c7e4cfcaed95909d7c1c0a11b5a12881bcfdfd6) ([#15267](https://github.com/yt-dlp/yt-dlp/issues/15267)) by [bashonly](https://github.com/bashonly)
-
-#### Downloader changes
-- [Fix playback wait time for ffmpeg downloads](https://github.com/yt-dlp/yt-dlp/commit/23f1ab346927ab73ad510fd7ba105a69e5291c66) ([#15066](https://github.com/yt-dlp/yt-dlp/issues/15066)) by [bashonly](https://github.com/bashonly)
-
-#### Postprocessor changes
-- **ffmpeg**: [Fix uncaught error if bad --ffmpeg-location is given](https://github.com/yt-dlp/yt-dlp/commit/0eed3fe530d6ff4b668494c5b1d4d6fc1ade96f7) ([#15104](https://github.com/yt-dlp/yt-dlp/issues/15104)) by [bashonly](https://github.com/bashonly)
-- **ffmpegmetadata**: [Add more tag mappings](https://github.com/yt-dlp/yt-dlp/commit/04050be583aae21f99932a674d1d2992ff016d5c) ([#14654](https://github.com/yt-dlp/yt-dlp/issues/14654)) by [garret1317](https://github.com/garret1317)
-
-#### Networking changes
-- **Request Handler**: urllib: [Do not read after close](https://github.com/yt-dlp/yt-dlp/commit/6ee6a6fc58d6254ef944bd311e6890e208a75e98) ([#15049](https://github.com/yt-dlp/yt-dlp/issues/15049)) by [bashonly](https://github.com/bashonly)
-
-#### Misc. changes
-- **build**: [Bump PyInstaller minimum version requirement to 6.17.0](https://github.com/yt-dlp/yt-dlp/commit/280165026886a1f1614ab527c34c66d71faa5d69) ([#15199](https://github.com/yt-dlp/yt-dlp/issues/15199)) by [bashonly](https://github.com/bashonly)
-- **cleanup**: Miscellaneous: [7a52ff2](https://github.com/yt-dlp/yt-dlp/commit/7a52ff29d86efc8f3adeba977b2009ce40b8e52e) by [bashonly](https://github.com/bashonly), [oxyzenQ](https://github.com/oxyzenQ), [RezSat](https://github.com/RezSat), [seproDev](https://github.com/seproDev)
-- **devscripts**: `install_deps`: [Align options/terms with PEP 735](https://github.com/yt-dlp/yt-dlp/commit/29fe515d8d3386b3406ff02bdabb967d6821bc02) ([#15200](https://github.com/yt-dlp/yt-dlp/issues/15200)) by [bashonly](https://github.com/bashonly)
-
 ### 2025.11.12
 
 #### Important changes
@@ -122,7 +64,7 @@ yt-dlp now requires users to have an external JavaScript runtime (e.g. Deno) ins
 - **build**: [Bump musllinux Python version to 3.14](https://github.com/yt-dlp/yt-dlp/commit/646904cd3a79429ec5fdc43f904b3f57ae213f34) ([#14623](https://github.com/yt-dlp/yt-dlp/issues/14623)) by [bashonly](https://github.com/bashonly)
 - **cleanup**
   - Miscellaneous
-    - [c63b4e2](https://github.com/yt-dlp/yt-dlp/commit/c63b4e2a2b81cc78397c8709ef53ffd29bada213) by [bashonly](https://github.com/bashonly), [matyb08](https://github.com/matyb08), [seproDev](https://github.com/seproDev)
+    - [c63b4e2](https://github.com/yt-dlp/yt-dlp/commit/c63b4e2a2b81cc78397c8709ef53ffd29bada213) by [bashonly](https://github.com/bashonly), [matyb08](https://github.com/matyb08), [sepro](https://github.com/sepro)
     - [335653b](https://github.com/yt-dlp/yt-dlp/commit/335653be82d5ef999cfc2879d005397402eebec1) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
 - **devscripts**: [Improve `install_deps` script](https://github.com/yt-dlp/yt-dlp/commit/73922e66e437fb4bb618bdc119a96375081bf508) ([#14766](https://github.com/yt-dlp/yt-dlp/issues/14766)) by [bashonly](https://github.com/bashonly)
 - **test**: [Skip flaky tests if source unchanged](https://github.com/yt-dlp/yt-dlp/commit/ade8c2b36ff300edef87d48fd1ba835ac35c5b63) ([#14970](https://github.com/yt-dlp/yt-dlp/issues/14970)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
@@ -8,7 +8,9 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho

Core Maintainers are responsible for reviewing and merging contributions, publishing releases, and steering the overall direction of the project.

**You can contact the core maintainers via `maintainers@yt-dlp.org`.** This email address is **NOT** a support channel. [Open an issue](https://github.com/yt-dlp/yt-dlp/issues/new/choose) if you need help or want to report a bug.
**You can contact the core maintainers via `maintainers@yt-dlp.org`.**

This is **NOT** a support channel. [Open an issue](https://github.com/yt-dlp/yt-dlp/issues/new/choose) if you need help or want to report a bug.

### [coletdjnz](https://github.com/coletdjnz)

@@ -16,7 +18,6 @@ Core Maintainers are responsible for reviewing and merging contributions, publis

* Overhauled the networking stack and implemented support for `requests` and `curl_cffi` (`--impersonate`) HTTP clients
* Reworked the plugin architecture to support installing plugins across all yt-dlp distributions (exe, pip, etc.)
* Implemented support for external JavaScript runtimes/engines
* Maintains support for YouTube
* Added and fixed support for various other sites

@@ -24,10 +25,9 @@ Core Maintainers are responsible for reviewing and merging contributions, publis

* Rewrote and maintains the build/release workflows and the self-updater: executables, automated/nightly/master releases, `--update-to`
* Overhauled external downloader cookie handling
* Helped in implementing support for external JavaScript runtimes/engines
* Added `--cookies-from-browser` support for Firefox containers
* Maintains support for sites like YouTube, Vimeo, Twitter, TikTok, etc
* Added support for various sites
* Overhauled and maintains support for sites like Youtube, Vimeo, Twitter, TikTok, etc
* Added support for sites like Dacast, Kick, Loom, SproutVideo, Triller, Weverse, etc

### [Grub4K](https://github.com/Grub4K)

@@ -37,14 +37,12 @@ Core Maintainers are responsible for reviewing and merging contributions, publis

* `--update-to`, self-updater rewrite, automated/nightly/master releases
* Reworked internals like `traverse_obj`, various core refactors and bug fixes
* Implemented proper progress reporting for parallel downloads
* Implemented support for external JavaScript runtimes/engines
* Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc

### [sepro](https://github.com/seproDev)

* UX improvements: Warn when ffmpeg is missing, warn when double-clicking exe
* Helped in implementing support for external JavaScript runtimes/engines
* Code cleanup: Remove dead extractors, mark extractors as broken, enable/apply ruff rules
* Improved/fixed/added ArdMediathek, DRTV, Floatplane, MagentaMusik, Naver, Nebula, OnDemandKorea, Vbox7 etc
Makefile
@@ -202,9 +202,9 @@ CONTRIBUTORS: Changelog.md

# The following EJS_-prefixed variables are auto-generated by devscripts/update_ejs.py
# DO NOT EDIT!
EJS_VERSION = 0.3.2
EJS_WHEEL_NAME = yt_dlp_ejs-0.3.2-py3-none-any.whl
EJS_WHEEL_HASH = sha256:f2dc6b3d1b909af1f13e021621b0af048056fca5fb07c4db6aa9bbb37a4f66a9
EJS_VERSION = 0.3.1
EJS_WHEEL_NAME = yt_dlp_ejs-0.3.1-py3-none-any.whl
EJS_WHEEL_HASH = sha256:a6e3548874db7c774388931752bb46c7f4642c044b2a189e56968f3d5ecab622
EJS_PY_FOLDERS = yt_dlp_ejs yt_dlp_ejs/yt yt_dlp_ejs/yt/solver
EJS_PY_FILES = yt_dlp_ejs/__init__.py yt_dlp_ejs/_version.py yt_dlp_ejs/yt/__init__.py yt_dlp_ejs/yt/solver/__init__.py
EJS_JS_FOLDERS = yt_dlp_ejs/yt/solver
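The wheel hash is pinned in `sha256:<hex>` form. As a rough illustration of how such a pin can be checked (the helper below is mine, not from the repo):

```python
import hashlib

def matches_pin(path, pin):
    # pin looks like 'sha256:f2dc6b3d...' as in EJS_WHEEL_HASH above
    algo, _, expected = pin.partition(':')
    with open(path, 'rb') as f:
        return hashlib.new(algo, f.read()).hexdigest() == expected
```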
README.md
@@ -203,7 +203,7 @@ Python versions 3.10+ (CPython) and 3.11+ (PyPy) are supported. Other versions a
On Windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp. You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually.
-->

While all the other dependencies are optional, `ffmpeg`, `ffprobe`, `yt-dlp-ejs` and a supported JavaScript runtime/engine are highly recommended
While all the other dependencies are optional, `ffmpeg`, `ffprobe`, `yt-dlp-ejs` and a JavaScript runtime are highly recommended

### Strongly recommended

@@ -215,7 +215,7 @@ While all the other dependencies are optional, `ffmpeg`, `ffprobe`, `yt-dlp-ejs`

* [**yt-dlp-ejs**](https://github.com/yt-dlp/ejs) - Required for deciphering YouTube n/sig values. Licensed under [Unlicense](https://github.com/yt-dlp/ejs/blob/main/LICENSE), bundles [MIT](https://github.com/davidbonnet/astring/blob/main/LICENSE) and [ISC](https://github.com/meriyah/meriyah/blob/main/LICENSE.md) components.

    A JavaScript runtime/engine like [**deno**](https://deno.land) (recommended), [**node.js**](https://nodejs.org), [**bun**](https://bun.sh), or [**QuickJS**](https://bellard.org/quickjs/) is also required to run yt-dlp-ejs. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/EJS).
    A JavaScript runtime like [**deno**](https://deno.land) (recommended), [**node.js**](https://nodejs.org), [**bun**](https://bun.sh), or [**QuickJS**](https://bellard.org/quickjs/) is also required to run yt-dlp-ejs. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/EJS).

### Networking
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
@@ -228,7 +228,7 @@ While all the other dependencies are optional, `ffmpeg`, `ffprobe`, `yt-dlp-ejs`
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.

* [**curl_cffi**](https://github.com/lexiforest/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lexiforest/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/lexiforest/curl_cffi/blob/main/LICENSE)
    * Can be installed with the `curl-cffi` extra, e.g. `pip install "yt-dlp[default,curl-cffi]"`
    * Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
    * Currently included in most builds *except* `yt-dlp` (Unix zipimport binary), `yt-dlp_x86` (Windows 32-bit) and `yt-dlp_musllinux_aarch64`

@@ -265,7 +265,7 @@ To build the standalone executable, you must have Python and `pyinstaller` (plus
You can run the following commands:

```
python devscripts/install_deps.py --include-extra pyinstaller
python devscripts/install_deps.py --include-group pyinstaller
python devscripts/make_lazy_extractors.py
python -m bundle.pyinstaller
```

@@ -483,7 +483,7 @@ Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords
                                     two-letter ISO 3166-2 country code

## Video Selection:
-I, --playlist-items ITEM_SPEC       Comma-separated playlist_index of the items
-I, --playlist-items ITEM_SPEC       Comma separated playlist_index of the items
                                     to download. You can specify a range using
                                     "[START]:[STOP][:STEP]". For backward
                                     compatibility, START-STOP is also supported.

@@ -1299,7 +1299,7 @@ The field names themselves (the part inside the parenthesis) can also have some

1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`

1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma-separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)

1. **Unicode normalization**: The format type `U` can be used for NFC [Unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC
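For readers unfamiliar with the normalization forms, a quick stdlib-only illustration (the sample string is arbitrary):

```python
import unicodedata

s = 'ﬁlm ① Ｔitle'  # ligature, circled digit, fullwidth letter
print(unicodedata.normalize('NFC', s))   # 'ﬁlm ① Ｔitle' (compatibility characters kept)
print(unicodedata.normalize('NFKC', s))  # 'film 1 Title' (folded to compatibility equivalents)
```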
@@ -1351,7 +1351,6 @@ The available fields are:
- `repost_count` (numeric): Number of reposts of the video
- `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
- `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
- `save_count` (numeric): Number of times the video has been saved or bookmarked
- `age_limit` (numeric): Age restriction for the video (years)
- `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed)
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video

@@ -1799,8 +1798,8 @@ Metadata fields | From
`track` | `track_number`
`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
`composer` | `composer` or `composers`
`genre` | `genre`, `genres`, `categories` or `tags`
`album` | `album` or `series`
`genre` | `genre` or `genres`
`album` | `album`
`album_artist` | `album_artist` or `album_artists`
`disc` | `disc_number`
`show` | `series`

@@ -1821,9 +1820,6 @@ $ yt-dlp --parse-metadata "title:%(artist)s - %(title)s"
# Regex example
$ yt-dlp --parse-metadata "description:Artist - (?P<artist>.+)"

# Copy the episode field to the title field (with FROM and TO as single fields)
$ yt-dlp --parse-metadata "episode:title"

# Set title as "Series name S01E05"
$ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s"

@@ -1856,30 +1852,29 @@ The following extractors use this feature:
#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video's primary language metadata is preferred, with a fallback to `en`-translated metadata. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply`, `tv_downgraded`, and `tv_embedded`. By default, `tv,android_sdkless,web` is used. If no JavaScript runtime/engine is available, then `android_sdkless,web_safari,web` is used. If logged-in cookies are passed to yt-dlp, then `tv_downgraded,web_safari,web` is used for free accounts and `tv_downgraded,web_creator,web` is used for premium accounts. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_sdkless`, `android_vr`, `tv`, `tv_simply`, `tv_downgraded`, and `tv_embedded`. By default, `tv,android_sdkless,web` is used. If no JavaScript runtime is available, then `android_sdkless,web_safari,web` is used. If logged-in cookies are passed to yt-dlp, then `tv_downgraded,web_safari,web` is used for free accounts and `tv_downgraded,web_creator,web` is used for premium accounts. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
* `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `player_js_variant`: The player javascript variant to use for n/sig deciphering. The known variants are: `main`, `tcc`, `tce`, `es5`, `es6`, `tv`, `tv_es6`, `phone`, `tablet`. The default is `main`, and the others are for debugging purposes. You can use `actual` to go with what is prescribed by the site
* `player_js_version`: The player javascript version to use for n/sig deciphering, in the format of `signature_timestamp@hash` (e.g. `20348@0004de42`). The default is to use what is prescribed by the site, and can be selected with `actual`
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread,max-depth`. Default is `all,all,all,all,all`
    * A `max-depth` value of `1` will discard all replies, regardless of the `max-replies` or `max-replies-per-thread` values given
    * E.g. `all,all,1000,10,2` will get a maximum of 1000 replies total, with up to 10 replies per thread, and only 2 levels of depth (i.e. top-level comments plus their immediate replies). `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
    * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one)
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
* `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage`
* `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID)
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma-separated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma seperated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try to fetch a PO Token regardless of whether the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
* `playback_wait`: Duration (in seconds) to wait between the extraction and download stages in order to ensure the formats are available. The default is `6` seconds
* `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default)
* `use_ad_playback_context`: Skip preroll ads to eliminate the mandatory wait period before download. Do NOT use this when passing premium account cookies to yt-dlp, as it will result in a loss of premium formats. Only effective with the `web`, `web_safari`, `web_music` and `mweb` player clients. Either `true` or `false` (default)
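When these extractor arguments are set through the Python API rather than the CLI, they are passed as the `extractor_args` option. A minimal sketch, with illustrative values (equivalent to `--extractor-args "youtube:player_client=default,-ios;max_comments=1000,all,100"`):

```python
import yt_dlp  # assumes yt-dlp is installed

ydl_opts = {
    'extractor_args': {
        'youtube': {
            'player_client': ['default', '-ios'],
            'max_comments': ['1000', 'all', '100'],
        },
    },
    'getcomments': True,  # comments are only gathered when requested
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
```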

#### youtube-ejs
* `jitless`: Run supported Javascript engines in JIT-less mode. Supported runtimes are `deno`, `node` and `bun`. Provides better security at the cost of performance/speed. Do note that `node` and `bun` are still considered insecure. Either `true` or `false` (default)
* `jitless`: Run suported Javascript engines in JIT-less mode. Supported runtimes are `deno`, `node` and `bun`. Provides better security at the cost of performance/speed. Do note that `node` and `bun` are still considered unsecure. Either `true` or `false` (default)

#### youtubepot-webpo
* `bind_to_visitor_id`: Whether to use the Visitor ID instead of Visitor Data for caching WebPO tokens. Either `true` (default) or `false`

@@ -26,7 +26,7 @@ services:
        platforms:
          - "linux/amd64"
        args:
          VERIFYIMAGE: quay.io/pypa/manylinux2014_x86_64:2025.12.19-1@sha256:b716645f9aecd0c1418283af930804bbdbd68a73d855a60101c5aab8548d737d
          VERIFYIMAGE: quay.io/pypa/manylinux2014_x86_64:latest
      environment:
        EXE_NAME: ${EXE_NAME:?}
        UPDATE_TO:
@@ -61,7 +61,7 @@ services:
        platforms:
          - "linux/arm64"
        args:
          VERIFYIMAGE: quay.io/pypa/manylinux2014_aarch64:2025.12.19-1@sha256:36cbe6638c7c605c2b44a92e35751baa537ec8902112f790139d89c7e1ccd2a4
          VERIFYIMAGE: quay.io/pypa/manylinux2014_aarch64:latest
      environment:
        EXE_NAME: ${EXE_NAME:?}
        UPDATE_TO:
@@ -97,7 +97,7 @@ services:
        platforms:
          - "linux/arm/v7"
        args:
          VERIFYIMAGE: arm32v7/debian:bullseye@sha256:9d544bf6ff73e36b8df1b7e415f6c8ee40ed84a0f3a26970cac8ea88b0ccf2ac
          VERIFYIMAGE: arm32v7/debian:bullseye
      environment:
        EXE_NAME: ${EXE_NAME:?}
        UPDATE_TO:
@@ -132,7 +132,7 @@ services:
        platforms:
          - "linux/amd64"
        args:
          VERIFYIMAGE: alpine:3.23.2@sha256:865b95f46d98cf867a156fe4a135ad3fe50d2056aa3f25ed31662dff6da4eb62
          VERIFYIMAGE: alpine:3.22
      environment:
        EXE_NAME: ${EXE_NAME:?}
        UPDATE_TO:
@@ -168,7 +168,7 @@ services:
        platforms:
          - "linux/arm64"
        args:
          VERIFYIMAGE: alpine:3.23.2@sha256:865b95f46d98cf867a156fe4a135ad3fe50d2056aa3f25ed31662dff6da4eb62
          VERIFYIMAGE: alpine:3.22
      environment:
        EXE_NAME: ${EXE_NAME:?}
        UPDATE_TO:

@@ -15,12 +15,12 @@ function venvpy {
}

INCLUDES=(
    --include-extra pyinstaller
    --include-extra secretstorage
    --include-group pyinstaller
    --include-group secretstorage
)

if [[ -z "${EXCLUDE_CURL_CFFI:-}" ]]; then
    INCLUDES+=(--include-extra curl-cffi)
    INCLUDES+=(--include-group curl-cffi)
fi

runpy -m venv /yt-dlp-build-venv
@@ -28,7 +28,7 @@ runpy -m venv /yt-dlp-build-venv
source /yt-dlp-build-venv/bin/activate
# Inside the venv we use venvpy instead of runpy
venvpy -m ensurepip --upgrade --default-pip
venvpy -m devscripts.install_deps --omit-default --include-extra build
venvpy -m devscripts.install_deps --only-optional-groups --include-group build
venvpy -m devscripts.install_deps "${INCLUDES[@]}"
venvpy -m devscripts.make_lazy_extractors
venvpy devscripts/update-version.py -c "${CHANNEL}" -r "${ORIGIN}" "${VERSION}"

@@ -319,11 +319,5 @@
        "action": "add",
        "when": "6224a3898821965a7d6a2cb9cc2de40a0fd6e6bc",
        "short": "[priority] **An external JavaScript runtime is now required for full YouTube support**\nyt-dlp now requires users to have an external JavaScript runtime (e.g. Deno) installed in order to solve the JavaScript challenges presented by YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/15012)"
    },
    {
        "action": "change",
        "when": "c63b4e2a2b81cc78397c8709ef53ffd29bada213",
        "short": "[cleanup] Misc (#14767)",
        "authors": ["bashonly", "seproDev", "matyb08"]
    }
]

@@ -25,16 +25,16 @@ def parse_args():
        '-e', '--exclude-dependency', metavar='DEPENDENCY', action='append',
        help='exclude a dependency (can be used multiple times)')
    parser.add_argument(
        '-i', '--include-extra', metavar='EXTRA', action='append',
        help='include an extra/optional-dependencies list (can be used multiple times)')
        '-i', '--include-group', metavar='GROUP', action='append',
        help='include an optional dependency group (can be used multiple times)')
    parser.add_argument(
        '-c', '--cherry-pick', metavar='DEPENDENCY', action='append',
        help=(
            'only include a specific dependency from the resulting dependency list '
            '(can be used multiple times)'))
    parser.add_argument(
        '-o', '--omit-default', action='store_true',
        help='omit the "default" extra unless it is explicitly included (it is included by default)')
        '-o', '--only-optional-groups', action='store_true',
        help='omit default dependencies unless the "default" group is specified with --include-group')
    parser.add_argument(
        '-p', '--print', action='store_true',
        help='only print requirements to stdout')
@@ -51,27 +51,27 @@ def uniq(arg) -> dict[str, None]:
def main():
    args = parse_args()
    project_table = parse_toml(read_file(args.input))['project']
    recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P<extra_name>[\w-]+)\]')
    extras = project_table['optional-dependencies']
    recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P<group_name>[\w-]+)\]')
    optional_groups = project_table['optional-dependencies']

    excludes = uniq(args.exclude_dependency)
    only_includes = uniq(args.cherry_pick)
    include_extras = uniq(args.include_extra)
    include_groups = uniq(args.include_group)

    def yield_deps(extra):
        for dep in extra:
    def yield_deps(group):
        for dep in group:
            if mobj := recursive_pattern.fullmatch(dep):
                yield from extras.get(mobj.group('extra_name'), ())
                yield from optional_groups.get(mobj.group('group_name'), ())
            else:
                yield dep

    targets = {}
    if not args.omit_default:
    if not args.only_optional_groups:
        # legacy: 'dependencies' is empty now
        targets.update(dict.fromkeys(project_table['dependencies']))
        targets.update(dict.fromkeys(yield_deps(extras['default'])))
        targets.update(dict.fromkeys(yield_deps(optional_groups['default'])))

    for include in filter(None, map(extras.get, include_extras)):
    for include in filter(None, map(optional_groups.get, include_groups)):
        targets.update(dict.fromkeys(yield_deps(include)))

    def target_filter(target):
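The core of this change is the recursive group expansion in `yield_deps`. A self-contained sketch with simplified, made-up group contents:

```python
import re

name = 'yt-dlp'
optional_groups = {
    'default': ['requests>=2.32.2,<3', f'{name}[curl-cffi]'],
    'curl-cffi': ["curl-cffi>=0.5.10; implementation_name=='cpython'"],
}
recursive_pattern = re.compile(rf'{name}\[(?P<group_name>[\w-]+)\]')

def yield_deps(group):
    # A dependency spelled 'yt-dlp[group]' is a self-reference:
    # expand it into the members of that optional group
    for dep in group:
        if mobj := recursive_pattern.fullmatch(dep):
            yield from optional_groups.get(mobj.group('group_name'), ())
        else:
            yield dep

print(list(yield_deps(optional_groups['default'])))
# ['requests>=2.32.2,<3', "curl-cffi>=0.5.10; implementation_name=='cpython'"]
```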

@@ -251,13 +251,7 @@ class CommitRange:
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
    FIXES_RE = re.compile(r'''
        (?i:
            (?:bug\s*)?fix(?:es)?(?:
                \s+(?:bugs?|regression(?:\s+introduced)?)
            )?(?:\s+(?:in|for|from|by))?
            |Improve
        )\s+([\da-f]{40})''', re.VERBOSE)
    FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
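The rewritten verbose pattern widens what counts as a "fix" commit (adding `regression introduced` and the `from`/`by` prepositions). A standalone check of what it now accepts:

```python
import re

FIXES_RE = re.compile(r'''
    (?i:
        (?:bug\s*)?fix(?:es)?(?:
            \s+(?:bugs?|regression(?:\s+introduced)?)
        )?(?:\s+(?:in|for|from|by))?
        |Improve
    )\s+([\da-f]{40})''', re.VERBOSE)

sha = 'deadbeef' * 5  # any 40-character hex string
for message in (f'Fix {sha}', f'Fixes regression introduced in {sha}', f'Improve {sha}'):
    assert FIXES_RE.search(message).group(1) == sha
```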

@@ -21,6 +21,8 @@ def setup_variables(environment):
        SOURCE_PYPI_PROJECT, SOURCE_PYPI_SUFFIX,
        TARGET_PYPI_PROJECT, TARGET_PYPI_SUFFIX,
        SOURCE_ARCHIVE_REPO, TARGET_ARCHIVE_REPO,
        HAS_SOURCE_ARCHIVE_REPO_TOKEN,
        HAS_TARGET_ARCHIVE_REPO_TOKEN,
        HAS_ARCHIVE_REPO_TOKEN

    `INPUTS` must contain these keys:
@@ -35,6 +37,8 @@ def setup_variables(environment):
    PROCESSED = json.loads(environment['PROCESSED'])

    source_channel = None
    does_not_have_needed_token = False
    target_repo_token = None
    pypi_project = None
    pypi_suffix = None

@@ -77,19 +81,28 @@ def setup_variables(environment):
            target_repo = REPOSITORY
        if target_repo != REPOSITORY:
            target_repo = environment['TARGET_ARCHIVE_REPO']
            target_repo_token = f'{PROCESSED["target_repo"].upper()}_ARCHIVE_REPO_TOKEN'
            if not json.loads(environment['HAS_TARGET_ARCHIVE_REPO_TOKEN']):
                does_not_have_needed_token = True
            pypi_project = environment['TARGET_PYPI_PROJECT'] or None
            pypi_suffix = environment['TARGET_PYPI_SUFFIX'] or None
    else:
        target_tag = source_tag or version
        if source_channel:
            target_repo = source_channel
            target_repo_token = f'{PROCESSED["source_repo"].upper()}_ARCHIVE_REPO_TOKEN'
            if not json.loads(environment['HAS_SOURCE_ARCHIVE_REPO_TOKEN']):
                does_not_have_needed_token = True
            pypi_project = environment['SOURCE_PYPI_PROJECT'] or None
            pypi_suffix = environment['SOURCE_PYPI_SUFFIX'] or None
        else:
            target_repo = REPOSITORY

    if target_repo != REPOSITORY and not json.loads(environment['HAS_ARCHIVE_REPO_TOKEN']):
        return None
    if does_not_have_needed_token:
        if not json.loads(environment['HAS_ARCHIVE_REPO_TOKEN']):
            print(f'::error::Repository access secret {target_repo_token} not found')
            return None
        target_repo_token = 'ARCHIVE_REPO_TOKEN'

    if target_repo == REPOSITORY and not INPUTS['prerelease']:
        pypi_project = environment['PYPI_PROJECT'] or None
@@ -98,6 +111,7 @@ def setup_variables(environment):
        'channel': resolved_source,
        'version': version,
        'target_repo': target_repo,
        'target_repo_token': target_repo_token,
        'target_tag': target_tag,
        'pypi_project': pypi_project,
        'pypi_suffix': pypi_suffix,
@@ -133,7 +147,6 @@ if __name__ == '__main__':

    outputs = setup_variables(dict(os.environ))
    if not outputs:
        print('::error::Repository access secret ARCHIVE_REPO_TOKEN not found')
        sys.exit(1)

    print('::group::Output variables')
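Note the pattern used throughout: the workflow passes only availability flags for secrets, not the secret values themselves, as the JSON strings `'true'`/`'false'`, which the script decodes with `json.loads`. A condensed sketch of the fallback logic above (function name and the flattened control flow are mine):

```python
import json

def pick_token(env, preferred):
    # preferred is e.g. 'NIGHTLY_ARCHIVE_REPO_TOKEN'; fall back to the
    # generic ARCHIVE_REPO_TOKEN when the specific secret is absent
    if json.loads(env[f'HAS_{preferred}']):
        return preferred
    if json.loads(env['HAS_ARCHIVE_REPO_TOKEN']):
        return 'ARCHIVE_REPO_TOKEN'
    return None  # caller emits ::error:: and exits

print(pick_token(
    {'HAS_NIGHTLY_ARCHIVE_REPO_TOKEN': 'false', 'HAS_ARCHIVE_REPO_TOKEN': 'true'},
    'NIGHTLY_ARCHIVE_REPO_TOKEN'))  # ARCHIVE_REPO_TOKEN
```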

@@ -9,10 +9,8 @@ import json
from devscripts.setup_variables import STABLE_REPOSITORY, process_inputs, setup_variables
from devscripts.utils import calculate_version

GENERATE_TEST_DATA = object()


def _test(github_repository, note, repo_vars, repo_secrets, inputs, expected, ignore_revision=False):
def _test(github_repository, note, repo_vars, repo_secrets, inputs, expected=None, ignore_revision=False):
    inp = inputs.copy()
    inp.setdefault('linux_armv7l', True)
    inp.setdefault('prerelease', False)
@@ -35,19 +33,16 @@ def _test(github_repository, note, repo_vars, repo_secrets, inputs, expected, ig
        'TARGET_PYPI_SUFFIX': variables.get(f'{target_repo}_PYPI_SUFFIX') or '',
        'SOURCE_ARCHIVE_REPO': variables.get(f'{source_repo}_ARCHIVE_REPO') or '',
        'TARGET_ARCHIVE_REPO': variables.get(f'{target_repo}_ARCHIVE_REPO') or '',
        'HAS_SOURCE_ARCHIVE_REPO_TOKEN': json.dumps(bool(secrets.get(f'{source_repo}_ARCHIVE_REPO_TOKEN'))),
        'HAS_TARGET_ARCHIVE_REPO_TOKEN': json.dumps(bool(secrets.get(f'{target_repo}_ARCHIVE_REPO_TOKEN'))),
        'HAS_ARCHIVE_REPO_TOKEN': json.dumps(bool(secrets.get('ARCHIVE_REPO_TOKEN'))),
    }

    result = setup_variables(env)

    if expected is GENERATE_TEST_DATA:
    if not expected:
        print(' {\n' + '\n'.join(f'    {k!r}: {v!r},' for k, v in result.items()) + '\n }')
        return

    if expected is None:
        assert result is None, f'expected error/None but got dict: {github_repository} {note}'
        return

    exp = expected.copy()
    if ignore_revision:
        assert len(result['version']) == len(exp['version']), f'revision missing: {github_repository} {note}'
@@ -82,6 +77,7 @@ def test_setup_variables():
        'channel': 'stable',
        'version': DEFAULT_VERSION,
        'target_repo': STABLE_REPOSITORY,
        'target_repo_token': None,
        'target_tag': DEFAULT_VERSION,
        'pypi_project': 'yt-dlp',
        'pypi_suffix': None,
@@ -95,6 +91,7 @@ def test_setup_variables():
        'channel': 'nightly',
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': 'yt-dlp/yt-dlp-nightly-builds',
        'target_repo_token': 'ARCHIVE_REPO_TOKEN',
        'target_tag': DEFAULT_VERSION_WITH_REVISION,
        'pypi_project': 'yt-dlp',
        'pypi_suffix': 'dev',
@@ -109,6 +106,7 @@ def test_setup_variables():
        'channel': 'nightly',
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': 'yt-dlp/yt-dlp-nightly-builds',
        'target_repo_token': 'ARCHIVE_REPO_TOKEN',
        'target_tag': DEFAULT_VERSION_WITH_REVISION,
        'pypi_project': 'yt-dlp',
        'pypi_suffix': 'dev',
@@ -122,6 +120,7 @@ def test_setup_variables():
        'channel': 'master',
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': 'yt-dlp/yt-dlp-master-builds',
        'target_repo_token': 'ARCHIVE_REPO_TOKEN',
        'target_tag': DEFAULT_VERSION_WITH_REVISION,
        'pypi_project': None,
        'pypi_suffix': None,
@@ -136,6 +135,7 @@ def test_setup_variables():
        'channel': 'master',
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': 'yt-dlp/yt-dlp-master-builds',
        'target_repo_token': 'ARCHIVE_REPO_TOKEN',
        'target_tag': DEFAULT_VERSION_WITH_REVISION,
        'pypi_project': None,
        'pypi_suffix': None,
@@ -149,6 +149,7 @@ def test_setup_variables():
        'channel': 'stable',
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': STABLE_REPOSITORY,
        'target_repo_token': None,
        'target_tag': 'experimental',
        'pypi_project': None,
        'pypi_suffix': None,
@@ -162,6 +163,7 @@ def test_setup_variables():
        'channel': 'stable',
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': STABLE_REPOSITORY,
        'target_repo_token': None,
        'target_tag': 'experimental',
        'pypi_project': None,
        'pypi_suffix': None,
@@ -173,6 +175,7 @@ def test_setup_variables():
        'channel': FORK_REPOSITORY,
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': FORK_REPOSITORY,
        'target_repo_token': None,
        'target_tag': DEFAULT_VERSION_WITH_REVISION,
        'pypi_project': None,
        'pypi_suffix': None,
@@ -183,6 +186,7 @@ def test_setup_variables():
        'channel': FORK_REPOSITORY,
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': FORK_REPOSITORY,
        'target_repo_token': None,
        'target_tag': DEFAULT_VERSION_WITH_REVISION,
        'pypi_project': None,
        'pypi_suffix': None,
@@ -197,6 +201,7 @@ def test_setup_variables():
        'channel': f'{FORK_REPOSITORY}@nightly',
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': FORK_REPOSITORY,
        'target_repo_token': None,
        'target_tag': 'nightly',
        'pypi_project': None,
        'pypi_suffix': None,
@@ -211,6 +216,7 @@ def test_setup_variables():
        'channel': f'{FORK_REPOSITORY}@master',
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': FORK_REPOSITORY,
        'target_repo_token': None,
        'target_tag': 'master',
        'pypi_project': None,
        'pypi_suffix': None,
@@ -221,6 +227,7 @@ def test_setup_variables():
        'channel': FORK_REPOSITORY,
        'version': f'{DEFAULT_VERSION[:10]}.123',
        'target_repo': FORK_REPOSITORY,
        'target_repo_token': None,
        'target_tag': f'{DEFAULT_VERSION[:10]}.123',
        'pypi_project': None,
        'pypi_suffix': None,
@@ -232,6 +239,7 @@ def test_setup_variables():
        'channel': FORK_REPOSITORY,
        'version': DEFAULT_VERSION,
        'target_repo': FORK_REPOSITORY,
        'target_repo_token': None,
        'target_tag': DEFAULT_VERSION,
        'pypi_project': None,
        'pypi_suffix': None,
@@ -242,16 +250,19 @@ def test_setup_variables():
        'channel': FORK_REPOSITORY,
        'version': DEFAULT_VERSION_WITH_REVISION,
        'target_repo': FORK_REPOSITORY,
        'target_repo_token': None,
        'target_tag': DEFAULT_VERSION_WITH_REVISION,
        'pypi_project': None,
        'pypi_suffix': None,
    }, ignore_revision=True)

    _test(
        FORK_REPOSITORY, 'fork, nightly', {
        FORK_REPOSITORY, 'fork w/NIGHTLY_ARCHIVE_REPO_TOKEN, nightly', {
            'NIGHTLY_ARCHIVE_REPO': f'{FORK_ORG}/yt-dlp-nightly-builds',
            'PYPI_PROJECT': 'yt-dlp-test',
        }, BASE_REPO_SECRETS, {
        }, {
            'NIGHTLY_ARCHIVE_REPO_TOKEN': '1',
        }, {
            'source': f'{FORK_ORG}/yt-dlp-nightly-builds',
            'target': 'nightly',
            'prerelease': True,
@@ -259,16 +270,19 @@ def test_setup_variables():
            'channel': f'{FORK_ORG}/yt-dlp-nightly-builds',
            'version': DEFAULT_VERSION_WITH_REVISION,
            'target_repo': f'{FORK_ORG}/yt-dlp-nightly-builds',
            'target_repo_token': 'NIGHTLY_ARCHIVE_REPO_TOKEN',
            'target_tag': DEFAULT_VERSION_WITH_REVISION,
            'pypi_project': None,
            'pypi_suffix': None,
        }, ignore_revision=True)
    _test(
        FORK_REPOSITORY, 'fork, master', {
        FORK_REPOSITORY, 'fork w/MASTER_ARCHIVE_REPO_TOKEN, master', {
            'MASTER_ARCHIVE_REPO': f'{FORK_ORG}/yt-dlp-master-builds',
            'MASTER_PYPI_PROJECT': 'yt-dlp-test',
            'MASTER_PYPI_SUFFIX': 'dev',
        }, BASE_REPO_SECRETS, {
        }, {
            'MASTER_ARCHIVE_REPO_TOKEN': '1',
        }, {
            'source': f'{FORK_ORG}/yt-dlp-master-builds',
            'target': 'master',
            'prerelease': True,
@@ -276,6 +290,7 @@ def test_setup_variables():
            'channel': f'{FORK_ORG}/yt-dlp-master-builds',
            'version': DEFAULT_VERSION_WITH_REVISION,
            'target_repo': f'{FORK_ORG}/yt-dlp-master-builds',
            'target_repo_token': 'MASTER_ARCHIVE_REPO_TOKEN',
            'target_tag': DEFAULT_VERSION_WITH_REVISION,
            'pypi_project': 'yt-dlp-test',
            'pypi_suffix': 'dev',
@@ -287,6 +302,7 @@ def test_setup_variables():
            'channel': f'{FORK_REPOSITORY}@experimental',
            'version': DEFAULT_VERSION_WITH_REVISION,
            'target_repo': FORK_REPOSITORY,
            'target_repo_token': None,
            'target_tag': 'experimental',
            'pypi_project': None,
            'pypi_suffix': None,
@@ -301,15 +317,8 @@ def test_setup_variables():
            'channel': 'stable',
            'version': DEFAULT_VERSION_WITH_REVISION,
            'target_repo': FORK_REPOSITORY,
            'target_repo_token': None,
            'target_tag': 'experimental',
            'pypi_project': None,
            'pypi_suffix': None,
        }, ignore_revision=True)

    _test(
        STABLE_REPOSITORY, 'official vars but no ARCHIVE_REPO_TOKEN, nightly',
        BASE_REPO_VARS, {}, {
            'source': 'nightly',
            'target': 'nightly',
            'prerelease': True,
        }, None)
@@ -56,7 +56,7 @@ default = [
    "requests>=2.32.2,<3",
    "urllib3>=2.0.2,<3",
    "websockets>=13.0",
    "yt-dlp-ejs==0.3.2",
    "yt-dlp-ejs==0.3.1",
]
curl-cffi = [
    "curl-cffi>=0.5.10,!=0.6.*,!=0.7.*,!=0.8.*,!=0.9.*,<0.14; implementation_name=='cpython'",
@@ -69,7 +69,7 @@ build = [
    "build",
    "hatchling>=1.27.0",
    "pip",
    "setuptools>=71.0.2",
    "setuptools>=71.0.2,<81", # See https://github.com/pyinstaller/pyinstaller/issues/9149
    "wheel",
]
dev = [
@@ -86,7 +86,7 @@ test = [
    "pytest-rerunfailures~=14.0",
]
pyinstaller = [
    "pyinstaller>=6.17.0", # 6.17.0+ needed for compat with setuptools 81+
    "pyinstaller>=6.13.0", # Windows temp cleanup fixed in 6.13.0
]

[project.urls]
@@ -50,10 +50,8 @@ The only reliable way to check if a site is supported is to try it.
- **aenetworks:collection**
- **aenetworks:show**
- **AeonCo**
- **agalega:videos**
- **AirTV**
- **AitubeKZVideo**
- **Alibaba**
- **AliExpressLive**
- **AlJazeera**
- **Allocine**
@@ -192,7 +190,6 @@ The only reliable way to check if a site is supported is to try it.
- **Biography**
- **BitChute**
- **BitChuteChannel**
- **Bitmovin**
- **BlackboardCollaborate**
- **BlackboardCollaborateLaunch**
- **BleacherReport**: (**Currently broken**)
@@ -734,7 +731,7 @@ The only reliable way to check if a site is supported is to try it.
- **loc**: Library of Congress
- **Loco**
- **loom**
- **loom:folder**: (**Currently broken**)
- **loom:folder**
- **LoveHomePorn**
- **LRTRadio**
- **LRTStream**
@@ -765,8 +762,7 @@ The only reliable way to check if a site is supported is to try it.
- **massengeschmack.tv**
- **Masters**
- **MatchTV**
- **mave**
- **mave:channel**
- **Mave**
- **MBN**: mbn.co.kr (매일방송)
- **MDR**: MDR.DE
- **MedalTV**
@@ -899,8 +895,6 @@ The only reliable way to check if a site is supported is to try it.
- **NerdCubedFeed**
- **Nest**
- **NestClip**
- **NetAppCollection**
- **NetAppVideo**
- **netease:album**: 网易云音乐 - 专辑
- **netease:djradio**: 网易云音乐 - 电台
- **netease:mv**: 网易云音乐 - MV
@@ -968,7 +962,6 @@ The only reliable way to check if a site is supported is to try it.
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- **NovaEmbed**
- **NovaPlay**
- **NowCanal**
- **nowness**
- **nowness:playlist**
- **nowness:series**
@@ -1380,7 +1373,7 @@ The only reliable way to check if a site is supported is to try it.
- **Spiegel**
- **Sport5**
- **SportBox**: (**Currently broken**)
- **sporteurope**
- **SportDeutschland**
- **Spreaker**
- **SpreakerShow**
- **SpringboardPlatform**
@@ -1468,8 +1461,6 @@ The only reliable way to check if a site is supported is to try it.
- **TFO**: (**Currently broken**)
- **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine")
- **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine")
- **TheChosen**
- **TheChosenGroup**
- **TheGuardianPodcast**
- **TheGuardianPodcastPlaylist**
- **TheHighWire**
@@ -1787,7 +1778,6 @@ The only reliable way to check if a site is supported is to try it.
- **YapFiles**: (**Currently broken**)
- **Yappy**: (**Currently broken**)
- **YappyProfile**
- **yfanefa**
- **YleAreena**
- **YouJizz**
- **youku**: 优酷
@@ -261,9 +261,9 @@ def sanitize_got_info_dict(got_dict):
def expect_info_dict(self, got_dict, expected_dict):
    ALLOWED_KEYS_SORT_ORDER = (
        # NB: Keep in sync with the docstring of extractor/common.py
        'ie_key', 'url', 'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type',
        'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type',
        'uploader', 'uploader_id', 'uploader_url', 'channel', 'channel_id', 'channel_url', 'channel_is_verified',
        'channel_follower_count', 'comment_count', 'view_count', 'concurrent_view_count', 'save_count',
        'channel_follower_count', 'comment_count', 'view_count', 'concurrent_view_count',
        'like_count', 'dislike_count', 'repost_count', 'average_rating', 'age_limit', 'duration', 'thumbnail', 'heatmap',
        'chapters', 'chapter', 'chapter_number', 'chapter_id', 'start_time', 'end_time', 'section_start', 'section_end',
        'categories', 'tags', 'cast', 'composers', 'artists', 'album_artists', 'creators', 'genres',
test/test_iqiyi_sdk_interpreter.py (new file)
@@ -0,0 +1,44 @@
#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


from test.helper import FakeYDL, is_download_test
from yt_dlp.extractor import IqiyiIE


class WarningLogger:
    def __init__(self):
        self.messages = []

    def warning(self, msg):
        self.messages.append(msg)

    def debug(self, msg):
        pass

    def error(self, msg):
        pass


@is_download_test
class TestIqiyiSDKInterpreter(unittest.TestCase):
    def test_iqiyi_sdk_interpreter(self):
        """
        Test the functionality of IqiyiSDKInterpreter by trying to log in

        If `sign` is incorrect, the /validate call throws an HTTP 556 error
        """
        logger = WarningLogger()
        ie = IqiyiIE(FakeYDL({'logger': logger}))
        ie._perform_login('foo', 'bar')
        self.assertTrue('unable to log in:' in logger.messages[0])


if __name__ == '__main__':
    unittest.main()
@@ -755,17 +755,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
            assert res.read(0) == b''
            assert res.read() == b'<video src="/vid.mp4" /></html>'

    def test_partial_read_greater_than_response_then_full_read(self, handler):
        with handler() as rh:
            for encoding in ('', 'gzip', 'deflate'):
                res = validate_and_send(rh, Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': encoding}))
                assert res.headers.get('Content-Encoding') == encoding
                assert res.read(512) == b'<html><video src="/vid.mp4" /></html>'
                assert res.read(0) == b''
                assert res.read() == b''

    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    @pytest.mark.handler_flaky('CurlCFFI', reason='segfaults')
@@ -931,28 +920,6 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
            assert res.fp.fp is None
            assert res.closed

    def test_data_uri_partial_read_then_full_read(self, handler):
        with handler() as rh:
            res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
            assert res.read(6) == b'hello '
            assert res.read(0) == b''
            assert res.read() == b'world'
            # Should automatically close the underlying file object
            assert res.fp.closed
            assert res.closed

    def test_data_uri_partial_read_greater_than_response_then_full_read(self, handler):
        with handler() as rh:
            res = validate_and_send(rh, Request('data:text/plain,hello%20world'))
            assert res.read(512) == b'hello world'
            # Response and its underlying file object should already be closed now
            assert res.fp.closed
            assert res.closed
            assert res.read(0) == b''
            assert res.read() == b''
            assert res.fp.closed
            assert res.closed

    def test_http_error_returns_content(self, handler):
        # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
        def get_response():
@@ -29,11 +29,6 @@ class TestMetadataFromField(unittest.TestCase):
            MetadataParserPP.format_to_regex('%(title)s - %(artist)s'),
            r'(?P<title>.+)\ \-\ (?P<artist>.+)')
        self.assertEqual(MetadataParserPP.format_to_regex(r'(?P<x>.+)'), r'(?P<x>.+)')
        self.assertEqual(MetadataParserPP.format_to_regex(r'text (?P<x>.+)'), r'text (?P<x>.+)')
        self.assertEqual(MetadataParserPP.format_to_regex('x'), r'(?s)(?P<x>.+)')
        self.assertEqual(MetadataParserPP.format_to_regex('Field_Name1'), r'(?s)(?P<Field_Name1>.+)')
        self.assertEqual(MetadataParserPP.format_to_regex('é'), r'(?s)(?P<é>.+)')
        self.assertEqual(MetadataParserPP.format_to_regex('invalid '), 'invalid ')

    def test_field_to_template(self):
        self.assertEqual(MetadataParserPP.field_to_template('title'), '%(title)s')
@@ -489,10 +489,6 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
        self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)

        self.assertEqual(unified_timestamp('2026-01-01 00:00:00', tz_offset=0), 1767225600)
        self.assertEqual(unified_timestamp('2026-01-01 00:00:00', tz_offset=8), 1767196800)
        self.assertEqual(unified_timestamp('2026-01-01 00:00:00 +0800', tz_offset=-5), 1767196800)
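The expected values check out: 2026-01-01 00:00:00 UTC is 1767225600, and interpreting the same naive string as UTC+8 shifts the epoch value back by eight hours (stdlib verification):

```python
from datetime import datetime, timezone

utc = int(datetime(2026, 1, 1, tzinfo=timezone.utc).timestamp())
print(utc)             # 1767225600
print(utc - 8 * 3600)  # 1767196800, i.e. midnight in UTC+8
```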

    def test_determine_ext(self):
        self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
        self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
@@ -1280,9 +1276,6 @@ class TestUtil(unittest.TestCase):
        on = js_to_json('[new Date("spam"), \'("eggs")\']')
        self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string')

        on = js_to_json('[0.077, 7.06, 29.064, 169.0072]')
        self.assertEqual(json.loads(on), [0.077, 7.06, 29.064, 169.0072])

    def test_js_to_json_malformed(self):
        self.assertEqual(js_to_json('42a1'), '42"a1"')
        self.assertEqual(js_to_json('42a-1'), '42"a"-1')
@@ -1410,9 +1403,6 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(version_tuple('1'), (1,))
        self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
        self.assertEqual(version_tuple('10.1-6'), (10, 1, 6))  # avconv style
        self.assertEqual(version_tuple('invalid', lenient=True), (-1,))
        self.assertEqual(version_tuple('1.2.3', lenient=True), (1, 2, 3))
        self.assertEqual(version_tuple('12.34-something', lenient=True), (12, 34, -1))

    def test_detect_exe_version(self):
        self.assertEqual(detect_exe_version('''ffmpeg version 1.2.1
@@ -40,7 +40,7 @@ TEST_DIR = os.path.dirname(os.path.abspath(__file__))

pytestmark = pytest.mark.handler_flaky(
    'Websockets',
    os.name == 'nt' or sys.implementation.name == 'pypy',
    os.name != 'nt' and sys.implementation.name == 'pypy',
    reason='segfaults',
)
@@ -595,7 +595,7 @@ class YoutubeDL:
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp', 'available_at',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'save_count',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
@@ -3026,10 +3026,6 @@ class YoutubeDL:
        format_selector = self.format_selector
        while True:
            if interactive_format_selection:
                if not formats:
                    # Bypass interactive format selection if no formats & --ignore-no-formats-error
                    formats_to_download = None
                    break
                req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                                   + '(Press ENTER for default, or Ctrl+C to quit)'
                                   + self._format_screen(': ', self.Styles.EMPHASIS))
@@ -212,16 +212,9 @@ def _firefox_browser_dirs():

    else:
        yield from map(os.path.expanduser, (
            # New installations of FF147+ respect the XDG base directory specification
            # Ref: https://bugzilla.mozilla.org/show_bug.cgi?id=259356
            os.path.join(_config_home(), 'mozilla/firefox'),
            # Existing FF version<=146 installations
            '~/.mozilla/firefox',
            # Flatpak XDG: https://docs.flatpak.org/en/latest/conventions.html#xdg-base-directories
            '~/.var/app/org.mozilla.firefox/config/mozilla/firefox',
            '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
            # Snap installations do not respect the XDG base directory specification
            '~/snap/firefox/common/.mozilla/firefox',
            '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
        ))
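A self-contained approximation of the new search order (`_config_home()` is yt-dlp's internal helper; the standard XDG default is substituted here):

```python
import os

def firefox_profile_roots():
    config_home = os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
    yield from map(os.path.expanduser, (
        os.path.join(config_home, 'mozilla/firefox'),             # FF147+ (XDG spec)
        '~/.mozilla/firefox',                                     # FF<=146
        '~/.var/app/org.mozilla.firefox/config/mozilla/firefox',  # Flatpak, XDG layout
        '~/.var/app/org.mozilla.firefox/.mozilla/firefox',        # Flatpak, legacy layout
        '~/snap/firefox/common/.mozilla/firefox',                 # Snap
    ))
```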

@@ -461,8 +461,7 @@ class FileDownloader:
        min_sleep_interval = self.params.get('sleep_interval') or 0
        max_sleep_interval = self.params.get('max_sleep_interval') or 0

        requested_formats = info_dict.get('requested_formats') or [info_dict]
        if available_at := max(f.get('available_at') or 0 for f in requested_formats):
        if available_at := info_dict.get('available_at'):
            forced_sleep_interval = available_at - int(time.time())
            if forced_sleep_interval > min_sleep_interval:
                sleep_note = 'as required by the site'
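The change makes the forced wait honor `available_at` across all requested formats instead of only the merged info dict. A condensed sketch of the new selection (helper name and return convention are mine):

```python
import time

def forced_wait(info_dict):
    # Use the latest availability time of any requested format
    requested = info_dict.get('requested_formats') or [info_dict]
    available_at = max(f.get('available_at') or 0 for f in requested)
    return max(available_at - int(time.time()), 0)
```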

@@ -457,8 +457,6 @@ class FFmpegFD(ExternalFD):

    @classmethod
    def available(cls, path=None):
        # TODO: Fix path for ffmpeg
        # Fixme: This may be wrong when --ffmpeg-location is used
        return FFmpegPostProcessor().available

    def on_process_started(self, proc, stdin):
@@ -75,7 +75,6 @@ from .afreecatv import (
    AfreecaTVLiveIE,
    AfreecaTVUserIE,
)
from .agalega import AGalegaIE
from .agora import (
    TokFMAuditionIE,
    TokFMPodcastIE,
@@ -84,7 +83,6 @@ from .agora import (
)
from .airtv import AirTVIE
from .aitube import AitubeKZVideoIE
from .alibaba import AlibabaIE
from .aliexpress import AliExpressLiveIE
from .aljazeera import AlJazeeraIE
from .allocine import AllocineIE
@@ -431,7 +429,6 @@ from .cpac import (
)
from .cracked import CrackedIE
from .craftsy import CraftsyIE
from .croatianfilm import CroatianFilmIE
from .crooksandliars import CrooksAndLiarsIE
from .crowdbunker import (
    CrowdBunkerChannelIE,
@@ -639,7 +636,6 @@ from .fc2 import (
)
from .fczenit import FczenitIE
from .fifa import FifaIE
from .filmarchiv import FilmArchivIE
from .filmon import (
    FilmOnChannelIE,
    FilmOnIE,
@@ -695,10 +691,6 @@ from .frontendmasters import (
    FrontendMastersIE,
    FrontendMastersLessonIE,
)
from .frontro import (
    TheChosenGroupIE,
    TheChosenIE,
)
from .fujitv import FujiTVFODPlus7IE
from .funk import FunkIE
from .funker530 import Funker530IE
@@ -1088,6 +1080,11 @@ from .mangomolo import (
    MangomoloLiveIE,
    MangomoloVideoIE,
)
from .manoto import (
    ManotoTVIE,
    ManotoTVLiveIE,
    ManotoTVShowIE,
)
from .manyvids import ManyVidsIE
from .maoritv import MaoriTVIE
from .markiza import (
@@ -1097,10 +1094,7 @@ from .markiza import (
from .massengeschmacktv import MassengeschmackTVIE
from .masters import MastersIE
from .matchtv import MatchTVIE
from .mave import (
    MaveChannelIE,
    MaveIE,
)
from .mave import MaveIE
from .mbn import MBNIE
from .mdr import MDRIE
from .medaltv import MedalTVIE
@@ -1275,7 +1269,6 @@ from .nebula import (
    NebulaChannelIE,
    NebulaClassIE,
    NebulaIE,
    NebulaSeasonIE,
    NebulaSubscriptionsIE,
)
from .nekohacker import NekoHackerIE
@@ -1284,10 +1277,6 @@ from .nest import (
    NestClipIE,
    NestIE,
)
from .netapp import (
    NetAppCollectionIE,
    NetAppVideoIE,
)
from .neteasemusic import (
    NetEaseMusicAlbumIE,
    NetEaseMusicDjRadioIE,
@@ -1310,6 +1299,12 @@ from .newgrounds import (
)
from .newspicks import NewsPicksIE
from .newsy import NewsyIE
from .nextmedia import (
    AppleDailyIE,
    NextMediaActionNewsIE,
    NextMediaIE,
    NextTVIE,
)
from .nexx import (
    NexxEmbedIE,
    NexxIE,
@@ -1478,7 +1473,6 @@ from .palcomp3 import (
    PalcoMP3IE,
    PalcoMP3VideoIE,
)
from .pandatv import PandaTvIE
from .panopto import (
    PanoptoIE,
    PanoptoListIE,
@@ -1827,6 +1821,10 @@ from .scrippsnetworks import (
    ScrippsNetworksWatchIE,
)
from .scrolller import ScrolllerIE
from .scte import (
    SCTEIE,
    SCTECourseIE,
)
from .sejmpl import SejmIE
from .sen import SenIE
from .senalcolombia import SenalColombiaLiveIE
@@ -2008,11 +2006,6 @@ from .taptap import (
    TapTapMomentIE,
    TapTapPostIntlIE,
)
from .tarangplus import (
    TarangPlusEpisodesIE,
    TarangPlusPlaylistIE,
    TarangPlusVideoIE,
)
from .tass import TassIE
from .tbs import TBSIE
from .tbsjp import (
@@ -2530,7 +2523,6 @@ from .yappy import (
    YappyIE,
    YappyProfileIE,
)
from .yfanefa import YfanefaIE
from .yle_areena import YleAreenaIE
from .youjizz import YouJizzIE
from .youku import (
@@ -1,91 +0,0 @@
|
||||
import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import jwt_decode_hs256, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AGalegaBaseIE(InfoExtractor):
|
||||
_access_token = None
|
||||
|
||||
@staticmethod
|
||||
def _jwt_is_expired(token):
|
||||
return jwt_decode_hs256(token)['exp'] - time.time() < 120
|
||||
|
||||
def _refresh_access_token(self, video_id):
|
||||
AGalegaBaseIE._access_token = self._download_json(
|
||||
'https://www.agalega.gal/api/fetch-api/jwt/token', video_id,
|
||||
note='Downloading access token',
|
||||
data=json.dumps({
|
||||
'username': None,
|
||||
'password': None,
|
||||
'client': 'crtvg',
|
||||
'checkExistsCookies': False,
|
||||
}).encode())['access']
|
||||
|
||||
def _call_api(self, endpoint, display_id, note, fatal=True, query=None):
|
||||
if not AGalegaBaseIE._access_token or self._jwt_is_expired(AGalegaBaseIE._access_token):
|
||||
self._refresh_access_token(endpoint)
|
||||
return self._download_json(
|
||||
f'https://api-agalega.interactvty.com/api/2.0/contents/{endpoint}', display_id,
|
||||
note=note, fatal=fatal, query=query,
|
||||
headers={'Authorization': f'jwtok {AGalegaBaseIE._access_token}'})
|
||||
|
||||
|
||||
class AGalegaIE(AGalegaBaseIE):
|
||||
IE_NAME = 'agalega:videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?agalega\.gal/videos/(?:detail/)?(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.agalega.gal/videos/288664-lr-ninguencheconta',
|
||||
'md5': '04533a66c5f863d08dd9724b11d1c223',
|
||||
'info_dict': {
|
||||
'id': '288664',
|
||||
'title': 'Roberto e Ángel Martín atenden consultas dos espectadores',
|
||||
'description': 'O cómico ademais fai un repaso dalgúns momentos da súa traxectoria profesional',
|
||||
'thumbnail': 'https://crtvg-bucket.flumotion.cloud/content_cards/2ef32c3b9f6249d9868fd8f11d389d3d.png',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.agalega.gal/videos/detail/296152-pulso-activo-7',
|
||||
'md5': '26df7fdcf859f38ad92d837279d6b56d',
|
||||
'info_dict': {
|
||||
'id': '296152',
|
||||
'title': 'Pulso activo | 18-11-2025',
|
||||
'description': 'Anxo, Noemí, Silvia e Estrella comparten as sensacións da clase de Eddy.',
|
||||
'thumbnail': 'https://crtvg-bucket.flumotion.cloud/content_cards/a6bb7da6c8994b82bf961ac6cad1707b.png',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
content_data = self._call_api(
|
||||
f'content/{video_id}/', video_id, note='Downloading content data', fatal=False,
|
||||
query={
|
||||
'optional_fields': 'image,is_premium,short_description,has_subtitle',
|
||||
})
|
||||
resource_data = self._call_api(
|
||||
f'content_resources/{video_id}/', video_id, note='Downloading resource data',
|
||||
query={
|
||||
'optional_fields': 'media_url',
|
||||
})
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for m3u8_url in traverse_obj(resource_data, ('results', ..., 'media_url', {url_or_none})):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id='hls')
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(content_data, {
|
||||
'title': ('name', {str}),
|
||||
'description': (('description', 'short_description'), {str}, any),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
}),
|
||||
}
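
AGalegaBaseIE treats a token as expired two minutes before its `exp` claim so a request never races the deadline. A standalone sketch of the same check, decoding the JWT payload by hand instead of via yt-dlp's `jwt_decode_hs256` (the function names here are illustrative, not part of the extractor):

import base64
import json
import time

def jwt_payload(token):
    # A JWT is 'header.payload.signature'; the payload is base64url-encoded JSON
    payload = token.split('.')[1]
    return json.loads(base64.urlsafe_b64decode(payload + '=' * (-len(payload) % 4)))

def jwt_is_expired(token, margin=120):
    # Report expiry `margin` seconds early, matching the extractor's 120s buffer
    return jwt_payload(token)['exp'] - time.time() < margin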

@@ -1,42 +0,0 @@
from .common import InfoExtractor
from ..utils import int_or_none, str_or_none, url_or_none
from ..utils.traversal import traverse_obj


class AlibabaIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?alibaba\.com/product-detail/[\w-]+_(?P<id>\d+)\.html'
    _TESTS = [{
        'url': 'https://www.alibaba.com/product-detail/Kids-Entertainment-Bouncer-Bouncy-Castle-Waterslide_1601271126969.html',
        'info_dict': {
            'id': '6000280444270',
            'display_id': '1601271126969',
            'ext': 'mp4',
            'title': 'Kids Entertainment Bouncer Bouncy Castle Waterslide Juex Gonflables Commercial Inflatable Tropical Water Slide',
            'duration': 30,
            'thumbnail': 'https://sc04.alicdn.com/kf/Hc5bb391974454af18c7a4f91cbe4062bg.jpg_120x120.jpg',
        },
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        product_data = self._search_json(
            r'window\.detailData\s*=', webpage, 'detail data', display_id)['globalData']['product']

        return {
            **traverse_obj(product_data, ('mediaItems', lambda _, v: v['type'] == 'video' and v['videoId'], any, {
                'id': ('videoId', {int}, {str_or_none}),
                'duration': ('duration', {int_or_none}),
                'thumbnail': ('videoCoverUrl', {url_or_none}),
                'formats': ('videoUrl', lambda _, v: url_or_none(v['videoUrl']), {
                    'url': 'videoUrl',
                    'format_id': ('definition', {str_or_none}),
                    'tbr': ('bitrate', {int_or_none}),
                    'width': ('width', {int_or_none}),
                    'height': ('height', {int_or_none}),
                    'filesize': ('length', {int_or_none}),
                }),
            })),
            'title': traverse_obj(product_data, ('subject', {str})),
            'display_id': display_id,
        }

@@ -279,7 +279,7 @@ class ArchiveOrgIE(InfoExtractor):
                'url': 'https://archive.org/' + track['file'].lstrip('/'),
            }

        metadata = self._download_json(f'https://archive.org/metadata/{identifier}', identifier)
        metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
        m = metadata['metadata']
        identifier = m['identifier']

@@ -704,24 +704,6 @@ class YoutubeWebArchiveIE(InfoExtractor):
            'thumbnail': 'https://web.archive.org/web/20160108040020if_/https://i.ytimg.com/vi/SQCom7wjGDs/maxresdefault.jpg',
            'upload_date': '20160107',
        },
    }, {
        # dmuxed formats
        'url': 'https://web.archive.org/web/20240922160632/https://www.youtube.com/watch?v=z7hzvTL3k1k',
        'info_dict': {
            'id': 'z7hzvTL3k1k',
            'ext': 'webm',
            'title': 'Praise the Lord and Pass the Ammunition (BARRXN REMIX)',
            'description': 'md5:45dbf2c71c23b0734c8dfb82dd1e94b6',
            'uploader': 'Barrxn',
            'uploader_id': 'TheRockstar6086',
            'uploader_url': 'https://www.youtube.com/user/TheRockstar6086',
            'channel_id': 'UCjJPGUTtvR9uizmawn2ThqA',
            'channel_url': 'https://www.youtube.com/channel/UCjJPGUTtvR9uizmawn2ThqA',
            'duration': 125,
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'upload_date': '20201207',
        },
        'params': {'format': 'bv'},
    }, {
        'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
        'only_matching': True,
@@ -1078,19 +1060,6 @@ class YoutubeWebArchiveIE(InfoExtractor):
        capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
        return orderedSet(filter(None, capture_dates))

    def _parse_fmt(self, fmt, extra_info=None):
        format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
        return {
            'format_id': format_id,
            **self._FORMATS.get(format_id, {}),
            **traverse_obj(fmt, {
                'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
                'ext': ('ext', {str}),
                'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
            }),
            **(extra_info or {}),
        }

    def _real_extract(self, url):
        video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
        url_date = url_date or url_date_2
@@ -1121,14 +1090,17 @@ class YoutubeWebArchiveIE(InfoExtractor):
            info['thumbnails'] = self._extract_thumbnails(video_id)

        formats = []
        if video_info.get('dmux'):
            for vf in traverse_obj(video_info, ('formats', 'video', lambda _, v: url_or_none(v['url']))):
                formats.append(self._parse_fmt(vf, {'acodec': 'none'}))
            for af in traverse_obj(video_info, ('formats', 'audio', lambda _, v: url_or_none(v['url']))):
                formats.append(self._parse_fmt(af, {'vcodec': 'none'}))
        else:
            for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
                formats.append(self._parse_fmt(fmt))
        for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
            format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
            formats.append({
                'format_id': format_id,
                **self._FORMATS.get(format_id, {}),
                **traverse_obj(fmt, {
                    'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
                    'ext': ('ext', {str}),
                    'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
                }),
            })
        info['formats'] = formats

        return info

@@ -5,18 +5,16 @@ import time

from .common import InfoExtractor
from ..utils import (
    KNOWN_EXTENSIONS,
    ExtractorError,
    clean_html,
    extract_attributes,
    float_or_none,
    format_field,
    int_or_none,
    join_nonempty,
    parse_filesize,
    parse_qs,
    str_or_none,
    strftime_or_none,
    try_get,
    unified_strdate,
    unified_timestamp,
    update_url_query,
    url_or_none,
@@ -413,67 +411,70 @@ class BandcampAlbumIE(BandcampIE):  # XXX: Do not subclass from concrete IE

class BandcampWeeklyIE(BandcampIE):  # XXX: Do not subclass from concrete IE
    IE_NAME = 'Bandcamp:weekly'
    _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/radio/?\?(?:[^#]+&)?show=(?P<id>\d+)'
    _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://bandcamp.com/radio?show=224',
        'url': 'https://bandcamp.com/?show=224',
        'md5': '61acc9a002bed93986b91168aa3ab433',
        'info_dict': {
            'id': '224',
            'ext': 'mp3',
            'title': 'Bandcamp Weekly, 2017-04-04',
            'title': 'BC Weekly April 4th 2017 - Magic Moments',
            'description': 'md5:5d48150916e8e02d030623a48512c874',
            'thumbnail': 'https://f4.bcbits.com/img/9982549_0.jpg',
            'series': 'Bandcamp Weekly',
            'episode_id': '224',
            'release_timestamp': 1491264000,
            'release_date': '20170404',
            'duration': 5829.77,
            'release_date': '20170404',
            'series': 'Bandcamp Weekly',
            'episode': 'Magic Moments',
            'episode_id': '224',
        },
        'params': {
            'format': 'mp3-128',
        },
    }, {
        'url': 'https://bandcamp.com/radio/?foo=bar&show=224',
        'url': 'https://bandcamp.com/?blah/blah@&show=228',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        show_id = self._match_id(url)
        audio_data = self._download_json(
            'https://bandcamp.com/api/bcradio_api/1/get_show',
            show_id, 'Downloading radio show JSON',
            data=json.dumps({'id': show_id}).encode(),
            headers={'Content-Type': 'application/json'})['radioShowAudio']
        webpage = self._download_webpage(url, show_id)

        stream_url = audio_data['streamUrl']
        format_id = traverse_obj(stream_url, ({parse_qs}, 'enc', -1))
        encoding, _, bitrate_str = (format_id or '').partition('-')
        blob = self._extract_data_attr(webpage, show_id, 'blob')

        webpage = self._download_webpage(url, show_id, fatal=False)
        metadata = traverse_obj(
            self._extract_data_attr(webpage, show_id, 'blob', fatal=False),
            ('appData', 'shows', lambda _, v: str(v['showId']) == show_id, any)) or {}
        show = blob['bcw_data'][show_id]

        series_title = audio_data.get('title') or metadata.get('title')
        release_timestamp = unified_timestamp(audio_data.get('date')) or unified_timestamp(metadata.get('date'))
        formats = []
        for format_id, format_url in show['audio_stream'].items():
            if not url_or_none(format_url):
                continue
            for known_ext in KNOWN_EXTENSIONS:
                if known_ext in format_id:
                    ext = known_ext
                    break
            else:
                ext = None
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'ext': ext,
                'vcodec': 'none',
            })

        title = show.get('audio_title') or 'Bandcamp Weekly'
        subtitle = show.get('subtitle')
        if subtitle:
            title += f' - {subtitle}'

        return {
            'id': show_id,
            'title': title,
            'description': show.get('desc') or show.get('short_desc'),
            'duration': float_or_none(show.get('audio_duration')),
            'is_live': False,
            'release_date': unified_strdate(show.get('published_date')),
            'series': 'Bandcamp Weekly',
            'episode': show.get('subtitle'),
            'episode_id': show_id,
            'title': join_nonempty(series_title, strftime_or_none(release_timestamp, '%Y-%m-%d'), delim=', '),
            'series': series_title,
            'thumbnail': format_field(metadata, 'imageId', 'https://f4.bcbits.com/img/%s_0.jpg', default=None),
            'description': metadata.get('desc') or metadata.get('short_desc'),
            'duration': float_or_none(audio_data.get('duration')),
            'release_timestamp': release_timestamp,
            'formats': [{
                'url': stream_url,
                'format_id': format_id,
                'ext': encoding or 'mp3',
                'acodec': encoding or None,
                'vcodec': 'none',
                'abr': int_or_none(bitrate_str),
            }],
            'formats': formats,
        }
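
The rewritten BandcampWeeklyIE derives codec and bitrate from the `enc` query parameter of the stream URL (e.g. `enc=mp3-128`). A minimal sketch of that parsing using only the standard library (the URL itself is made up):

import urllib.parse

stream_url = 'https://example.com/stream/224.mp3?enc=mp3-128'  # illustrative URL
query = urllib.parse.parse_qs(urllib.parse.urlparse(stream_url).query)
format_id = query['enc'][-1]
encoding, _, bitrate_str = format_id.partition('-')
print(encoding, bitrate_str)  # -> mp3 128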

@@ -1,5 +1,5 @@
from .common import InfoExtractor
from ..utils import ExtractorError, UserNotLive, urlencode_postdata
from ..utils import ExtractorError, urlencode_postdata


class BigoIE(InfoExtractor):
@@ -40,7 +40,7 @@ class BigoIE(InfoExtractor):
        info = info_raw.get('data') or {}

        if not info.get('alive'):
            raise UserNotLive(video_id=user_id)
            raise ExtractorError('This user is offline.', expected=True)

        formats, subs = self._extract_m3u8_formats_and_subtitles(
            info.get('hls_src'), user_id, 'mp4', 'm3u8')

@@ -27,7 +27,7 @@ from ..utils.traversal import traverse_obj


class CDAIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:(?:www|m)\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
    _NETRC_MACHINE = 'cdapl'

    _BASE_URL = 'https://www.cda.pl'
@@ -110,9 +110,6 @@ class CDAIE(InfoExtractor):
    }, {
        'url': 'http://ebd.cda.pl/0x0/5749950c',
        'only_matching': True,
    }, {
        'url': 'https://m.cda.pl/video/617297677',
        'only_matching': True,
    }]

    def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
@@ -370,35 +367,35 @@ class CDAIE(InfoExtractor):

class CDAFolderIE(InfoExtractor):
    _MAX_PAGE_SIZE = 36
    _VALID_URL = r'https?://(?:(?:www|m)\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.cda.pl/domino264/folder/31188385',
        'info_dict': {
            'id': '31188385',
            'title': 'SERIA DRUGA',
    _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://www.cda.pl/domino264/folder/31188385',
            'info_dict': {
                'id': '31188385',
                'title': 'SERIA DRUGA',
            },
            'playlist_mincount': 13,
        },
        'playlist_mincount': 13,
    }, {
        'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
        'info_dict': {
            'id': '2664592',
            'title': 'VideoDowcipy - wszystkie odcinki',
        {
            'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
            'info_dict': {
                'id': '2664592',
                'title': 'VideoDowcipy - wszystkie odcinki',
            },
            'playlist_mincount': 71,
        },
        'playlist_mincount': 71,
    }, {
        'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
        'info_dict': {
            'id': '19129979',
            'title': 'TESTY KOSMETYKÓW',
        },
        'playlist_mincount': 139,
    }, {
        'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
        'only_matching': True,
    }, {
        'url': 'https://m.cda.pl/smiechawaTV/folder/2664592/vfilm',
        'only_matching': True,
    }]
        {
            'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
            'info_dict': {
                'id': '19129979',
                'title': 'TESTY KOSMETYKÓW',
            },
            'playlist_mincount': 139,
        }, {
            'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
            'only_matching': True,
        }]

    def _real_extract(self, url):
        folder_id, channel = self._match_valid_url(url).group('id', 'channel')

@@ -348,7 +348,6 @@ class InfoExtractor:
    duration:       Length of the video in seconds, as an integer or float.
    view_count:     How many users have watched the video on the platform.
    concurrent_view_count: How many users are currently watching the video on the platform.
    save_count:     Number of times the video has been saved or bookmarked
    like_count:     Number of positive ratings of the video
    dislike_count:  Number of negative ratings of the video
    repost_count:   Number of reposts of the video

@@ -1,79 +0,0 @@
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import (
    ExtractorError,
    join_nonempty,
)
from ..utils.traversal import traverse_obj


class CroatianFilmIE(InfoExtractor):
    IE_NAME = 'croatian.film'
    _VALID_URL = r'https?://(?:www\.)?croatian\.film/[a-z]{2}/[^/?#]+/(?P<id>\d+)'
    _GEO_COUNTRIES = ['HR']

    _TESTS = [{
        'url': 'https://www.croatian.film/hr/films/72472',
        'info_dict': {
            'id': '1078340774',
            'ext': 'mp4',
            'title': '“ŠKAFETIN”, r. Paško Vukasović',
            'uploader': 'croatian.film',
            'uploader_id': 'user94192658',
            'uploader_url': 'https://vimeo.com/user94192658',
            'duration': 1357,
            'thumbnail': 'https://i.vimeocdn.com/video/2008556407-40eb1315ec11be5fcb8dda4d7059675b0881e182b9fc730892e267db72cb57f5-d',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # geo-restricted but works with xff
        'url': 'https://www.croatian.film/en/films/77144',
        'info_dict': {
            'id': '1144997795',
            'ext': 'mp4',
            'title': '“ROKO” r. Ivana Marinić Kragić',
            'uploader': 'croatian.film',
            'uploader_id': 'user94192658',
            'uploader_url': 'https://vimeo.com/user94192658',
            'duration': 1023,
            'thumbnail': 'https://i.vimeocdn.com/video/2093793231-11c2928698ff8347489e679b4d563a576e7acd0681ce95b383a9a25f6adb5e8f-d',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        'url': 'https://www.croatian.film/en/films/75904/watch',
        'info_dict': {
            'id': '1134883757',
            'ext': 'mp4',
            'title': '"CARPE DIEM" r. Nina Damjanović',
            'uploader': 'croatian.film',
            'uploader_id': 'user94192658',
            'uploader_url': 'https://vimeo.com/user94192658',
            'duration': 1123,
            'thumbnail': 'https://i.vimeocdn.com/video/2080022187-bb691c470c28c4d979258cf235e594bf9a11c14b837a0784326c25c95edd83f9-d',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        api_data = self._download_json(
            f'https://api.croatian.film/api/videos/{display_id}',
            display_id)

        if errors := traverse_obj(api_data, ('errors', lambda _, v: v['code'])):
            codes = traverse_obj(errors, (..., 'code', {str}))
            if 'INVALID_COUNTRY' in codes:
                self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
            raise ExtractorError(join_nonempty(
                *(traverse_obj(errors, (..., 'details', {str})) or codes),
                delim='; '))

        vimeo_id = self._search_regex(
            r'/videos/(\d+)', api_data['video']['vimeoURL'], 'vimeo ID')

        return self.url_result(
            VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', url),
            VimeoIE, vimeo_id)

@@ -1,6 +1,5 @@
from .common import InfoExtractor
from ..utils import int_or_none, url_or_none
from ..utils.traversal import traverse_obj
from ..utils import int_or_none


class DigitekaIE(InfoExtractor):
@@ -26,56 +25,74 @@ class DigitekaIE(InfoExtractor):
        )/(?P<id>[\d+a-z]+)'''
    _EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)']
    _TESTS = [{
        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/3x5x55k',
        # news
        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
        'md5': '276a0e49de58c7e85d32b057837952a2',
        'info_dict': {
            'id': '3x5x55k',
            'id': 's8uk0r',
            'ext': 'mp4',
            'title': 'Il est passionné de DS',
            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 89,
            'upload_date': '20251012',
            'timestamp': 1760285363,
            'uploader_id': '3pz33',
            'duration': 74,
            'upload_date': '20150317',
            'timestamp': 1426604939,
            'uploader_id': '3fszv',
        },
        'params': {'skip_download': True},
    }, {
        # music
        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
        'info_dict': {
            'id': 'xvpfp8',
            'ext': 'mp4',
            'title': 'Two - C\'est La Vie (clip)',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 233,
            'upload_date': '20150224',
            'timestamp': 1424760500,
            'uploader_id': '3rfzk',
        },
    }, {
        'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
        'only_matching': True,
    }]
    _IFRAME_MD_ID = '01836272'  # One static ID working for Ultimedia iframes

    def _real_extract(self, url):
        video_id = self._match_id(url)
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        video_type = mobj.group('embed_type') or mobj.group('site_type')
        if video_type == 'music':
            video_type = 'musique'

        video_info = self._download_json(
            f'https://www.ultimedia.com/player/getConf/{self._IFRAME_MD_ID}/1/{video_id}', video_id,
            note='Downloading player configuration')['video']
        deliver_info = self._download_json(
            f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}',
            video_id)

        yt_id = deliver_info.get('yt_id')
        if yt_id:
            return self.url_result(yt_id, 'Youtube')

        jwconf = deliver_info['jwconf']

        formats = []
        subtitles = {}

        if hls_url := traverse_obj(video_info, ('media_sources', 'hls', 'hls_auto', {url_or_none})):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        for format_id, mp4_url in traverse_obj(video_info, ('media_sources', 'mp4', {dict.items}, ...)):
            if not mp4_url:
                continue
        for source in jwconf['playlist'][0]['sources']:
            formats.append({
                'url': mp4_url,
                'format_id': format_id,
                'height': int_or_none(format_id.partition('_')[2]),
                'ext': 'mp4',
                'url': source['file'],
                'format_id': source.get('label'),
            })

        title = deliver_info['title']
        thumbnail = jwconf.get('image')
        duration = int_or_none(deliver_info.get('duration'))
        timestamp = int_or_none(deliver_info.get('release_time'))
        uploader_id = deliver_info.get('owner_id')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(video_info, {
                'title': ('title', {str}),
                'thumbnail': ('image', {url_or_none}),
                'duration': ('duration', {int_or_none}),
                'timestamp': ('creationDate', {int_or_none}),
                'uploader_id': ('ownerId', {str}),
            }),
        }
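
The getConf-based Digiteka code maps API fields through `traverse_obj`, which silently drops missing or mistyped values instead of raising. A small illustration of that behaviour (assumes yt-dlp is importable; the sample dict is made up):

from yt_dlp.utils import int_or_none
from yt_dlp.utils.traversal import traverse_obj

video_info = {'title': 'Demo', 'duration': '74', 'image': None}  # illustrative data
print(traverse_obj(video_info, {
    'title': ('title', {str}),                # {type} acts as an isinstance filter
    'duration': ('duration', {int_or_none}),  # {callable} transforms: '74' -> 74
    'thumbnail': ('image', {str}),            # None fails the filter, key is dropped
}))
# -> {'title': 'Demo', 'duration': 74}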

@@ -14,7 +14,7 @@ from ..utils import (

class DropboxIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/f[io]|sh?)/(?P<id>\w+)'
    _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
    _TESTS = [
        {
            'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
@@ -35,9 +35,6 @@ class DropboxIE(InfoExtractor):
        }, {
            'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
            'only_matching': True,
        }, {
            'url': 'https://www.dropbox.com/scl/fo/zjfqse5txqfd7twa8iewj/AOfZzSYWUSKle2HD7XF7kzQ/A-BEAT%20C.mp4?rlkey=6tg3jkp4tv6a5vt58a6dag0mm&dl=0',
            'only_matching': True,
        },
    ]

@@ -4,6 +4,8 @@ import urllib.parse

from .common import InfoExtractor
from ..compat import compat_etree_fromstring
from ..networking import Request
from ..networking.exceptions import network_exceptions
from ..utils import (
    ExtractorError,
    clean_html,
@@ -62,6 +64,9 @@ class FacebookIE(InfoExtractor):
                class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
                data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''',
    ]
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    _NETRC_MACHINE = 'facebook'
    IE_NAME = 'facebook'

    _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
@@ -464,6 +469,65 @@ class FacebookIE(InfoExtractor):
        'graphURI': '/api/graphql/',
    }

    def _perform_login(self, username, password):
        login_page_req = Request(self._LOGIN_URL)
        self._set_cookie('facebook.com', 'locale', 'en_US')
        login_page = self._download_webpage(login_page_req, None,
                                            note='Downloading login page',
                                            errnote='Unable to download login page')
        lsd = self._search_regex(
            r'<input type="hidden" name="lsd" value="([^"]*)"',
            login_page, 'lsd')
        lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')

        login_form = {
            'email': username,
            'pass': password,
            'lsd': lsd,
            'lgnrnd': lgnrnd,
            'next': 'http://facebook.com/home.php',
            'default_persistent': '0',
            'legacy_return': '1',
            'timezone': '-60',
            'trynum': '1',
        }
        request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
        request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
        try:
            login_results = self._download_webpage(request, None,
                                                   note='Logging in', errnote='unable to fetch login page')
            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                error = self._html_search_regex(
                    r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
                    login_results, 'login error', default=None, group='error')
                if error:
                    raise ExtractorError(f'Unable to login: {error}', expected=True)
                self.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
                return

            fb_dtsg = self._search_regex(
                r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
            h = self._search_regex(
                r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)

            if not fb_dtsg or not h:
                return

            check_form = {
                'fb_dtsg': fb_dtsg,
                'h': h,
                'name_action_selected': 'dont_save',
            }
            check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
            check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
            check_response = self._download_webpage(check_req, None,
                                                    note='Confirming login')
            if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
                self.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.')
        except network_exceptions as err:
            self.report_warning(f'unable to log in: {err}')
            return

    def _extract_from_url(self, url, video_id):
        webpage = self._download_webpage(
            url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)

@@ -5,7 +5,6 @@ from .common import InfoExtractor
from ..networking import Request
from ..utils import (
    ExtractorError,
    UserNotLive,
    js_to_json,
    traverse_obj,
    update_url_query,
@@ -206,9 +205,6 @@ class FC2LiveIE(InfoExtractor):
            'client_app': 'browser_hls',
            'ipv6': '',
        }), headers={'X-Requested-With': 'XMLHttpRequest'})
        # A non-zero 'status' indicates the stream is not live, so check truthiness
        if traverse_obj(control_server, ('status', {int})) and 'control_token' not in control_server:
            raise UserNotLive(video_id=video_id)
        self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])

        ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})

@@ -1,52 +0,0 @@
from .common import InfoExtractor
from ..utils import clean_html
from ..utils.traversal import (
    find_element,
    find_elements,
    traverse_obj,
)


class FilmArchivIE(InfoExtractor):
    IE_DESC = 'FILMARCHIV ON'
    _VALID_URL = r'https?://(?:www\.)?filmarchiv\.at/de/filmarchiv-on/video/(?P<id>f_[0-9a-zA-Z]{5,})'
    _TESTS = [{
        'url': 'https://www.filmarchiv.at/de/filmarchiv-on/video/f_0305p7xKrXUPBwoNE9x6mh',
        'md5': '54a6596f6a84624531866008a77fa27a',
        'info_dict': {
            'id': 'f_0305p7xKrXUPBwoNE9x6mh',
            'ext': 'mp4',
            'title': 'Der Wurstelprater zur Kaiserzeit',
            'description': 'md5:9843f92df5cc9a4975cee7aabcf6e3b2',
            'thumbnail': r're:https://cdn\.filmarchiv\.at/f_0305/p7xKrXUPBwoNE9x6mh_v1/poster\.jpg',
        },
    }, {
        'url': 'https://www.filmarchiv.at/de/filmarchiv-on/video/f_0306vI3wO0tJIsfrqYFQXF',
        'md5': '595385d7f54cb6529140ee8de7d1c3c7',
        'info_dict': {
            'id': 'f_0306vI3wO0tJIsfrqYFQXF',
            'ext': 'mp4',
            'title': 'Vor 70 Jahren: Wettgehen der Briefträger in Wien',
            'description': 'md5:b2a2e4230923cd1969d471c552e62811',
            'thumbnail': r're:https://cdn\.filmarchiv\.at/f_0306/vI3wO0tJIsfrqYFQXF_v1/poster\.jpg',
        },
    }]

    def _real_extract(self, url):
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)
        path = '/'.join((media_id[:6], media_id[6:]))
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'https://cdn.filmarchiv.at/{path}_v1_sv1/playlist.m3u8', media_id)

        return {
            'id': media_id,
            'title': traverse_obj(webpage, ({find_element(tag='title-div')}, {clean_html})),
            'description': traverse_obj(webpage, (
                {find_elements(tag='div', attr='class', value=r'.*\bborder-base-content\b', regex=True)}, ...,
                {find_elements(tag='div', attr='class', value=r'.*\bprose\b', html=False, regex=True)}, ...,
                {clean_html}, any)),
            'thumbnail': f'https://cdn.filmarchiv.at/{path}_v1/poster.jpg',
            'formats': formats,
            'subtitles': subtitles,
        }

@@ -1,164 +0,0 @@
import json

from .common import InfoExtractor
from ..utils import int_or_none, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj


class FrontoBaseIE(InfoExtractor):
    def _get_auth_headers(self, url):
        return traverse_obj(self._get_cookies(url), {
            'authorization': ('frAccessToken', 'value', {lambda token: f'Bearer {token}' if token else None}),
        })


class FrontroVideoBaseIE(FrontoBaseIE):
    _CHANNEL_ID = None

    def _real_extract(self, url):
        video_id = self._match_id(url)

        metadata = self._download_json(
            'https://api.frontrow.cc/query', video_id, data=json.dumps({
                'operationName': 'Video',
                'variables': {'channelID': self._CHANNEL_ID, 'videoID': video_id},
                'query': '''query Video($channelID: ID!, $videoID: ID!) {
                    video(ChannelID: $channelID, VideoID: $videoID) {
                        ... on Video {title description updatedAt thumbnail createdAt duration likeCount comments views url hasAccess}
                    }
                }''',
            }).encode(), headers={
                'content-type': 'application/json',
                **self._get_auth_headers(url),
            })['data']['video']
        if not traverse_obj(metadata, 'hasAccess'):
            self.raise_login_required()

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(metadata['url'], video_id)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(metadata, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'thumbnail': ('thumbnail', {url_or_none}),
                'timestamp': ('createdAt', {parse_iso8601}),
                'modified_timestamp': ('updatedAt', {parse_iso8601}),
                'duration': ('duration', {int_or_none}),
                'like_count': ('likeCount', {int_or_none}),
                'comment_count': ('comments', {int_or_none}),
                'view_count': ('views', {int_or_none}),
            }),
        }


class FrontroGroupBaseIE(FrontoBaseIE):
    _CHANNEL_ID = None
    _VIDEO_EXTRACTOR = None
    _VIDEO_URL_TMPL = None

    def _real_extract(self, url):
        group_id = self._match_id(url)

        metadata = self._download_json(
            'https://api.frontrow.cc/query', group_id, note='Downloading playlist metadata',
            data=json.dumps({
                'operationName': 'PaginatedStaticPageContainer',
                'variables': {'channelID': self._CHANNEL_ID, 'first': 500, 'pageContainerID': group_id},
                'query': '''query PaginatedStaticPageContainer($channelID: ID!, $pageContainerID: ID!) {
                    pageContainer(ChannelID: $channelID, PageContainerID: $pageContainerID) {
                        ... on StaticPageContainer { id title updatedAt createdAt itemRefs {edges {node {
                            id contentItem { ... on ItemVideo { videoItem: item {
                                id
                            }}}
                        }}}
                        }
                    }
                }''',
            }).encode(), headers={
                'content-type': 'application/json',
                **self._get_auth_headers(url),
            })['data']['pageContainer']

        entries = []
        for video_id in traverse_obj(metadata, (
                'itemRefs', 'edges', ..., 'node', 'contentItem', 'videoItem', 'id', {str}),
        ):
            entries.append(self.url_result(
                self._VIDEO_URL_TMPL % video_id, self._VIDEO_EXTRACTOR, video_id))

        return {
            '_type': 'playlist',
            'id': group_id,
            'entries': entries,
            **traverse_obj(metadata, {
                'title': ('title', {str}),
                'timestamp': ('createdAt', {parse_iso8601}),
                'modified_timestamp': ('updatedAt', {parse_iso8601}),
            }),
        }


class TheChosenIE(FrontroVideoBaseIE):
    _CHANNEL_ID = '12884901895'

    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/video/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://watch.thechosen.tv/video/184683594325',
        'md5': '3f878b689588c71b38ec9943c54ff5b0',
        'info_dict': {
            'id': '184683594325',
            'ext': 'mp4',
            'title': 'Season 3 Episode 2: Two by Two',
            'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'duration': 4212,
            'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683594325/',
            'timestamp': 1698954546,
            'upload_date': '20231102',
            'modified_timestamp': int,
            'modified_date': str,
        },
    }, {
        'url': 'https://watch.thechosen.tv/video/184683596189',
        'md5': 'd581562f9d29ce82f5b7770415334151',
        'info_dict': {
            'id': '184683596189',
            'ext': 'mp4',
            'title': 'Season 4 Episode 8: Humble',
            'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'duration': 5092,
            'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683596189/',
            'timestamp': 1715019474,
            'upload_date': '20240506',
            'modified_timestamp': int,
            'modified_date': str,
        },
    }]


class TheChosenGroupIE(FrontroGroupBaseIE):
    _CHANNEL_ID = '12884901895'
    _VIDEO_EXTRACTOR = TheChosenIE
    _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/video/%s'

    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://watch.thechosen.tv/group/309237658592',
        'info_dict': {
            'id': '309237658592',
            'title': 'Season 3',
            'timestamp': 1746203969,
            'upload_date': '20250502',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_count': 8,
    }]

@@ -821,17 +821,13 @@ class GenericIE(InfoExtractor):
                    'Referer': smuggled_data.get('referer'),
                }), impersonate=impersonate)
            except ExtractorError as e:
                if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
                    raise
                res = e.cause.response
                already_impersonating = res.extensions.get('impersonate') is not None
                if already_impersonating or (
                    res.get_header('cf-mitigated') != 'challenge'
                    and b'<title>Attention Required! | Cloudflare</title>' not in res.read()
                ):
                if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
                        and e.cause.response.get_header('cf-mitigated') == 'challenge'
                        and e.cause.response.extensions.get('impersonate') is None):
                    raise
                cf_cookie_domain = traverse_obj(
                    LenientSimpleCookie(res.get_header('set-cookie')), ('__cf_bm', 'domain'))
                    LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
                    ('__cf_bm', 'domain'))
                if cf_cookie_domain:
                    self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
                    self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
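
Both versions of this generic-extractor hunk retry a 403 with impersonation only when it looks like a Cloudflare challenge and no impersonation was already in effect; the newer form additionally sniffs the response body. A condensed sketch of the detection predicate (a hypothetical helper, not part of yt-dlp):

def is_cf_challenge(status, headers, body, already_impersonating):
    # Retry with TLS impersonation only for Cloudflare-mitigated 403s
    return (status == 403 and not already_impersonating
            and (headers.get('cf-mitigated') == 'challenge'
                 or b'<title>Attention Required! | Cloudflare</title>' in body))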

@@ -46,7 +46,6 @@ class GofileIE(InfoExtractor):
            'videopassword': 'password',
        },
    }]
    _STATIC_TOKEN = '4fd6sg89d7s6'  # From https://gofile.io/dist/js/config.js
    _TOKEN = None

    def _real_initialize(self):
@@ -61,16 +60,13 @@ class GofileIE(InfoExtractor):
        self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)

    def _entries(self, file_id):
        query_params = {}
        if password := self.get_param('videopassword'):
        query_params = {'wt': '4fd6sg89d7s6'}  # From https://gofile.io/dist/js/alljs.js
        password = self.get_param('videopassword')
        if password:
            query_params['password'] = hashlib.sha256(password.encode()).hexdigest()

        files = self._download_json(
            f'https://api.gofile.io/contents/{file_id}', file_id, 'Getting filelist',
            query=query_params, headers={
                'Authorization': f'Bearer {self._TOKEN}',
                'X-Website-Token': self._STATIC_TOKEN,
            })
            query=query_params, headers={'Authorization': f'Bearer {self._TOKEN}'})

        status = files['status']
        if status == 'error-passwordRequired':
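
Gofile expects the video password as a SHA-256 hex digest rather than plaintext, as the hunk above shows in both variants. The hashing step in isolation:

import hashlib

def gofile_password_param(password):
    # Gofile's API takes sha256(password) as the 'password' query parameter
    return hashlib.sha256(password.encode()).hexdigest()

print(gofile_password_param('hunter2'))  # 64-character hex digest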

@@ -27,7 +27,7 @@ class HotStarBaseIE(InfoExtractor):
    _TOKEN_NAME = 'userUP'
    _BASE_URL = 'https://www.hotstar.com'
    _API_URL = 'https://api.hotstar.com'
    _API_URL_V2 = 'https://www.hotstar.com/api/internal/bff/v2'
    _API_URL_V2 = 'https://apix.hotstar.com/v2'
    _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'

    _FREE_HEADERS = {

@@ -9,12 +9,14 @@ from .openload import PhantomJSwrapper
from ..utils import (
    ExtractorError,
    clean_html,
    decode_packed_codes,
    float_or_none,
    format_field,
    get_element_by_attribute,
    get_element_by_id,
    int_or_none,
    js_to_json,
    ohdave_rsa_encrypt,
    parse_age_limit,
    parse_duration,
    parse_iso8601,
@@ -31,12 +33,143 @@ def md5_text(text):
    return hashlib.md5(text.encode()).hexdigest()


class IqiyiSDK:
    def __init__(self, target, ip, timestamp):
        self.target = target
        self.ip = ip
        self.timestamp = timestamp

    @staticmethod
    def split_sum(data):
        return str(sum(int(p, 16) for p in data))

    @staticmethod
    def digit_sum(num):
        if isinstance(num, int):
            num = str(num)
        return str(sum(map(int, num)))

    def even_odd(self):
        even = self.digit_sum(str(self.timestamp)[::2])
        odd = self.digit_sum(str(self.timestamp)[1::2])
        return even, odd

    def preprocess(self, chunksize):
        self.target = md5_text(self.target)
        chunks = []
        for i in range(32 // chunksize):
            chunks.append(self.target[chunksize * i:chunksize * (i + 1)])
        if 32 % chunksize:
            chunks.append(self.target[32 - 32 % chunksize:])
        return chunks, list(map(int, self.ip.split('.')))

    def mod(self, modulus):
        chunks, ip = self.preprocess(32)
        self.target = chunks[0] + ''.join(str(p % modulus) for p in ip)

    def split(self, chunksize):
        modulus_map = {
            4: 256,
            5: 10,
            8: 100,
        }

        chunks, ip = self.preprocess(chunksize)
        ret = ''
        for i in range(len(chunks)):
            ip_part = str(ip[i] % modulus_map[chunksize]) if i < 4 else ''
            if chunksize == 8:
                ret += ip_part + chunks[i]
            else:
                ret += chunks[i] + ip_part
        self.target = ret

    def handle_input16(self):
        self.target = md5_text(self.target)
        self.target = self.split_sum(self.target[:16]) + self.target + self.split_sum(self.target[16:])

    def handle_input8(self):
        self.target = md5_text(self.target)
        ret = ''
        for i in range(4):
            part = self.target[8 * i:8 * (i + 1)]
            ret += self.split_sum(part) + part
        self.target = ret

    def handleSum(self):
        self.target = md5_text(self.target)
        self.target = self.split_sum(self.target) + self.target

    def date(self, scheme):
        self.target = md5_text(self.target)
        d = time.localtime(self.timestamp)
        strings = {
            'y': str(d.tm_year),
            'm': '%02d' % d.tm_mon,
            'd': '%02d' % d.tm_mday,
        }
        self.target += ''.join(strings[c] for c in scheme)

    def split_time_even_odd(self):
        even, odd = self.even_odd()
        self.target = odd + md5_text(self.target) + even

    def split_time_odd_even(self):
        even, odd = self.even_odd()
        self.target = even + md5_text(self.target) + odd

    def split_ip_time_sum(self):
        chunks, ip = self.preprocess(32)
        self.target = str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp)

    def split_time_ip_sum(self):
        chunks, ip = self.preprocess(32)
        self.target = self.digit_sum(self.timestamp) + chunks[0] + str(sum(ip))


class IqiyiSDKInterpreter:
    def __init__(self, sdk_code):
        self.sdk_code = sdk_code

    def run(self, target, ip, timestamp):
        self.sdk_code = decode_packed_codes(self.sdk_code)

        functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code)

        sdk = IqiyiSDK(target, ip, timestamp)

        other_functions = {
            'handleSum': sdk.handleSum,
            'handleInput8': sdk.handle_input8,
            'handleInput16': sdk.handle_input16,
            'splitTimeEvenOdd': sdk.split_time_even_odd,
            'splitTimeOddEven': sdk.split_time_odd_even,
            'splitIpTimeSum': sdk.split_ip_time_sum,
            'splitTimeIpSum': sdk.split_time_ip_sum,
        }
        for function in functions:
            if re.match(r'mod\d+', function):
                sdk.mod(int(function[3:]))
            elif re.match(r'date[ymd]{3}', function):
                sdk.date(function[4:])
            elif re.match(r'split\d+', function):
                sdk.split(int(function[5:]))
            elif function in other_functions:
                other_functions[function]()
            else:
                raise ExtractorError(f'Unknown function {function}')

        return sdk.target


class IqiyiIE(InfoExtractor):
    IE_NAME = 'iqiyi'
    IE_DESC = '爱奇艺'

    _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'

    _NETRC_MACHINE = 'iqiyi'

    _TESTS = [{
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        # MD5 checksum differs on my machine and Travis CI
@@ -101,6 +234,57 @@ class IqiyiIE(InfoExtractor):
        '18': 7,  # 1080p
    }

    @staticmethod
    def _rsa_fun(data):
        # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
        N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
        e = 65537

        return ohdave_rsa_encrypt(data, e, N)

    def _perform_login(self, username, password):

        data = self._download_json(
            'http://kylin.iqiyi.com/get_token', None,
            note='Get token for logging', errnote='Unable to get token for logging')
        sdk = data['sdk']
        timestamp = int(time.time())
        target = (
            f'/apis/reglogin/login.action?lang=zh_TW&area_code=null&email={username}'
            f'&passwd={self._rsa_fun(password.encode())}&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1')

        interp = IqiyiSDKInterpreter(sdk)
        sign = interp.run(target, data['ip'], timestamp)

        validation_params = {
            'target': target,
            'server': 'BEA3AA1908656AABCCFF76582C4C6660',
            'token': data['token'],
            'bird_src': 'f8d91d57af224da7893dd397d52d811a',
            'sign': sign,
            'bird_t': timestamp,
        }
        validation_result = self._download_json(
            'http://kylin.iqiyi.com/validate?' + urllib.parse.urlencode(validation_params), None,
            note='Validate credentials', errnote='Unable to validate credentials')

        MSG_MAP = {
            'P00107': 'please login via the web interface and enter the CAPTCHA code',
            'P00117': 'bad username or password',
        }

        code = validation_result['code']
        if code != 'A00000':
            msg = MSG_MAP.get(code)
            if not msg:
                msg = f'error {code}'
                if validation_result.get('msg'):
                    msg += ': ' + validation_result['msg']
            self.report_warning('unable to log in: ' + msg)
            return False

        return True

    def get_raw_data(self, tvid, video_id):
        tm = int(time.time() * 1000)

@@ -8,10 +8,12 @@ from ..utils import (
    ExtractorError,
    determine_ext,
    filter_dict,
    get_first,
    int_or_none,
    parse_iso8601,
    update_url,
    url_or_none,
    variadic,
)
from ..utils.traversal import traverse_obj

@@ -49,7 +51,7 @@ class LoomIE(InfoExtractor):
    }, {
        # m3u8 raw-url, mp4 transcoded-url, cdn url == raw-url, vtt sub and json subs
        'url': 'https://www.loom.com/share/9458bcbf79784162aa62ffb8dd66201b',
        'md5': '7b6bfdef8181c4ffc376e18919a4dcc2',
        'md5': '51737ec002969dd28344db4d60b9cbbb',
        'info_dict': {
            'id': '9458bcbf79784162aa62ffb8dd66201b',
            'ext': 'mp4',
@@ -69,13 +71,12 @@ class LoomIE(InfoExtractor):
            'ext': 'webm',
            'title': 'OMFG clown',
            'description': 'md5:285c5ee9d62aa087b7e3271b08796815',
            'uploader': 'Brailey Bragg',
            'uploader': 'MrPumkin B',
            'upload_date': '20210924',
            'timestamp': 1632519618,
            'duration': 210,
        },
        'params': {'skip_download': 'dash'},
        'expected_warnings': ['Failed to parse JSON'],  # transcoded-url no longer available
    }, {
        # password-protected
        'url': 'https://www.loom.com/share/50e26e8aeb7940189dff5630f95ce1f4',
@@ -90,11 +91,10 @@ class LoomIE(InfoExtractor):
            'duration': 35,
        },
        'params': {'videopassword': 'seniorinfants2'},
        'expected_warnings': ['Failed to parse JSON'],  # transcoded-url no longer available
    }, {
        # embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
        'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
        'md5': 'f983a0f02f24331738b2f43aecb05256',
        'md5': 'b321d261656848c184a94e3b93eae28d',
        'info_dict': {
            'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
            'ext': 'mp4',
@@ -119,12 +119,11 @@ class LoomIE(InfoExtractor):
            'duration': 247,
            'timestamp': 1676274030,
        },
        'skip': '404 Not Found',
    }]

    _GRAPHQL_VARIABLES = {
        'GetVideoSource': {
            'acceptableMimes': ['DASH', 'M3U8', 'MP4', 'WEBM'],
            'acceptableMimes': ['DASH', 'M3U8', 'MP4'],
        },
    }
    _GRAPHQL_QUERIES = {
@@ -193,12 +192,6 @@ class LoomIE(InfoExtractor):
            id
            nullableRawCdnUrl(acceptableMimes: $acceptableMimes, password: $password) {
                url
                credentials {
                    Policy
                    Signature
                    KeyPairId
                    __typename
                }
                __typename
            }
            __typename
@@ -247,9 +240,9 @@ class LoomIE(InfoExtractor):
        }
    }\n'''),
    }
    _APOLLO_GRAPHQL_VERSION = '45a5bd4'
    _APOLLO_GRAPHQL_VERSION = '0a1856c'

    def _call_graphql_api(self, operation_name, video_id, note=None, errnote=None, fatal=True):
    def _call_graphql_api(self, operations, video_id, note=None, errnote=None):
        password = self.get_param('videopassword')
        return self._download_json(
            'https://www.loom.com/graphql', video_id, note or 'Downloading GraphQL JSON',
@@ -259,9 +252,7 @@ class LoomIE(InfoExtractor):
                'x-loom-request-source': f'loom_web_{self._APOLLO_GRAPHQL_VERSION}',
                'apollographql-client-name': 'web',
                'apollographql-client-version': self._APOLLO_GRAPHQL_VERSION,
                'graphql-operation-name': operation_name,
                'Origin': 'https://www.loom.com',
            }, data=json.dumps({
            }, data=json.dumps([{
                'operationName': operation_name,
                'variables': {
                    'videoId': video_id,
@@ -269,7 +260,7 @@ class LoomIE(InfoExtractor):
                    **self._GRAPHQL_VARIABLES.get(operation_name, {}),
                },
                'query': self._GRAPHQL_QUERIES[operation_name],
            }, separators=(',', ':')).encode(), fatal=fatal)
            } for operation_name in variadic(operations)], separators=(',', ':')).encode())

    def _call_url_api(self, endpoint, video_id):
        response = self._download_json(
@@ -284,7 +275,7 @@ class LoomIE(InfoExtractor):
            }, separators=(',', ':')).encode())
        return traverse_obj(response, ('url', {url_or_none}))

    def _extract_formats(self, video_id, metadata, video_data):
    def _extract_formats(self, video_id, metadata, gql_data):
        formats = []
        video_properties = traverse_obj(metadata, ('video_properties', {
            'width': ('width', {int_or_none}),
@@ -339,7 +330,7 @@ class LoomIE(InfoExtractor):
        transcoded_url = self._call_url_api('transcoded-url', video_id)
        formats.extend(get_formats(transcoded_url, 'transcoded', quality=-1))  # transcoded quality

        cdn_url = traverse_obj(video_data, ('data', 'getVideo', 'nullableRawCdnUrl', 'url', {url_or_none}))
        cdn_url = get_first(gql_data, ('data', 'getVideo', 'nullableRawCdnUrl', 'url', {url_or_none}))
        # cdn_url is usually a dupe, but the raw-url/transcoded-url endpoints could return errors
        valid_urls = [update_url(url, query=None) for url in (raw_url, transcoded_url) if url]
        if cdn_url and update_url(cdn_url, query=None) not in valid_urls:
@@ -347,21 +338,10 @@ class LoomIE(InfoExtractor):

        return formats

    def _get_subtitles(self, video_id):
        subs_data = self._call_graphql_api(
            'FetchVideoTranscript', video_id, 'Downloading GraphQL subtitles JSON', fatal=False)
        return filter_dict({
            'en': traverse_obj(subs_data, (
                'data', 'fetchVideoTranscript',
                ('source_url', 'captions_source_url'), {
                    'url': {url_or_none},
                })) or None,
        })

    def _real_extract(self, url):
        video_id = self._match_id(url)
        metadata = traverse_obj(
            self._call_graphql_api('GetVideoSSR', video_id, 'Downloading GraphQL metadata JSON', fatal=False),
        metadata = get_first(
            self._call_graphql_api('GetVideoSSR', video_id, 'Downloading GraphQL metadata JSON'),
            ('data', 'getVideo', {dict})) or {}

        if metadata.get('__typename') == 'VideoPasswordMissingOrIncorrect':
@@ -370,19 +350,22 @@ class LoomIE(InfoExtractor):
                'This video is password-protected, use the --video-password option', expected=True)
            raise ExtractorError('Invalid video password', expected=True)

        video_data = self._call_graphql_api(
            'GetVideoSource', video_id, 'Downloading GraphQL video JSON')
        chapter_data = self._call_graphql_api(
            'FetchChapters', video_id, 'Downloading GraphQL chapters JSON', fatal=False)
        gql_data = self._call_graphql_api(['FetchChapters', 'FetchVideoTranscript', 'GetVideoSource'], video_id)
        duration = traverse_obj(metadata, ('video_properties', 'duration', {int_or_none}))

        return {
            'id': video_id,
            'duration': duration,
            'chapters': self._extract_chapters_from_description(
                traverse_obj(chapter_data, ('data', 'fetchVideoChapters', 'content', {str})), duration) or None,
            'formats': self._extract_formats(video_id, metadata, video_data),
            'subtitles': self.extract_subtitles(video_id),
                get_first(gql_data, ('data', 'fetchVideoChapters', 'content', {str})), duration) or None,
            'formats': self._extract_formats(video_id, metadata, gql_data),
            'subtitles': filter_dict({
                'en': traverse_obj(gql_data, (
                    ..., 'data', 'fetchVideoTranscript',
                    ('source_url', 'captions_source_url'), {
                        'url': {url_or_none},
                    })) or None,
            }),
            **traverse_obj(metadata, {
                'title': ('name', {str}),
                'description': ('description', {str}),
@@ -393,7 +376,6 @@ class LoomIE(InfoExtractor):


class LoomFolderIE(InfoExtractor):
    _WORKING = False
    IE_NAME = 'loom:folder'
    _VALID_URL = r'https?://(?:www\.)?loom\.com/share/folder/(?P<id>[\da-f]{32})'
    _TESTS = [{
|
||||
|
||||
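Aside: the batched _call_graphql_api above hinges on two yt-dlp utilities -- variadic(), which lets a single operation name or a list of names be handled uniformly, and get_first(), which scans the list of per-operation responses. A minimal standalone sketch (build_payload and the toy queries/gql_data values are invented for illustration, not part of the diff):

from yt_dlp.utils import variadic
from yt_dlp.utils.traversal import get_first

def build_payload(operations, video_id, queries):
    # One JSON object per operation, mirroring the json.dumps([{...} ...]) above
    return [{
        'operationName': op,
        'variables': {'videoId': video_id},
        'query': queries[op],
    } for op in variadic(operations)]

queries = {'GetVideoSource': 'query ...', 'FetchChapters': 'query ...'}
assert len(build_payload('GetVideoSource', 'abc123', queries)) == 1  # a lone name still works
assert len(build_payload(['GetVideoSource', 'FetchChapters'], 'abc123', queries)) == 2

# The API answers with one response per operation; get_first() reads a
# field from whichever response in the list carries it:
gql_data = [{'data': {'fetchVideoChapters': None}}, {'data': {'getVideo': {'id': 'abc123'}}}]
assert get_first(gql_data, ('data', 'getVideo', 'id')) == 'abc123'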
128
yt_dlp/extractor/manoto.py
Normal file
@@ -0,0 +1,128 @@
from .common import InfoExtractor
from ..utils import clean_html, int_or_none, traverse_obj

_API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}'


class ManotoTVIE(InfoExtractor):
    IE_DESC = 'Manoto TV (Episode)'
    _VALID_URL = r'https?://(?:www\.)?manototv\.com/episode/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.manototv.com/episode/8475',
        'info_dict': {
            'id': '8475',
            'series': 'خانه های رویایی با برادران اسکات',
            'season_number': 7,
            'episode_number': 25,
            'episode_id': 'My Dream Home S7: Carol & John',
            'duration': 3600,
            'categories': ['سرگرمی'],
            'title': 'کارول و جان',
            'description': 'md5:d0fff1f8ba5c6775d312a00165d1a97e',
            'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.manototv.com/episode/12576',
        'info_dict': {
            'id': '12576',
            'series': 'فیلم های ایرانی',
            'episode_id': 'Seh Mah Taatili',
            'duration': 5400,
            'view_count': int,
            'categories': ['سرگرمی'],
            'title': 'سه ماه تعطیلی',
            'description': 'سه ماه تعطیلی فیلمی به کارگردانی و نویسندگی شاپور قریب ساختهٔ سال ۱۳۵۶ است.',
            'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        episode_json = self._download_json(_API_URL.format('showmodule', 'episodedetails', video_id), video_id)
        details = episode_json.get('details', {})
        formats = self._extract_m3u8_formats(details.get('videoM3u8Url'), video_id, 'mp4')
        return {
            'id': video_id,
            'series': details.get('showTitle'),
            'season_number': int_or_none(details.get('analyticsSeasonNumber')),
            'episode_number': int_or_none(details.get('episodeNumber')),
            'episode_id': details.get('analyticsEpisodeTitle'),
            'duration': int_or_none(details.get('durationInMinutes'), invscale=60),
            'view_count': details.get('viewCount'),
            'categories': [details.get('videoCategory')],
            'title': details.get('episodeTitle'),
            'description': clean_html(details.get('episodeDescription')),
            'thumbnail': details.get('episodelandscapeImgIxUrl'),
            'formats': formats,
        }


class ManotoTVShowIE(InfoExtractor):
    IE_DESC = 'Manoto TV (Show)'
    _VALID_URL = r'https?://(?:www\.)?manototv\.com/show/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.manototv.com/show/2526',
        'playlist_mincount': 68,
        'info_dict': {
            'id': '2526',
            'title': 'فیلم های ایرانی',
            'description': 'مجموعه ای از فیلم های سینمای کلاسیک ایران',
        },
    }]

    def _real_extract(self, url):
        show_id = self._match_id(url)
        show_json = self._download_json(_API_URL.format('showmodule', 'details', show_id), show_id)
        show_details = show_json.get('details', {})
        title = show_details.get('showTitle')
        description = show_details.get('showSynopsis')

        series_json = self._download_json(_API_URL.format('showmodule', 'serieslist', show_id), show_id)
        playlist_id = str(traverse_obj(series_json, ('details', 'list', 0, 'id')))

        playlist_json = self._download_json(_API_URL.format('showmodule', 'episodelist', playlist_id), playlist_id)
        playlist = traverse_obj(playlist_json, ('details', 'list')) or []

        entries = [
            self.url_result(
                'https://www.manototv.com/episode/{}'.format(item['slideID']), ie=ManotoTVIE.ie_key(), video_id=item['slideID'])
            for item in playlist]
        return self.playlist_result(entries, show_id, title, description)


class ManotoTVLiveIE(InfoExtractor):
    IE_DESC = 'Manoto TV (Live)'
    _VALID_URL = r'https?://(?:www\.)?manototv\.com/live/'
    _TEST = {
        'url': 'https://www.manototv.com/live/',
        'info_dict': {
            'id': 'live',
            'title': 'Manoto TV Live',
            'ext': 'mp4',
            'is_live': True,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }

    def _real_extract(self, url):
        video_id = 'live'
        json = self._download_json(_API_URL.format('livemodule', 'details', ''), video_id)
        details = json.get('details', {})
        video_url = details.get('liveUrl')
        formats = self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True)
        return {
            'id': video_id,
            'title': 'Manoto TV Live',
            'is_live': True,
            'formats': formats,
        }
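Aside: the 'duration' line above relies on int_or_none's invscale argument -- the Manoto API reports duration in minutes, and invscale=60 multiplies it into seconds while staying None-safe. A quick illustration (values made up):

from yt_dlp.utils import int_or_none

assert int_or_none('90', invscale=60) == 5400  # 90 minutes -> seconds
assert int_or_none(None, invscale=60) is None  # a missing field stays None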
@@ -1,9 +1,7 @@
import functools
import math
import re

from .common import InfoExtractor
from ..utils import (
    InAdvancePagedList,
    clean_html,
    int_or_none,
    parse_iso8601,
@@ -12,64 +10,15 @@ from ..utils import (
from ..utils.traversal import require, traverse_obj


class MaveBaseIE(InfoExtractor):
    _API_BASE_URL = 'https://api.mave.digital/v1/website'
    _API_BASE_STORAGE_URL = 'https://store.cloud.mts.ru/mave/'

    def _load_channel_meta(self, channel_id, display_id):
        return traverse_obj(self._download_json(
            f'{self._API_BASE_URL}/{channel_id}/', display_id,
            note='Downloading channel metadata'), 'podcast')

    def _load_episode_meta(self, channel_id, episode_code, display_id):
        return self._download_json(
            f'{self._API_BASE_URL}/{channel_id}/episodes/{episode_code}',
            display_id, note='Downloading episode metadata')

    def _create_entry(self, channel_id, channel_meta, episode_meta):
        episode_code = traverse_obj(episode_meta, ('code', {int}, {require('episode code')}))
        return {
            'display_id': f'{channel_id}-{episode_code}',
            'extractor_key': MaveIE.ie_key(),
            'extractor': MaveIE.IE_NAME,
            'webpage_url': f'https://{channel_id}.mave.digital/ep-{episode_code}',
            'channel_id': channel_id,
            'channel_url': f'https://{channel_id}.mave.digital/',
            'vcodec': 'none',
            **traverse_obj(episode_meta, {
                'id': ('id', {str}),
                'url': ('audio', {urljoin(self._API_BASE_STORAGE_URL)}),
                'title': ('title', {str}),
                'description': ('description', {clean_html}),
                'thumbnail': ('image', {urljoin(self._API_BASE_STORAGE_URL)}),
                'duration': ('duration', {int_or_none}),
                'season_number': ('season', {int_or_none}),
                'episode_number': ('number', {int_or_none}),
                'view_count': ('listenings', {int_or_none}),
                'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
                'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
                'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
                'timestamp': ('publish_date', {parse_iso8601}),
            }),
            **traverse_obj(channel_meta, {
                'series_id': ('id', {str}),
                'series': ('title', {str}),
                'channel': ('title', {str}),
                'uploader': ('author', {str}),
            }),
        }


class MaveIE(MaveBaseIE):
    IE_NAME = 'mave'
    _VALID_URL = r'https?://(?P<channel_id>[\w-]+)\.mave\.digital/ep-(?P<episode_code>\d+)'
class MaveIE(InfoExtractor):
    _VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
    _TESTS = [{
        'url': 'https://ochenlichnoe.mave.digital/ep-25',
        'md5': 'aa3e513ef588b4366df1520657cbc10c',
        'info_dict': {
            'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
            'ext': 'mp3',
            'display_id': 'ochenlichnoe-25',
            'display_id': 'ochenlichnoe-ep-25',
            'title': 'Между мной и миром: психология самооценки',
            'description': 'md5:4b7463baaccb6982f326bce5c700382a',
            'uploader': 'Самарский университет',
@@ -96,7 +45,7 @@ class MaveIE(MaveBaseIE):
        'info_dict': {
            'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
            'ext': 'mp3',
            'display_id': 'budem-12',
            'display_id': 'budem-ep-12',
            'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
            'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
            'uploader': 'Полина Цветкова+Евгения Акопова',
@@ -119,72 +68,40 @@ class MaveIE(MaveBaseIE):
            'upload_date': '20241230',
        },
    }]
    _API_BASE_URL = 'https://api.mave.digital/'

    def _real_extract(self, url):
        channel_id, episode_code = self._match_valid_url(url).group(
            'channel_id', 'episode_code')
        display_id = f'{channel_id}-{episode_code}'

        channel_meta = self._load_channel_meta(channel_id, display_id)
        episode_meta = self._load_episode_meta(channel_id, episode_code, display_id)

        return self._create_entry(channel_id, channel_meta, episode_meta)


class MaveChannelIE(MaveBaseIE):
    IE_NAME = 'mave:channel'
    _VALID_URL = r'https?://(?P<id>[\w-]+)\.mave\.digital/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://budem.mave.digital/',
        'info_dict': {
            'id': 'budem',
            'title': 'Все там будем',
            'description': 'md5:f04ae12a42be0f1d765c5e326b41987a',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://ochenlichnoe.mave.digital/',
        'info_dict': {
            'id': 'ochenlichnoe',
            'title': 'Очень личное',
            'description': 'md5:ee36a6a52546b91b487fe08c552fdbb2',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'https://geekcity.mave.digital/',
        'info_dict': {
            'id': 'geekcity',
            'title': 'Мужчины в трико',
            'description': 'md5:4164d425d60a0d97abdce9d1f6f8e049',
        },
        'playlist_mincount': 80,
    }]
    _PAGE_SIZE = 50

    def _entries(self, channel_id, channel_meta, page_num):
        page_data = self._download_json(
            f'{self._API_BASE_URL}/{channel_id}/episodes', channel_id, query={
                'view': 'all',
                'page': page_num + 1,
                'sort': 'newest',
                'format': 'all',
            }, note=f'Downloading page {page_num + 1}')
        for ep in traverse_obj(page_data, ('episodes', lambda _, v: v['audio'] and v['id'])):
            yield self._create_entry(channel_id, channel_meta, ep)

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        channel_meta = self._load_channel_meta(channel_id, channel_id)
        channel_id, slug = self._match_valid_url(url).group('channel', 'id')
        display_id = f'{channel_id}-{slug}'
        webpage = self._download_webpage(url, display_id)
        data = traverse_obj(
            self._search_nuxt_json(webpage, display_id),
            ('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))

        return {
            '_type': 'playlist',
            'id': channel_id,
            **traverse_obj(channel_meta, {
            'display_id': display_id,
            'channel_id': channel_id,
            'channel_url': f'https://{channel_id}.mave.digital/',
            'vcodec': 'none',
            'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None,
            **traverse_obj(data, ('activeEpisodeData', {
                'url': ('audio', {urljoin(self._API_BASE_URL)}),
                'id': ('id', {str}),
                'title': ('title', {str}),
                'description': ('description', {str}),
            }),
            'entries': InAdvancePagedList(
                functools.partial(self._entries, channel_id, channel_meta),
                math.ceil(channel_meta['episodes_count'] / self._PAGE_SIZE), self._PAGE_SIZE),
                'description': ('description', {clean_html}),
                'duration': ('duration', {int_or_none}),
                'season_number': ('season', {int_or_none}),
                'episode_number': ('number', {int_or_none}),
                'view_count': ('listenings', {int_or_none}),
                'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
                'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
                'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
                'timestamp': ('publish_date', {parse_iso8601}),
            })),
            **traverse_obj(data, ('podcast', 'podcast', {
                'series_id': ('id', {str}),
                'series': ('title', {str}),
                'channel': ('title', {str}),
                'uploader': ('author', {str}),
            })),
        }
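Aside: MaveChannelIE's 'entries' value above uses InAdvancePagedList, which fits when the total item count is known up front (here from channel_meta['episodes_count']), so the page count can be computed before any page is fetched. A self-contained sketch with made-up data (fetch_page and EPISODES are stand-ins for the real paged API):

import math

from yt_dlp.utils import InAdvancePagedList

PAGE_SIZE = 50
EPISODES = [f'ep-{i}' for i in range(120)]  # toy stand-in for the API

def fetch_page(page_num):  # page_num is 0-based
    start = page_num * PAGE_SIZE
    yield from EPISODES[start:start + PAGE_SIZE]

entries = InAdvancePagedList(
    fetch_page, math.ceil(len(EPISODES) / PAGE_SIZE), PAGE_SIZE)
# Pages are only fetched as slices are requested:
assert entries.getslice(0, 3) == ['ep-0', 'ep-1', 'ep-2']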
@@ -1,9 +1,14 @@
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    format_field,
    int_or_none,
    url_or_none,
    str_or_none,
    traverse_obj,
)
from ..utils.traversal import traverse_obj


class MedalTVIE(InfoExtractor):
@@ -25,8 +30,25 @@ class MedalTVIE(InfoExtractor):
            'view_count': int,
            'like_count': int,
            'duration': 13,
            'thumbnail': r're:https://cdn\.medal\.tv/ugcp/content-thumbnail/.*\.jpg',
            'tags': ['headshot', 'valorant', '4k', 'clutch', 'mornu'],
        },
    }, {
        'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH',
        'md5': 'fc7a3e4552ae8993c1c4006db46be447',
        'info_dict': {
            'id': '2mA60jWAGQCBH',
            'ext': 'mp4',
            'title': 'Quad Cold',
            'description': 'Medal,https://medal.tv/desktop/',
            'uploader': 'MowgliSB',
            'timestamp': 1603165266,
            'upload_date': '20201020',
            'uploader_id': '10619174',
            'thumbnail': 'https://cdn.medal.tv/10619174/thumbnail-34934644-720p.jpg?t=1080p&c=202042&missing',
            'uploader_url': 'https://medal.tv/users/10619174',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'duration': 23,
        },
    }, {
        'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA',
@@ -35,90 +57,104 @@ class MedalTVIE(InfoExtractor):
            'id': '2um24TWdty0NA',
            'ext': 'mp4',
            'title': 'u tk me i tk u bigger',
            'description': '',
            'uploader': 'zahl',
            'description': 'Medal,https://medal.tv/desktop/',
            'uploader': 'Mimicc',
            'timestamp': 1605580939,
            'upload_date': '20201117',
            'uploader_id': '5156321',
            'thumbnail': r're:https://cdn\.medal\.tv/source/.*\.png',
            'thumbnail': 'https://cdn.medal.tv/5156321/thumbnail-36787208-360p.jpg?t=1080p&c=202046&missing',
            'uploader_url': 'https://medal.tv/users/5156321',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'duration': 9,
        },
    }, {
        # API requires auth
        'url': 'https://medal.tv/games/valorant/clips/2WRj40tpY_EU9',
        'md5': '6c6bb6569777fd8b4ef7b33c09de8dcf',
        'info_dict': {
            'id': '2WRj40tpY_EU9',
            'ext': 'mp4',
            'title': '1v5 clutch',
            'description': '',
            'uploader': 'adny',
            'uploader_id': '6256941',
            'uploader_url': 'https://medal.tv/users/6256941',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'duration': 25,
            'thumbnail': r're:https://cdn\.medal\.tv/source/.*\.jpg',
            'timestamp': 1612896680,
            'upload_date': '20210209',
        },
        'expected_warnings': ['Video formats are not available through API'],
    }, {
        'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
        'only_matching': True,
    }, {
        'url': 'https://medal.tv/games/valorant/clips/2WRj40tpY_EU9',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        content_data = self._download_json(
            f'https://medal.tv/api/content/{video_id}', video_id,
            headers={'Accept': 'application/json'})
        webpage = self._download_webpage(url, video_id, query={'mobilebypass': 'true'})

        hydration_data = self._search_json(
            r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
            'next data', video_id, end_pattern='</script>', fatal=False)

        clip = traverse_obj(hydration_data, ('clips', ...), get_all=False)
        if not clip:
            raise ExtractorError(
                'Could not find video information.', video_id=video_id)

        title = clip['contentTitle']

        source_width = int_or_none(clip.get('sourceWidth'))
        source_height = int_or_none(clip.get('sourceHeight'))

        aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9

        def add_item(container, item_url, height, id_key='format_id', item_id=None):
            item_id = item_id or '%dp' % height
            if item_id not in item_url:
                return
            container.append({
                'url': item_url,
                id_key: item_id,
                'width': round(aspect_ratio * height),
                'height': height,
            })

        formats = []
        if m3u8_url := url_or_none(content_data.get('contentUrlHls')):
            formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'))
        if http_url := url_or_none(content_data.get('contentUrl')):
            formats.append({
                'url': http_url,
                'format_id': 'http-source',
                'ext': 'mp4',
                'quality': 1,
            })
        formats = [fmt for fmt in formats if 'video/privacy-protected-guest' not in fmt['url']]
        if not formats:
            # Fallback, does not require auth
            self.report_warning('Video formats are not available through API, falling back to social video URL')
            urlh = self._request_webpage(
                f'https://medal.tv/api/content/{video_id}/socialVideoUrl', video_id,
                note='Checking social video URL')
            formats.append({
                'url': urlh.url,
                'format_id': 'social-video',
                'ext': 'mp4',
                'quality': -1,
            })
        thumbnails = []
        for k, v in clip.items():
            if not (v and isinstance(v, str)):
                continue
            mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
            if not mobj:
                continue
            prefix = mobj.group(1)
            height = int_or_none(mobj.group(2))
            if prefix == 'contentUrl':
                add_item(
                    formats, v, height or source_height,
                    item_id=None if height else 'source')
            elif prefix == 'thumbnail':
                add_item(thumbnails, v, height, 'id')

        error = clip.get('error')
        if not formats and error:
            if error == 404:
                self.raise_no_formats(
                    'That clip does not exist.',
                    expected=True, video_id=video_id)
            else:
                self.raise_no_formats(
                    f'An unknown error occurred ({error}).',
                    video_id=video_id)

        # Necessary because the id of the author is not known in advance.
        # Won't raise an issue if no profile can be found as this is optional.
        author = traverse_obj(hydration_data, ('profiles', ...), get_all=False) or {}
        author_id = str_or_none(author.get('userId'))
        author_url = format_field(author_id, None, 'https://medal.tv/users/%s')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            **traverse_obj(content_data, {
                'title': ('contentTitle', {str}),
                'description': ('contentDescription', {str}),
                'timestamp': ('created', {int_or_none(scale=1000)}),
                'duration': ('videoLengthSeconds', {int_or_none}),
                'view_count': ('views', {int_or_none}),
                'like_count': ('likes', {int_or_none}),
                'comment_count': ('comments', {int_or_none}),
                'uploader': ('poster', 'displayName', {str}),
                'uploader_id': ('poster', 'userId', {str}),
                'uploader_url': ('poster', 'userId', {str}, filter, {lambda x: x and f'https://medal.tv/users/{x}'}),
                'tags': ('tags', ..., {str}),
                'thumbnail': ('thumbnailUrl', {url_or_none}),
            }),
            'thumbnails': thumbnails,
            'description': clip.get('contentDescription'),
            'uploader': author.get('displayName'),
            'timestamp': float_or_none(clip.get('created'), 1000),
            'uploader_id': author_id,
            'uploader_url': author_url,
            'duration': int_or_none(clip.get('videoLengthSeconds')),
            'view_count': int_or_none(clip.get('views')),
            'like_count': int_or_none(clip.get('likes')),
            'comment_count': int_or_none(clip.get('comments')),
        }
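Aside: the clip lookup above, ('clips', ..., get_all=False), grabs the first clip object without knowing its key in advance -- `...` iterates every value under 'clips' and get_all=False stops at the first hit. Standalone, with toy data:

from yt_dlp.utils import traverse_obj

hydration_data = {'clips': {'2mA60jWAGQCBH': {'contentTitle': 'Quad Cold'}}}
clip = traverse_obj(hydration_data, ('clips', ...), get_all=False)
assert clip == {'contentTitle': 'Quad Cold'}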
@@ -478,64 +478,3 @@ class NebulaChannelIE(NebulaBaseIE):
            playlist_id=collection_slug,
            playlist_title=channel.get('title'),
            playlist_description=channel.get('description'))


class NebulaSeasonIE(NebulaBaseIE):
    IE_NAME = 'nebula:season'
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<series>[\w-]+)/season/(?P<season_number>[\w-]+)'
    _TESTS = [{
        'url': 'https://nebula.tv/jetlag/season/15',
        'info_dict': {
            'id': 'jetlag_15',
            'title': 'Tag: All Stars',
            'description': 'md5:5aa5b8abf3de71756448dc44ffebb674',
        },
        'playlist_count': 8,
    }, {
        'url': 'https://nebula.tv/jetlag/season/14',
        'info_dict': {
            'id': 'jetlag_14',
            'title': 'Snake',
            'description': 'md5:6da9040f1c2ac559579738bfb6919d1e',
        },
        'playlist_count': 8,
    }, {
        'url': 'https://nebula.tv/jetlag/season/13-5',
        'info_dict': {
            'id': 'jetlag_13-5',
            'title': 'Hide + Seek Across NYC',
            'description': 'md5:5b87bb9acc6dcdff289bb4c71a2ad59f',
        },
        'playlist_count': 3,
    }]

    def _build_url_result(self, item):
        url = (
            traverse_obj(item, ('share_url', {url_or_none}))
            or urljoin('https://nebula.tv/', item.get('app_path'))
            or f'https://nebula.tv/videos/{item["slug"]}')
        return self.url_result(
            smuggle_url(url, {'id': item['id']}),
            NebulaIE, url_transparent=True,
            **self._extract_video_metadata(item))

    def _entries(self, data):
        for episode in traverse_obj(data, ('episodes', lambda _, v: v['video']['id'], 'video')):
            yield self._build_url_result(episode)
        for extra in traverse_obj(data, ('extras', ..., 'items', lambda _, v: v['id'])):
            yield self._build_url_result(extra)
        for trailer in traverse_obj(data, ('trailers', lambda _, v: v['id'])):
            yield self._build_url_result(trailer)

    def _real_extract(self, url):
        series, season_id = self._match_valid_url(url).group('series', 'season_number')
        playlist_id = f'{series}_{season_id}'
        data = self._call_api(
            f'https://content.api.nebula.app/content/{series}/season/{season_id}', playlist_id)

        return self.playlist_result(
            self._entries(data), playlist_id,
            **traverse_obj(data, {
                'title': ('title', {str}),
                'description': ('description', {str}),
            }))
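Aside: _build_url_result above tries three URL sources in order; the or-chain works because both url_or_none() and yt-dlp's urljoin() return None on bad input rather than raising. A toy run (the item dict is invented):

from yt_dlp.utils import url_or_none, urljoin
from yt_dlp.utils.traversal import traverse_obj

item = {'share_url': 'not a url', 'app_path': '/videos/jetlag-tag-1', 'slug': 'jetlag-tag-1', 'id': 'x'}
url = (
    traverse_obj(item, ('share_url', {url_or_none}))  # rejected: no scheme
    or urljoin('https://nebula.tv/', item.get('app_path'))
    or f'https://nebula.tv/videos/{item["slug"]}')
assert url == 'https://nebula.tv/videos/jetlag-tag-1'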
@@ -1,79 +0,0 @@
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import parse_iso8601
from ..utils.traversal import require, traverse_obj


class NetAppBaseIE(InfoExtractor):
    _BC_URL = 'https://players.brightcove.net/6255154784001/default_default/index.html?videoId={}'

    @staticmethod
    def _parse_metadata(item):
        return traverse_obj(item, {
            'title': ('name', {str}),
            'description': ('description', {str}),
            'timestamp': ('createdAt', {parse_iso8601}),
        })


class NetAppVideoIE(NetAppBaseIE):
    _VALID_URL = r'https?://media\.netapp\.com/video-detail/(?P<id>[0-9a-f-]+)'

    _TESTS = [{
        'url': 'https://media.netapp.com/video-detail/da25fc01-82ad-5284-95bc-26920200a222/seamless-storage-for-modern-kubernetes-deployments',
        'info_dict': {
            'id': '1843620950167202073',
            'ext': 'mp4',
            'title': 'Seamless storage for modern Kubernetes deployments',
            'description': 'md5:1ee39e315243fe71fb90af2796037248',
            'uploader_id': '6255154784001',
            'duration': 2159.41,
            'thumbnail': r're:https://house-fastly-signed-us-east-1-prod\.brightcovecdn\.com/image/.*\.jpg',
            'tags': 'count:15',
            'timestamp': 1758213949,
            'upload_date': '20250918',
        },
    }, {
        'url': 'https://media.netapp.com/video-detail/45593e5d-cf1c-5996-978c-c9081906e69f/unleash-ai-innovation-with-your-data-with-the-netapp-platform',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_uuid = self._match_id(url)
        metadata = self._download_json(
            f'https://api.media.netapp.com/client/detail/{video_uuid}', video_uuid)

        brightcove_video_id = traverse_obj(metadata, (
            'sections', lambda _, v: v['type'] == 'Player', 'video', {str}, any, {require('brightcove video id')}))

        video_item = traverse_obj(metadata, ('sections', lambda _, v: v['type'] == 'VideoDetail', any))

        return self.url_result(
            self._BC_URL.format(brightcove_video_id), BrightcoveNewIE, brightcove_video_id,
            url_transparent=True, **self._parse_metadata(video_item))


class NetAppCollectionIE(NetAppBaseIE):
    _VALID_URL = r'https?://media\.netapp\.com/collection/(?P<id>[0-9a-f-]+)'
    _TESTS = [{
        'url': 'https://media.netapp.com/collection/9820e190-f2a6-47ac-9c0a-98e5e64234a4',
        'info_dict': {
            'title': 'Featured sessions',
            'id': '9820e190-f2a6-47ac-9c0a-98e5e64234a4',
        },
        'playlist_count': 4,
    }]

    def _entries(self, metadata):
        for item in traverse_obj(metadata, ('items', lambda _, v: v['brightcoveVideoId'])):
            brightcove_video_id = item['brightcoveVideoId']
            yield self.url_result(
                self._BC_URL.format(brightcove_video_id), BrightcoveNewIE, brightcove_video_id,
                url_transparent=True, **self._parse_metadata(item))

    def _real_extract(self, url):
        collection_uuid = self._match_id(url)
        metadata = self._download_json(
            f'https://api.media.netapp.com/client/collection/{collection_uuid}', collection_uuid)

        return self.playlist_result(self._entries(metadata), collection_uuid, playlist_title=metadata.get('name'))
@@ -528,7 +528,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
    IE_NAME = 'netease:program'
    IE_DESC = '网易云音乐 - 电台节目'
    _VALID_URL = r'https?://music\.163\.com/(?:#/)?(?:dj|program)\?id=(?P<id>[0-9]+)'
    _VALID_URL = r'https?://music\.163\.com/(?:#/)?program\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/program?id=10109055',
        'info_dict': {
@@ -572,9 +572,6 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
        'params': {
            'noplaylist': True,
        },
    }, {
        'url': 'https://music.163.com/#/dj?id=3706179315',
        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -2,59 +2,84 @@ from .common import InfoExtractor
from ..utils import (
    clean_html,
    int_or_none,
    url_or_none,
    urljoin,
    js_to_json,
    parse_iso8601,
)
from ..utils.traversal import traverse_obj


class NetzkinoIE(InfoExtractor):
    _GEO_COUNTRIES = ['DE']
    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/details/(?P<id>[^/?#]+)'
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'https://www.netzkino.de/details/snow-beast',
        'md5': '1a4c90fe40d3ccabce163287e45e56dd',
        'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond',
        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
        'info_dict': {
            'id': 'snow-beast',
            'id': 'rakete-zum-mond',
            'ext': 'mp4',
            'title': 'Snow Beast',
            'title': 'Rakete zum Mond \u2013 Jules Verne',
            'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60',
            'upload_date': '20120813',
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1344858571,
            'age_limit': 12,
            'alt_title': 'Snow Beast',
            'cast': 'count:3',
            'categories': 'count:7',
            'creators': 'count:2',
            'description': 'md5:e604a954a7f827a80e96a3a97d48b269',
            'location': 'US',
            'release_year': 2011,
            'thumbnail': r're:https?://.+\.jpg',
        },
        'params': {
            'skip_download': 'Download only works from Germany',
        },
    }, {
        'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2',
        'md5': 'c7728b2dadd04ff6727814847a51ef03',
        'info_dict': {
            'id': 'dr-jekyll-mrs-hyde-2',
            'ext': 'mp4',
            'title': 'Dr. Jekyll & Mrs. Hyde 2',
            'description': 'md5:c2e9626ebd02de0a794b95407045d186',
            'upload_date': '20190130',
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1548849437,
            'age_limit': 18,
        },
        'params': {
            'skip_download': 'Download only works from Germany',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        next_js_data = self._search_nextjs_data(webpage, video_id)
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')

        query = traverse_obj(next_js_data, (
            'props', '__dehydratedState', 'queries', ..., 'state',
            'data', 'data', lambda _, v: v['__typename'] == 'CmsMovie', any))
        if 'DRM' in traverse_obj(query, ('licenses', 'nodes', ..., 'properties', {str})):
            self.report_drm(video_id)
        api_url = f'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/{video_id}.json?d=www'
        info = self._download_json(api_url, video_id)
        custom_fields = info['custom_fields']

        production_js = self._download_webpage(
            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
            note='Downloading player code')
        avo_js = self._search_regex(
            r'var urlTemplate=(\{.*?"\})',
            production_js, 'URL templates')
        templates = self._parse_json(
            avo_js, video_id, transform_source=js_to_json)

        suffix = {
            'hds': '.mp4/manifest.f4m',
            'hls': '.mp4/master.m3u8',
            'pmd': '.mp4',
        }
        film_fn = custom_fields['Streaming'][0]
        formats = [{
            'format_id': key,
            'ext': 'mp4',
            'url': tpl.replace('{}', film_fn) + suffix[key],
        } for key, tpl in templates.items()]

        return {
            'id': video_id,
            **traverse_obj(query, {
                'title': ('originalTitle', {clean_html}),
                'age_limit': ('fskRating', {int_or_none}),
                'alt_title': ('originalTitle', {clean_html}, filter),
                'cast': ('cast', 'nodes', ..., 'person', 'name', {clean_html}, filter),
                'creators': (('directors', 'writers'), 'nodes', ..., 'person', 'name', {clean_html}, filter),
                'categories': ('categories', 'nodes', ..., 'category', 'title', {clean_html}, filter),
                'description': ('longSynopsis', {clean_html}, filter),
                'duration': ('runtimeInSeconds', {int_or_none}),
                'location': ('productionCountry', {clean_html}, filter),
                'release_year': ('productionYear', {int_or_none}),
                'thumbnail': ('coverImage', 'masterUrl', {url_or_none}),
                'url': ('videoSource', 'pmdUrl', {urljoin('https://pmd.netzkino-seite.netzkino.de/')}),
            }),
            'formats': formats,
            'title': info['title'],
            'age_limit': int_or_none(custom_fields.get('FSK')[0]),
            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
            'description': clean_html(info.get('content')),
            'thumbnail': info.get('thumbnail'),
        }
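Aside: the DRM gate above flattens every license node's 'properties' string into a list before the membership test -- `...` branches over the nodes and `{str}` acts as a type filter. Equivalent standalone traversal (the query dict is invented):

from yt_dlp.utils import traverse_obj

query = {'licenses': {'nodes': [{'properties': 'DRM'}, {'properties': 'FREE'}]}}
props = traverse_obj(query, ('licenses', 'nodes', ..., 'properties', {str}))
assert props == ['DRM', 'FREE']
assert 'DRM' in props  # would trigger self.report_drm(video_id)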
238
yt_dlp/extractor/nextmedia.py
Normal file
@@ -0,0 +1,238 @@
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    clean_html,
    get_element_by_class,
    int_or_none,
    parse_iso8601,
    remove_start,
    unified_timestamp,
)


class NextMediaIE(InfoExtractor):
    IE_DESC = '蘋果日報'
    _VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
        'md5': 'dff9fad7009311c421176d1ac90bfe4f',
        'info_dict': {
            'id': '53109199',
            'ext': 'mp4',
            'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:28222b9912b6665a21011b034c70fcc7',
            'timestamp': 1415456273,
            'upload_date': '20141108',
        },
    }]

    _URL_PATTERN = r'\{ url: \'(.+)\' \}'

    def _real_extract(self, url):
        news_id = self._match_id(url)
        page = self._download_webpage(url, news_id)
        return self._extract_from_nextmedia_page(news_id, url, page)

    def _extract_from_nextmedia_page(self, news_id, url, page):
        redirection_url = self._search_regex(
            r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1',
            page, 'redirection URL', default=None, group='url')
        if redirection_url:
            return self.url_result(urllib.parse.urljoin(url, redirection_url))

        title = self._fetch_title(page)
        video_url = self._search_regex(self._URL_PATTERN, page, 'video url')

        attrs = {
            'id': news_id,
            'title': title,
            'url': video_url,  # ext can be inferred from url
            'thumbnail': self._fetch_thumbnail(page),
            'description': self._fetch_description(page),
        }

        timestamp = self._fetch_timestamp(page)
        if timestamp:
            attrs['timestamp'] = timestamp
        else:
            attrs['upload_date'] = self._fetch_upload_date(url)

        return attrs

    def _fetch_title(self, page):
        return self._og_search_title(page)

    def _fetch_thumbnail(self, page):
        return self._og_search_thumbnail(page)

    def _fetch_timestamp(self, page):
        date_created = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time')
        return parse_iso8601(date_created)

    def _fetch_upload_date(self, url):
        return self._search_regex(self._VALID_URL, url, 'upload date', group='date')

    def _fetch_description(self, page):
        return self._og_search_property('description', page)


class NextMediaActionNewsIE(NextMediaIE):  # XXX: Do not subclass from concrete IE
    IE_DESC = '蘋果日報 - 動新聞'
    _VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
    _TESTS = [{
        'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
        'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
        'info_dict': {
            'id': '19009428',
            'ext': 'mp4',
            'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
            'timestamp': 1421791200,
            'upload_date': '20150120',
        },
    }]

    def _real_extract(self, url):
        news_id = self._match_id(url)
        actionnews_page = self._download_webpage(url, news_id)
        article_url = self._og_search_url(actionnews_page)
        article_page = self._download_webpage(article_url, news_id)
        return self._extract_from_nextmedia_page(news_id, url, article_page)


class AppleDailyIE(NextMediaIE):  # XXX: Do not subclass from concrete IE
    IE_DESC = '臺灣蘋果日報'
    _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
    _TESTS = [{
        'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
        'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
        'info_dict': {
            'id': '36354694',
            'ext': 'mp4',
            'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
            'upload_date': '20150128',
        },
    }, {
        'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
        'md5': '86b4e9132d158279c7883822d94ccc49',
        'info_dict': {
            'id': '550549',
            'ext': 'mp4',
            'title': '不滿被踩腳 山東兩大媽一路打下車',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
            'upload_date': '20150128',
        },
    }, {
        'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
        'md5': '03df296d95dedc2d5886debbb80cb43f',
        'info_dict': {
            'id': '5003671',
            'ext': 'mp4',
            'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
            'upload_date': '20150128',
        },
        'skip': 'redirect to http://www.appledaily.com.tw/animation/',
    }, {
        # No thumbnail
        'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
        'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
        'info_dict': {
            'id': '5003673',
            'ext': 'mp4',
            'title': '半夜尿尿 好像會看到___',
            'description': 'md5:61d2da7fe117fede148706cdb85ac066',
            'upload_date': '20150128',
        },
        'expected_warnings': [
            'video thumbnail',
        ],
        'skip': 'redirect to http://www.appledaily.com.tw/animation/',
    }, {
        'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
        'md5': 'eaa20e6b9df418c912d7f5dec2ba734d',
        'info_dict': {
            'id': '35770334',
            'ext': 'mp4',
            'title': '咖啡占卜測 XU裝熟指數',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
            'upload_date': '20140417',
        },
    }, {
        'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
        'only_matching': True,
    }, {
        # Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694
        'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694',
        'only_matching': True,
    }]

    _URL_PATTERN = r'\{url: \'(.+)\'\}'

    def _fetch_title(self, page):
        return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None)
                or self._html_search_meta('description', page, 'news title'))

    def _fetch_thumbnail(self, page):
        return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)

    def _fetch_timestamp(self, page):
        return None

    def _fetch_description(self, page):
        return self._html_search_meta('description', page, 'news description')


class NextTVIE(InfoExtractor):
    _WORKING = False
    _ENABLED = None  # XXX: pass through to GenericIE
    IE_DESC = '壹電視'
    _VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671',
        'info_dict': {
            'id': '11779671',
            'ext': 'mp4',
            'title': '「超收稅」近4千億! 藍議員籲發消費券',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1484825400,
            'upload_date': '20170119',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')

        data = self._hidden_inputs(webpage)

        video_url = data['ntt-vod-src-detailview']

        date_str = get_element_by_class('date', webpage)
        timestamp = unified_timestamp(date_str + '+0800') if date_str else None

        view_count = int_or_none(remove_start(
            clean_html(get_element_by_class('click', webpage)), '點閱:'))

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': data.get('ntt-vod-img-src'),
            'timestamp': timestamp,
            'view_count': view_count,
        }
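Aside: the three extractors above share _extract_from_nextmedia_page and differ only in the _fetch_* hooks they override -- a template-method layout. Its minimal shape, reduced to toy classes (Base and AppleDailyLike are invented for illustration):

class Base:
    def extract(self, page):
        # The shared skeleton calls the hooks; subclasses customize per site
        return {'title': self._fetch_title(page), 'timestamp': self._fetch_timestamp(page)}

    def _fetch_title(self, page):
        return page['og_title']

    def _fetch_timestamp(self, page):
        return page['dateCreated']

class AppleDailyLike(Base):  # overrides only what differs per site
    def _fetch_title(self, page):
        return page['h1']

    def _fetch_timestamp(self, page):
        return None  # this site's pages carry no machine-readable timestamp

assert AppleDailyLike().extract({'h1': 'T', 'og_title': 'x', 'dateCreated': 1}) == {'title': 'T', 'timestamp': None}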
@@ -23,38 +23,96 @@ from ..utils import (
|
||||
|
||||
|
||||
class NhkBaseIE(InfoExtractor):
|
||||
_API_URL_TEMPLATE = 'https://api.nhkworld.jp/showsapi/v1/{lang}/{content_format}_{page_type}/{m_id}{extra_page}'
|
||||
_API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
|
||||
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/'
|
||||
|
||||
def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
|
||||
content_format = 'video' if is_video else 'audio'
|
||||
content_type = 'clips' if is_clip else 'episodes'
|
||||
if not is_episode:
|
||||
extra_page = f'/{content_format}_{content_type}'
|
||||
page_type = 'programs'
|
||||
else:
|
||||
extra_page = ''
|
||||
page_type = content_type
|
||||
|
||||
return self._download_json(
|
||||
self._API_URL_TEMPLATE.format(
|
||||
lang=lang, content_format=content_format, page_type=page_type,
|
||||
m_id=m_id, extra_page=extra_page),
|
||||
join_nonempty(m_id, lang))
|
||||
self._API_URL_TEMPLATE % (
|
||||
'v' if is_video else 'r',
|
||||
'clip' if is_clip else 'esd',
|
||||
'episode' if is_episode else 'program',
|
||||
m_id, lang, '/all' if is_video else ''),
|
||||
m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
|
||||
|
||||
def _get_api_info(self, refresh=True):
|
||||
if not refresh:
|
||||
return self.cache.load('nhk', 'api_info')
|
||||
|
||||
self.cache.store('nhk', 'api_info', {})
|
||||
movie_player_js = self._download_webpage(
|
||||
'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
|
||||
note='Downloading stream API information')
|
||||
api_info = {
|
||||
'url': self._search_regex(
|
||||
r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'),
|
||||
'token': self._search_regex(
|
||||
r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'),
|
||||
}
|
||||
self.cache.store('nhk', 'api_info', api_info)
|
||||
return api_info
|
||||
|
||||
def _extract_stream_info(self, vod_id):
|
||||
for refresh in (False, True):
|
||||
api_info = self._get_api_info(refresh)
|
||||
if not api_info:
|
||||
continue
|
||||
|
||||
api_url = api_info.pop('url')
|
||||
meta = traverse_obj(
|
||||
self._download_json(
|
||||
api_url, vod_id, 'Downloading stream url info', fatal=False, query={
|
||||
**api_info,
|
||||
'type': 'json',
|
||||
'optional_id': vod_id,
|
||||
'active_flg': 1,
|
||||
}), ('meta', 0))
|
||||
stream_url = traverse_obj(
|
||||
meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
|
||||
|
||||
if stream_url:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
|
||||
return {
|
||||
**traverse_obj(meta, {
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('publication_date', {unified_timestamp}),
|
||||
'release_timestamp': ('insert_date', {unified_timestamp}),
|
||||
'modified_timestamp': ('update_date', {unified_timestamp}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
raise ExtractorError('Unable to extract stream url')
|
||||
|
||||
def _extract_episode_info(self, url, episode=None):
|
||||
fetch_episode = episode is None
|
||||
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
|
||||
is_video = m_type != 'audio'
|
||||
|
||||
if is_video:
|
||||
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
||||
|
||||
if fetch_episode:
|
||||
episode = self._call_api(
|
||||
episode_id, lang, is_video, is_episode=True, is_clip=episode_id[:4] == '9999')
|
||||
episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
|
||||
|
||||
video_id = join_nonempty('id', 'lang', from_dict=episode)
|
||||
def get_clean_field(key):
|
||||
return clean_html(episode.get(key + '_clean') or episode.get(key))
|
||||
|
||||
title = episode.get('title')
|
||||
series = traverse_obj(episode, (('video_program', 'audio_program'), any, 'title'))
|
||||
title = get_clean_field('sub_title')
|
||||
series = get_clean_field('title')
|
||||
|
||||
thumbnails = []
|
||||
for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
|
||||
img_path = episode.get('image' + s)
|
||||
if not img_path:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': f'{h}p',
|
||||
'height': h,
|
||||
'width': w,
|
||||
'url': 'https://www3.nhk.or.jp' + img_path,
|
||||
})
|
||||
|
||||
episode_name = title
|
||||
if series and title:
|
||||
@@ -67,52 +125,37 @@ class NhkBaseIE(InfoExtractor):
|
||||
episode_name = None
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'id': episode_id + '-' + lang,
|
||||
'title': title,
|
||||
'description': get_clean_field('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'series': series,
|
||||
'episode': episode_name,
|
||||
**traverse_obj(episode, {
|
||||
'description': ('description', {str}),
|
||||
'release_timestamp': ('first_broadcasted_at', {unified_timestamp}),
|
||||
'categories': ('categories', ..., 'name', {str}),
|
||||
'tags': ('tags', ..., 'name', {str}),
|
||||
'thumbnails': ('images', lambda _, v: v['url'], {
|
||||
'url': ('url', {urljoin(url)}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
'webpage_url': ('url', {urljoin(url)}),
|
||||
}),
|
||||
'extractor_key': NhkVodIE.ie_key(),
|
||||
'extractor': NhkVodIE.IE_NAME,
|
||||
}
|
||||
|
||||
# XXX: We are assuming that 'video' and 'audio' are mutually exclusive
|
||||
stream_info = traverse_obj(episode, (('video', 'audio'), {dict}, any)) or {}
|
||||
if not stream_info.get('url'):
|
||||
self.raise_no_formats('Stream not found; it has most likely expired', expected=True)
|
||||
if is_video:
|
||||
vod_id = episode['vod_id']
|
||||
info.update({
|
||||
**self._extract_stream_info(vod_id),
|
||||
'id': vod_id,
|
||||
})
|
||||
|
||||
else:
|
||||
stream_url = stream_info['url']
|
||||
if is_video:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
|
||||
info.update({
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(stream_info, ({
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': ('published_at', {unified_timestamp}),
|
||||
})),
|
||||
})
|
||||
else:
|
||||
if fetch_episode:
|
||||
# From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html
|
||||
audio_path = remove_end(stream_url, '.m4a')
|
||||
audio_path = remove_end(episode['audio']['audio'], '.m4a')
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8',
|
||||
episode_id, 'm4a', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
for f in info['formats']:
|
||||
f['language'] = lang
|
||||
|
||||
else:
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': NhkVodIE.ie_key(),
|
||||
'url': url,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
@@ -125,29 +168,29 @@ class NhkVodIE(NhkBaseIE):
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2049165/',
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2049126/',
|
||||
'info_dict': {
|
||||
'id': '2049165-en',
|
||||
'id': 'nw_vod_v_en_2049_126_20230413233000_01_1681398302',
|
||||
'ext': 'mp4',
|
||||
'title': 'Japan Railway Journal - Choshi Electric Railway: Fighting to Get Back on Track',
|
||||
'description': 'md5:ab57df2fca7f04245148c2e787bb203d',
|
||||
'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
|
||||
'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'episode': 'Choshi Electric Railway: Fighting to Get Back on Track',
|
||||
'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
|
||||
'series': 'Japan Railway Journal',
|
||||
'duration': 1680,
|
||||
'categories': ['Biz & Tech'],
|
||||
'tags': ['Akita', 'Chiba', 'Trains', 'Transcript', 'All (Japan Navigator)'],
|
||||
'timestamp': 1759055880,
|
||||
'upload_date': '20250928',
|
||||
'release_timestamp': 1758810600,
|
||||
'release_date': '20250925',
|
||||
'modified_timestamp': 1707217907,
|
||||
'timestamp': 1681428600,
|
||||
'release_timestamp': 1693883728,
|
||||
'duration': 1679,
|
||||
'upload_date': '20230413',
|
||||
'modified_date': '20240206',
|
||||
'release_date': '20230905',
|
||||
},
|
||||
}, {
|
||||
# video clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||
'md5': '153c3016dfd252ba09726588149cf0e7',
|
||||
'info_dict': {
|
||||
'id': '9999011-en',
|
||||
'id': 'lpZXIwaDE6_Z-976CPsFdxyICyWUzlT5',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
|
||||
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
|
||||
@@ -155,23 +198,24 @@ class NhkVodIE(NhkBaseIE):
|
||||
'series': 'Dining with the Chef',
|
||||
'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
|
||||
'duration': 148,
|
||||
'categories': ['Food'],
|
||||
'tags': ['Washoku'],
|
||||
'timestamp': 1548212400,
|
||||
'upload_date': '20190123',
|
||||
'upload_date': '20190816',
|
||||
'release_date': '20230902',
|
||||
'release_timestamp': 1693619292,
|
||||
'modified_timestamp': 1707217907,
|
||||
'modified_date': '20240206',
|
||||
'timestamp': 1565997540,
|
||||
},
|
||||
}, {
|
||||
# radio
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20240901-1/',
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/livinginjapan-20231001-1/',
|
||||
'info_dict': {
|
||||
'id': 'livinginjapan-20240901-1-en',
|
||||
'id': 'livinginjapan-20231001-1-en',
|
||||
'ext': 'm4a',
|
||||
'title': 'Living in Japan - Weekend Hiking / Self-protection from crime',
|
||||
'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
|
||||
'series': 'Living in Japan',
|
||||
'description': 'md5:4d0e14ab73bdbfedb60a53b093954ed6',
|
||||
'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'episode': 'Weekend Hiking / Self-protection from crime',
|
||||
'categories': ['Interactive'],
|
||||
'episode': 'Tips for Travelers to Japan / Ramen Vending Machines',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||
@@ -212,51 +256,96 @@ class NhkVodIE(NhkBaseIE):
|
||||
},
|
||||
'skip': 'expires 2023-10-15',
|
||||
}, {
|
||||
        # a one-off (single-episode series). title from the api is just null
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3026036/',
        # a one-off (single-episode series). title from the api is just '<p></p>'
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/',
        'info_dict': {
            'id': '3026036-en',
            'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
            'ext': 'mp4',
            'title': 'STATELESS: The Japanese Left Behind in the Philippines',
            'description': 'md5:9a2fd51cdfa9f52baae28569e0053786',
            'duration': 2955,
            'thumbnail': 'https://www3.nhk.or.jp/nhkworld/en/shows/3026036/images/wide_l_QPtWpt4lzVhm3NzPAMIIF35MCg4CdNwcikPaTS5Q.jpg',
            'categories': ['Documentary', 'Culture & Lifestyle'],
            'tags': ['Transcript', 'Documentary 360', 'The Pursuit of PEACE'],
            'timestamp': 1758931800,
            'upload_date': '20250927',
            'release_timestamp': 1758931800,
            'release_date': '20250927',
            'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla',
            'description': 'md5:5db620c46a0698451cc59add8816b797',
            'thumbnail': r're:https://.+/.+\.jpg',
            'release_date': '20230905',
            'timestamp': 1690103400,
            'duration': 2939,
            'release_timestamp': 1693898699,
            'upload_date': '20230723',
            'modified_timestamp': 1707217907,
            'modified_date': '20240206',
            'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla',
            'series': 'Barakan Discovers',
        },
    }, {
        # /ondemand/video/ url with alphabetical character in 5th position of id
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/',
        'info_dict': {
            'id': '9999a07-en',
            'id': 'nw_c_en_9999-a07',
            'ext': 'mp4',
            'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
            'series': 'Mini-Dramas on SDGs',
            'modified_date': '20240206',
            'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
            'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6',
            'timestamp': 1621911600,
            'duration': 190,
            'timestamp': 1621962360,
            'duration': 189,
            'release_date': '20230903',
            'modified_timestamp': 1707217907,
            'upload_date': '20210525',
            'thumbnail': r're:https://.+/.+\.jpg',
            'categories': ['Current Affairs', 'Entertainment'],
            'release_timestamp': 1693713487,
        },
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/',
        'info_dict': {
            'id': '9999d17-en',
            'id': 'nw_c_en_9999-d17',
            'ext': 'mp4',
            'title': 'Flowers of snow blossom - The 72 Pentads of Yamato',
            'description': 'Today’s focus: Snow',
            'release_timestamp': 1693792402,
            'release_date': '20230904',
            'upload_date': '20220128',
            'timestamp': 1643370960,
            'thumbnail': r're:https://.+/.+\.jpg',
            'duration': 136,
            'categories': ['Culture & Lifestyle', 'Science & Nature'],
            'tags': ['Nara', 'Temples & Shrines', 'Winter', 'Snow'],
            'timestamp': 1643339040,
            'upload_date': '20220128',
            'series': '',
            'modified_date': '20240206',
            'modified_timestamp': 1707217907,
        },
    }, {
        # new /shows/ url format
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/',
        'info_dict': {
            'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282',
            'ext': 'mp4',
            'title': 'Japanology Plus - 20th Anniversary Special Part 1',
            'description': 'md5:817d41fc8e54339ad2a916161ea24faf',
            'episode': '20th Anniversary Special Part 1',
            'series': 'Japanology Plus',
            'thumbnail': r're:https://.+/.+\.jpg',
            'duration': 1680,
            'timestamp': 1711020600,
            'upload_date': '20240321',
            'release_timestamp': 1711022683,
            'release_date': '20240321',
            'modified_timestamp': 1711031012,
            'modified_date': '20240321',
        },
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/',
        'info_dict': {
            'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944',
            'ext': 'mp4',
            'title': '100 Ideas to Save the World - Working Styles Evolve',
            'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9',
            'episode': 'Working Styles Evolve',
            'series': '100 Ideas to Save the World',
            'thumbnail': r're:https://.+/.+\.jpg',
            'duration': 899,
            'upload_date': '20230325',
            'timestamp': 1679755200,
            'release_date': '20230905',
            'release_timestamp': 1693880540,
            'modified_date': '20240206',
            'modified_timestamp': 1707217907,
        },
    }, {
        # new /shows/audio/ url format
@@ -284,7 +373,6 @@ class NhkVodProgramIE(NhkBaseIE):
            'id': 'sumo',
            'title': 'GRAND SUMO Highlights',
            'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
            'series': 'GRAND SUMO Highlights',
        },
        'playlist_mincount': 1,
    }, {
@@ -293,7 +381,6 @@ class NhkVodProgramIE(NhkBaseIE):
            'id': 'japanrailway',
            'title': 'Japan Railway Journal',
            'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
            'series': 'Japan Railway Journal',
        },
        'playlist_mincount': 12,
    }, {
@@ -303,7 +390,6 @@ class NhkVodProgramIE(NhkBaseIE):
            'id': 'japanrailway',
            'title': 'Japan Railway Journal',
            'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
            'series': 'Japan Railway Journal',
        },
        'playlist_mincount': 12,
    }, {
@@ -313,9 +399,17 @@ class NhkVodProgramIE(NhkBaseIE):
            'id': 'livinginjapan',
            'title': 'Living in Japan',
            'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54',
            'series': 'Living in Japan',
        },
        'playlist_mincount': 11,
        'playlist_mincount': 12,
    }, {
        # /tv/ program url
        'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/',
        'info_dict': {
            'id': 'designtalksplus',
            'title': 'DESIGN TALKS plus',
            'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/',
        'only_matching': True,
@@ -336,8 +430,9 @@ class NhkVodProgramIE(NhkBaseIE):
            program_id, lang, m_type != 'audio', False, episode_type == 'clip')

        def entries():
            for episode in traverse_obj(episodes, ('items', lambda _, v: v['url'])):
                yield self._extract_episode_info(urljoin(url, episode['url']), episode)
            for episode in episodes:
                if episode_path := episode.get('url'):
                    yield self._extract_episode_info(urljoin(url, episode_path), episode)

        html = self._download_webpage(url, program_id)
        program_title = self._extract_meta_from_class_elements([
@@ -351,7 +446,7 @@ class NhkVodProgramIE(NhkBaseIE):
            'tAudioProgramMain__info',  # /shows/audio/programs/
            'p-program-description'], html)  # /tv/

        return self.playlist_result(entries(), program_id, program_title, program_description, series=program_title)
        return self.playlist_result(entries(), program_id, program_title, program_description)


class NhkForSchoolBangumiIE(InfoExtractor):

@@ -1,83 +0,0 @@
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    UserNotLive,
    filter_dict,
    int_or_none,
    join_nonempty,
    parse_iso8601,
    url_or_none,
    urlencode_postdata,
)
from ..utils.traversal import traverse_obj


class PandaTvIE(InfoExtractor):
    IE_DESC = 'pandalive.co.kr (팬더티비)'
    _VALID_URL = r'https?://(?:www\.|m\.)?pandalive\.co\.kr/play/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.pandalive.co.kr/play/bebenim',
        'info_dict': {
            'id': 'bebenim',
            'ext': 'mp4',
            'channel': '릴리ෆ',
            'title': r're:앙앙❤ \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'thumbnail': r're:https://cdn\.pandalive\.co\.kr/ivs/v1/.+/thumb\.jpg',
            'concurrent_view_count': int,
            'like_count': int,
            'live_status': 'is_live',
            'upload_date': str,
        },
        'skip': 'The channel is not currently live',
    }]

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        video_meta = self._download_json(
            'https://api.pandalive.co.kr/v1/live/play', channel_id,
            'Downloading video meta data', 'Unable to download video meta data',
            data=urlencode_postdata(filter_dict({
                'action': 'watch',
                'userId': channel_id,
                'password': self.get_param('videopassword'),
            })), expected_status=400)

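        # the play API reports failures as HTTP 400 with an errorData payload,
        # which is why expected_status=400 is passed above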
        if error_code := traverse_obj(video_meta, ('errorData', 'code', {str})):
            if error_code == 'castEnd':
                raise UserNotLive(video_id=channel_id)
            elif error_code == 'needAdult':
                self.raise_login_required('Adult verification is required for this stream')
            elif error_code == 'needLogin':
                self.raise_login_required('Login is required for this stream')
            elif error_code == 'needCoinPurchase':
                raise ExtractorError('Coin purchase is required for this stream', expected=True)
            elif error_code == 'needUnlimitItem':
                raise ExtractorError('Ticket purchase is required for this stream', expected=True)
            elif error_code == 'needPw':
                raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
            elif error_code == 'wrongPw':
                raise ExtractorError('Wrong password', expected=True)
            else:
                error_msg = video_meta.get('message')
                raise ExtractorError(join_nonempty(
                    'API returned error code', error_code,
                    error_msg and 'with error message:', error_msg,
                    delim=' '))

        http_headers = {'Origin': 'https://www.pandalive.co.kr'}

        return {
            'id': channel_id,
            'is_live': True,
            'formats': self._extract_m3u8_formats(
                video_meta['PlayList']['hls'][0]['url'], channel_id, 'mp4', headers=http_headers, live=True),
            'http_headers': http_headers,
            **traverse_obj(video_meta, ('media', {
                'title': ('title', {str}),
                'release_timestamp': ('startTime', {parse_iso8601(delim=' ')}),
                'thumbnail': ('ivsThumbnail', {url_or_none}),
                'channel': ('userNick', {str}),
                'concurrent_view_count': ('user', {int_or_none}),
                'like_count': ('likeCnt', {int_or_none}),
            })),
        }
@@ -6,10 +6,7 @@ from ..utils.traversal import traverse_obj
class PartiBaseIE(InfoExtractor):
    def _call_api(self, path, video_id, note=None):
        return self._download_json(
            f'https://prod-api.parti.com/parti_v2/profile/{path}', video_id, note, headers={
                'Origin': 'https://parti.com',
                'Referer': 'https://parti.com/',
            })
            f'https://api-backend.parti.com/parti_v2/profile/{path}', video_id, note)


class PartiVideoIE(PartiBaseIE):
@@ -23,7 +20,7 @@ class PartiVideoIE(PartiBaseIE):
            'title': 'NOW LIVE ',
            'upload_date': '20250327',
            'categories': ['Gaming'],
            'thumbnail': 'https://media.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
            'thumbnail': 'https://assets.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
            'channel': 'ItZTMGG',
            'timestamp': 1743044379,
        },
@@ -37,7 +34,7 @@ class PartiVideoIE(PartiBaseIE):
        return {
            'id': video_id,
            'formats': self._extract_m3u8_formats(
                urljoin('https://media.parti.com/', data['livestream_recording']), video_id, 'mp4'),
                urljoin('https://watch.parti.com', data['livestream_recording']), video_id, 'mp4'),
            **traverse_obj(data, {
                'title': ('event_title', {str}),
                'channel': ('user_name', {str}),
@@ -50,27 +47,32 @@ class PartiVideoIE(PartiBaseIE):

class PartiLivestreamIE(PartiBaseIE):
    IE_NAME = 'parti:livestream'
    _VALID_URL = r'https?://(?:www\.)?parti\.com/(?!video/)(?P<id>[\w/-]+)'
    _VALID_URL = r'https?://(?:www\.)?parti\.com/creator/(?P<service>[\w]+)/(?P<id>[\w/-]+)'
    _TESTS = [{
        'url': 'https://parti.com/247CryptoTracker',
        'url': 'https://parti.com/creator/parti/Capt_Robs_Adventures',
        'info_dict': {
            'id': 'Capt_Robs_Adventures',
            'ext': 'mp4',
            'id': '247CryptoTracker',
            'description': 'md5:a78051f3d7e66e6a64c6b1eaf59fd364',
            'title': r"re:I'm Live on Parti \d{4}-\d{2}-\d{2} \d{2}:\d{2}",
            'thumbnail': r're:https://media\.parti\.com/stream-screenshots/.+\.png',
            'view_count': int,
            'thumbnail': r're:https://assets\.parti\.com/.+\.png',
            'timestamp': 1743879776,
            'upload_date': '20250405',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://parti.com/creator/discord/sazboxgaming/0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        creator_slug = self._match_id(url)
        service, creator_slug = self._match_valid_url(url).group('service', 'id')

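        # slashes in multi-segment slugs are sent to the API encoded as %23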
        encoded_creator_slug = creator_slug.replace('/', '%23')
        creator_id = self._call_api(
            f'user_id_from_name/{encoded_creator_slug}',
            creator_slug, note='Fetching user ID')['user_id']
            f'get_user_by_social_media/{service}/{encoded_creator_slug}',
            creator_slug, note='Fetching user ID')

        data = self._call_api(
            f'get_livestream_channel_info/{creator_id}', creator_id,
@@ -83,7 +85,11 @@ class PartiLivestreamIE(PartiBaseIE):

        return {
            'id': creator_slug,
            'formats': self._extract_m3u8_formats(channel_info['playback_url'], creator_slug, live=True),
            'formats': self._extract_m3u8_formats(
                channel_info['playback_url'], creator_slug, live=True, query={
                    'token': channel_info['playback_auth_token'],
                    'player_version': '1.17.0',
                }),
            'is_live': True,
            **traverse_obj(data, {
                'title': ('livestream_event_info', 'event_name', {str}),

@@ -598,8 +598,7 @@ class PatreonCampaignIE(PatreonBaseIE):
            'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str}))
        if not campaign_id:
            campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), (
                ((..., 'value', 'campaign', 'data'), lambda _, v: v['type'] == 'campaign'),
                'id', {str}, any, {require('campaign ID')}))
                lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')}))

        params = {
            'json-api-use-default-includes': 'false',

@@ -4,7 +4,6 @@ from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    str_or_none,
    strip_or_none,
    traverse_obj,
    update_url,
)
@@ -51,6 +50,7 @@ class PicartoIE(InfoExtractor):

        if metadata.get('online') == 0:
            raise ExtractorError('Stream is offline', expected=True)
        title = metadata['title']

        cdn_data = self._download_json(''.join((
            update_url(data['getLoadBalancerUrl']['url'], scheme='https'),
@@ -79,7 +79,7 @@ class PicartoIE(InfoExtractor):

        return {
            'id': channel_id,
            'title': strip_or_none(metadata.get('title')),
            'title': title.strip(),
            'is_live': True,
            'channel': channel_id,
            'channel_id': metadata.get('id'),
@@ -159,7 +159,7 @@ class PicartoVodIE(InfoExtractor):
            'id': video_id,
            **traverse_obj(data, {
                'id': ('id', {str_or_none}),
                'title': ('title', {str.strip}),
                'title': ('title', {str}),
                'thumbnail': 'video_recording_image_url',
                'channel': ('channel', 'name', {str}),
                'age_limit': ('adult', {lambda x: 18 if x else 0}),

@@ -24,7 +24,6 @@ from ..utils import (
    url_or_none,
    urlencode_postdata,
)
from ..utils.traversal import find_elements, traverse_obj


class PornHubBaseIE(InfoExtractor):
@@ -138,24 +137,23 @@ class PornHubIE(PornHubBaseIE):
    _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
    _TESTS = [{
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
        'md5': '4d4a4e9178b655776f86cf89ecaf0edf',
        'md5': 'a6391306d050e4547f62b3f485dd9ba9',
        'info_dict': {
            'id': '648719015',
            'ext': 'mp4',
            'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
            'uploader': 'BABES-COM',
            'uploader_id': '/users/babes-com',
            'uploader': 'Babes',
            'upload_date': '20130628',
            'timestamp': 1372447216,
            'duration': 361,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
            'cast': list,
            'thumbnail': r're:https?://.+',
        },
    }, {
        # non-ASCII title
@@ -482,6 +480,13 @@ class PornHubIE(PornHubBaseIE):
        comment_count = self._extract_count(
            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

        def extract_list(meta_key):
            div = self._search_regex(
                rf'(?s)<div[^>]+\bclass=["\'].*?\b{meta_key}Wrapper[^>]*>(.+?)</div>',
                webpage, meta_key, default=None)
            if div:
                return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]

        info = self._search_json_ld(webpage, video_id, default={})
        # description provided in JSON-LD is irrelevant
        info['description'] = None
@@ -500,11 +505,9 @@ class PornHubIE(PornHubBaseIE):
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
            **traverse_obj(webpage, {
                'tags': ({find_elements(attr='data-label', value='tag')}, ..., {clean_html}),
                'categories': ({find_elements(attr='data-label', value='category')}, ..., {clean_html}),
                'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
            }),
            'tags': extract_list('tags'),
            'categories': extract_list('categories'),
            'cast': extract_list('pornstars'),
            'subtitles': subtitles,
        }, info)


@@ -15,15 +15,14 @@ class S4CIE(InfoExtractor):
            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
        },
    }, {
        # Geo restricted to the UK
        'url': 'https://www.s4c.cymru/clic/programme/886303048',
        'url': 'https://www.s4c.cymru/clic/programme/856636948',
        'info_dict': {
            'id': '886303048',
            'id': '856636948',
            'ext': 'mp4',
            'title': 'Pennod 1',
            'description': 'md5:7e3f364b70f61fcdaa8b4cb4a3eb3e7a',
            'title': 'Am Dro',
            'duration': 2880,
            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Stad_2025S4C_P1_210053.jpg',
            'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
        },
    }]

@@ -52,7 +51,7 @@ class S4CIE(InfoExtractor):
            'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
                'mode': 'od',
                'application': 'clic',
                'region': 'UK' if player_config.get('application') == 's4chttpl' else 'WW',
                'region': 'WW',
                'extra': 'false',
                'thirdParty': 'false',
                'filename': player_config['filename'],

137
yt_dlp/extractor/scte.py
Normal file
@@ -0,0 +1,137 @@
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    decode_packed_codes,
    urlencode_postdata,
)


class SCTEBaseIE(InfoExtractor):
    _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
    _NETRC_MACHINE = 'scte'

    def _perform_login(self, username, password):
        login_popup = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login popup')

        def is_logged(webpage):
            return any(re.search(p, webpage) for p in (
                r'class=["\']welcome\b', r'>Sign Out<'))

        # already logged in
        if is_logged(login_popup):
            return

        login_form = self._hidden_inputs(login_popup)

        login_form.update({
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
        })

        response = self._download_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(login_form))

        if '|pageRedirect|' not in response and not is_logged(response):
            error = self._html_search_regex(
                r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
                response, 'error message', default=None)
            if error:
                raise ExtractorError(f'Unable to login: {error}', expected=True)
            raise ExtractorError('Unable to log in')


class SCTEIE(SCTEBaseIE):
    _WORKING = False
    _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
        'info_dict': {
            'title': 'Introduction to DOCSIS Engineering Professional',
            'id': '31484',
        },
        'playlist_count': 5,
        'skip': 'Requires account credentials',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

        context_id = self._search_regex(r'context-(\d+)', webpage, video_id)
        content_base = f'https://learning.scte.org/pluginfile.php/{context_id}/mod_scorm/content/8/'
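        # data.js ships as P.A.C.K.E.R.-packed JavaScript, so unpack it before searching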
        context = decode_packed_codes(self._download_webpage(
            f'{content_base}mobile/data.js', video_id))

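        # the unpacked script passes an escaped XML asset manifest to CreateData()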
        data = self._parse_xml(
            self._search_regex(
                r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
            video_id)

        entries = []
        for asset in data.findall('.//asset'):
            asset_url = asset.get('url')
            if not asset_url or not asset_url.endswith('.mp4'):
                continue
            asset_id = self._search_regex(
                r'video_([^_]+)_', asset_url, 'asset id', default=None)
            if not asset_id:
                continue
            entries.append({
                'id': asset_id,
                'title': title,
                'url': content_base + asset_url,
            })

        return self.playlist_result(entries, video_id, title)


class SCTECourseIE(SCTEBaseIE):
    _WORKING = False
    _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3639',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3073',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        title = self._search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title', default=None)

        entries = []
        for mobj in re.finditer(
                r'''(?x)
                    <a[^>]+
                        href=(["\'])
                        (?P<url>
                            https?://learning\.scte\.org/mod/
                            (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
                            \bid=\d+
                        )
                ''',
                webpage):
            item_url = mobj.group('url')
            if item_url == url:
                continue
            ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
                  else SCTECourseIE.ie_key())
            entries.append(self.url_result(item_url, ie=ie))

        return self.playlist_result(entries, course_id, title)
@@ -1064,7 +1064,7 @@ class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE):


class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
    _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?:soundcloud(?:%3A|:)playlists(?:%3A|:))?(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
    _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
    IE_NAME = 'soundcloud:playlist'
    _TESTS = [{
        'url': 'https://api.soundcloud.com/playlists/4110309',
@@ -1079,12 +1079,6 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
            'album': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://api.soundcloud.com/playlists/soundcloud%3Aplaylists%3A1759227795',
        'only_matching': True,
    }, {
        'url': 'https://api.soundcloud.com/playlists/soundcloud:playlists:2104769627?secret_token=s-wmpCLuExeYX',
        'only_matching': True,
    }]

    def _real_extract(self, url):

@@ -8,11 +8,10 @@ from ..utils import (


class SportDeutschlandIE(InfoExtractor):
    IE_NAME = 'sporteurope'
    _VALID_URL = r'https?://(?:player\.)?sporteurope\.tv/(?P<id>(?:[^/?#]+/)?[^?#/&]+)'
    _VALID_URL = r'https?://(?:player\.)?sportdeutschland\.tv/(?P<id>(?:[^/?#]+/)?[^?#/&]+)'
    _TESTS = [{
        # Single-part video, direct link
        'url': 'https://sporteurope.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
        'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
        'md5': '35c11a19395c938cdd076b93bda54cde',
        'info_dict': {
            'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a',
@@ -20,9 +19,9 @@ class SportDeutschlandIE(InfoExtractor):
            'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates',
            'display_id': 'rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
            'channel': 'Rostock Griffins',
            'channel_url': 'https://sporteurope.tv/rostock-griffins',
            'channel_url': 'https://sportdeutschland.tv/rostock-griffins',
            'live_status': 'was_live',
            'description': r're:Video-Livestream des Spiels Rostock Griffins vs\. Elmshorn Fighting Pirates.+',
            'description': 'md5:60cb00067e55dafa27b0933a43d72862',
            'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
            'timestamp': 1749913117,
            'upload_date': '20250614',
@@ -30,16 +29,16 @@ class SportDeutschlandIE(InfoExtractor):
        },
    }, {
        # Single-part video, embedded player link
        'url': 'https://player.sporteurope.tv/9e9619c4-7d77-43c4-926d-49fb57dc06dc',
        'url': 'https://player.sportdeutschland.tv/9e9619c4-7d77-43c4-926d-49fb57dc06dc',
        'info_dict': {
            'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a',
            'ext': 'mp4',
            'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates',
            'display_id': '9e9619c4-7d77-43c4-926d-49fb57dc06dc',
            'channel': 'Rostock Griffins',
            'channel_url': 'https://sporteurope.tv/rostock-griffins',
            'channel_url': 'https://sportdeutschland.tv/rostock-griffins',
            'live_status': 'was_live',
            'description': r're:Video-Livestream des Spiels Rostock Griffins vs\. Elmshorn Fighting Pirates.+',
            'description': 'md5:60cb00067e55dafa27b0933a43d72862',
            'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
            'timestamp': 1749913117,
            'upload_date': '20250614',
@@ -48,7 +47,7 @@ class SportDeutschlandIE(InfoExtractor):
        'params': {'skip_download': True},
    }, {
        # Multi-part video
        'url': 'https://sporteurope.tv/rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
        'url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
        'info_dict': {
            'id': '9f63d737-2444-4e3a-a1ea-840df73fd481',
            'display_id': 'rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
@@ -56,7 +55,7 @@ class SportDeutschlandIE(InfoExtractor):
            'description': 'md5:0a17da15e48a687e6019639c3452572b',
            'channel': 'Rhine-Ruhr 2025 FISU World University Games',
            'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
            'channel_url': 'https://sporteurope.tv/rhine-ruhr-2025-fisu-world-university-games',
            'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
            'live_status': 'was_live',
        },
        'playlist_count': 2,
@@ -67,7 +66,7 @@ class SportDeutschlandIE(InfoExtractor):
                'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 1',
                'channel': 'Rhine-Ruhr 2025 FISU World University Games',
                'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
                'channel_url': 'https://sporteurope.tv/rhine-ruhr-2025-fisu-world-university-games',
                'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
                'duration': 14773.0,
                'timestamp': 1753085197,
                'upload_date': '20250721',
@@ -80,17 +79,16 @@ class SportDeutschlandIE(InfoExtractor):
                'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 2',
                'channel': 'Rhine-Ruhr 2025 FISU World University Games',
                'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
                'channel_url': 'https://sporteurope.tv/rhine-ruhr-2025-fisu-world-university-games',
                'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
                'duration': 14773.0,
                'timestamp': 1753128421,
                'upload_date': '20250721',
                'live_status': 'was_live',
            },
        }],
        'skip': '404 Not Found',
    }, {
        # Livestream
        'url': 'https://sporteurope.tv/dtb/gymnastik-international-tag-1',
        'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
        'info_dict': {
            'id': '95d71b8a-370a-4b87-ad16-94680da18528',
            'ext': 'mp4',
@@ -98,7 +96,7 @@ class SportDeutschlandIE(InfoExtractor):
            'display_id': 'dtb/gymnastik-international-tag-1',
            'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab',
            'channel': 'Deutscher Turner-Bund',
            'channel_url': 'https://sporteurope.tv/dtb',
            'channel_url': 'https://sportdeutschland.tv/dtb',
            'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52',
            'live_status': 'is_live',
        },
@@ -108,9 +106,9 @@ class SportDeutschlandIE(InfoExtractor):
    def _process_video(self, asset_id, video):
        is_live = video['type'] == 'mux_live'
        token = self._download_json(
            f'https://api.sporteurope.tv/api/web/personal/asset-token/{asset_id}',
            f'https://api.sportdeutschland.tv/api/web/personal/asset-token/{asset_id}',
            video['id'], query={'type': video['type'], 'playback_id': video['src']},
            headers={'Referer': 'https://sporteurope.tv/'})['token']
            headers={'Referer': 'https://sportdeutschland.tv/'})['token']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'https://stream.mux.com/{video["src"]}.m3u8?token={token}', video['id'], live=is_live)

@@ -128,7 +126,7 @@ class SportDeutschlandIE(InfoExtractor):
    def _real_extract(self, url):
        display_id = self._match_id(url)
        meta = self._download_json(
            f'https://api.sporteurope.tv/api/stateless/frontend/assets/{display_id}',
            f'https://api.sportdeutschland.tv/api/stateless/frontend/assets/{display_id}',
            display_id, query={'access_token': 'true'})

        info = {
@@ -141,7 +139,7 @@ class SportDeutschlandIE(InfoExtractor):
            'channel_id': ('profile', 'id'),
            'is_live': 'currently_live',
            'was_live': 'was_live',
            'channel_url': ('profile', 'slug', {lambda x: f'https://sporteurope.tv/{x}'}),
            'channel_url': ('profile', 'slug', {lambda x: f'https://sportdeutschland.tv/{x}'}),
        }, get_all=False),
    }


@@ -101,8 +101,8 @@ class SproutVideoIE(InfoExtractor):
        webpage = self._download_webpage(
            url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
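        # the player config is embedded as a base64-encoded JSON string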
        data = self._search_json(
            r'(?:window\.|(?:var|const|let)\s+)(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage,
            'player info', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
            r'(?:var|const|let)\s+(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage, 'player info',
            video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
            transform_source=lambda x: base64.b64decode(x).decode())

        # SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e]

@@ -1,243 +0,0 @@
import base64
import binascii
import functools
import re
import urllib.parse

from .common import InfoExtractor
from ..dependencies import Cryptodome
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    clean_html,
    extract_attributes,
    urljoin,
)
from ..utils.traversal import (
    find_element,
    find_elements,
    require,
    traverse_obj,
)


class TarangPlusBaseIE(InfoExtractor):
    _BASE_URL = 'https://tarangplus.in'


class TarangPlusVideoIE(TarangPlusBaseIE):
    IE_NAME = 'tarangplus:video'
    _VALID_URL = r'https?://(?:www\.)?tarangplus\.in/(?:movies|[^#?/]+/[^#?/]+)/(?!episodes)(?P<id>[^#?/]+)'
    _TESTS = [{
        'url': 'https://tarangplus.in/tarangaplus-originals/khitpit/khitpit-ep-10',
        'md5': '78ce056cee755687b8a48199909ecf53',
        'info_dict': {
            'id': '67b8206719521d054c0059b7',
            'display_id': 'khitpit-ep-10',
            'ext': 'mp4',
            'title': 'Khitpit Ep-10',
            'description': 'md5:a45b805cb628e15c853d78b0406eab48',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'duration': 756.0,
            'timestamp': 1740355200,
            'upload_date': '20250224',
            'media_type': 'episode',
            'categories': ['Originals'],
        },
    }, {
        'url': 'https://tarangplus.in/tarang-serials/bada-bohu/bada-bohu-ep-233',
        'md5': 'b4f9beb15172559bb362203b4f48382e',
        'info_dict': {
            'id': '680b9d6c19521d054c007782',
            'display_id': 'bada-bohu-ep-233',
            'ext': 'mp4',
            'title': 'Bada Bohu | Ep -233',
            'description': 'md5:e6b8e7edc9e60b92c1b390f8789ecd69',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'duration': 1392.0,
            'timestamp': 1745539200,
            'upload_date': '20250425',
            'media_type': 'episode',
            'categories': ['Prime'],
        },
    }, {
        # Decrypted m3u8 URL has trailing control characters that need to be stripped
        'url': 'https://tarangplus.in/tarangaplus-originals/ichha/ichha-teaser-1',
        'md5': '16ee43fe21ad8b6e652ec65eba38a64e',
        'info_dict': {
            'id': '5f0f252d3326af0720000342',
            'ext': 'mp4',
            'display_id': 'ichha-teaser-1',
            'title': 'Ichha Teaser',
            'description': 'md5:c724b0b0669a2cefdada3711cec792e6',
            'media_type': 'episode',
            'duration': 21.0,
            'thumbnail': r're:https?://.+/.+\.jpg',
            'categories': ['Originals'],
            'timestamp': 1758153600,
            'upload_date': '20250918',
        },
    }, {
        'url': 'https://tarangplus.in/short/ai-maa/ai-maa',
        'only_matching': True,
    }, {
        'url': 'https://tarangplus.in/shows/tarang-cine-utsav-2024/tarang-cine-utsav-2024-seg-1',
        'only_matching': True,
    }, {
        'url': 'https://tarangplus.in/music-videos/chori-chori-bohu-chori-songs/nijara-laguchu-dhire-dhire',
        'only_matching': True,
    }, {
        'url': 'https://tarangplus.in/kids-shows/chhota-jaga/chhota-jaga-ep-33-jamidar-ra-khajana-adaya',
        'only_matching': True,
    }, {
        'url': 'https://tarangplus.in/movies/swayambara',
        'only_matching': True,
    }]

    def decrypt(self, data, key):
        if not Cryptodome.AES:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)
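        # AES-CBC with a static all-zero IV; both key and payload arrive base64-encoded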
        iv = binascii.unhexlify('00000000000000000000000000000000')
        cipher = Cryptodome.AES.new(base64.b64decode(key), Cryptodome.AES.MODE_CBC, iv)
        return cipher.decrypt(base64.b64decode(data)).decode('utf-8')

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        hidden_inputs_data = self._hidden_inputs(webpage)
        json_ld_data = self._search_json_ld(webpage, display_id)
        json_ld_data.pop('url', None)

        iframe_url = traverse_obj(webpage, (
            {find_element(tag='iframe', attr='src', value=r'.+[?&]contenturl=.+', html=True, regex=True)},
            {extract_attributes}, 'src', {require('iframe URL')}))
        # Can't use parse_qs here since it would decode the encrypted base64 `+` chars to spaces
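        # (e.g. urllib.parse.parse_qs('k=aGk+aG8=') yields {'k': ['aGk aG8=']}, corrupting the payload)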
        content = self._search_regex(r'[?&]contenturl=(.+)', iframe_url, 'content')
        encrypted_data, _, attrs = content.partition('|')
        metadata = {
            m.group('k'): m.group('v')
            for m in re.finditer(r'(?:^|\|)(?P<k>[a-z_]+)=(?P<v>(?:(?!\|[a-z_]+=).)+)', attrs)
        }
        m3u8_url = urllib.parse.unquote(
            self.decrypt(encrypted_data, metadata['key'])).rstrip('\x0e\x0f')

        return {
            'id': display_id,  # Fallback
            'display_id': display_id,
            **json_ld_data,
            **traverse_obj(metadata, {
                'id': ('content_id', {str}),
                'title': ('title', {str}),
                'thumbnail': ('image', {str}),
            }),
            **traverse_obj(hidden_inputs_data, {
                'id': ('content_id', {str}),
                'media_type': ('theme_type', {str}),
                'categories': ('genre', {str}, filter, all, filter),
            }),
            'formats': self._extract_m3u8_formats(m3u8_url, display_id),
        }


class TarangPlusEpisodesIE(TarangPlusBaseIE):
    IE_NAME = 'tarangplus:episodes'
    _VALID_URL = r'https?://(?:www\.)?tarangplus\.in/(?P<type>[^#?/]+)/(?P<id>[^#?/]+)/episodes/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://tarangplus.in/tarangaplus-originals/balijatra/episodes',
        'info_dict': {
            'id': 'balijatra',
            'title': 'Balijatra',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'https://tarangplus.in/tarang-serials/bada-bohu/episodes',
        'info_dict': {
            'id': 'bada-bohu',
            'title': 'Bada Bohu',
        },
        'playlist_mincount': 236,
    }, {
        'url': 'https://tarangplus.in/shows/dr-nonsense/episodes',
        'info_dict': {
            'id': 'dr-nonsense',
            'title': 'Dr. Nonsense',
        },
        'playlist_mincount': 15,
    }]
    _PAGE_SIZE = 20

    def _entries(self, playlist_url, playlist_id, page):
        data = self._download_json(
            playlist_url, playlist_id, f'Downloading playlist JSON page {page + 1}',
            query={'page_no': page})
        for item in traverse_obj(data, ('items', ..., {str})):
            yield self.url_result(
                urljoin(self._BASE_URL, item.split('$')[3]), TarangPlusVideoIE)

    def _real_extract(self, url):
        url_type, display_id = self._match_valid_url(url).group('type', 'id')
        series_url = f'{self._BASE_URL}/{url_type}/{display_id}'
        webpage = self._download_webpage(series_url, display_id)

        entries = OnDemandPagedList(
            functools.partial(self._entries, f'{series_url}/episodes', display_id),
            self._PAGE_SIZE)
        return self.playlist_result(
            entries, display_id, self._hidden_inputs(webpage).get('title'))


class TarangPlusPlaylistIE(TarangPlusBaseIE):
    IE_NAME = 'tarangplus:playlist'
    _VALID_URL = r'https?://(?:www\.)?tarangplus\.in/(?P<id>[^#?/]+)/all/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://tarangplus.in/chhota-jaga/all',
        'info_dict': {
            'id': 'chhota-jaga',
            'title': 'Chhota Jaga',
        },
        'playlist_mincount': 33,
    }, {
        'url': 'https://tarangplus.in/kids-yali-show/all',
        'info_dict': {
            'id': 'kids-yali-show',
            'title': 'Yali',
        },
        'playlist_mincount': 10,
    }, {
        'url': 'https://tarangplus.in/trailer/all',
        'info_dict': {
            'id': 'trailer',
            'title': 'Trailer',
        },
        'playlist_mincount': 57,
    }, {
        'url': 'https://tarangplus.in/latest-songs/all',
        'info_dict': {
            'id': 'latest-songs',
            'title': 'Latest Songs',
        },
        'playlist_mincount': 46,
    }, {
        'url': 'https://tarangplus.in/premium-serials-episodes/all',
        'info_dict': {
            'id': 'premium-serials-episodes',
            'title': 'Primetime Latest Episodes',
        },
        'playlist_mincount': 100,
    }]

    def _entries(self, webpage):
        for url_path in traverse_obj(webpage, (
            {find_elements(cls='item')}, ...,
            {find_elements(tag='a', attr='href', value='/.+', html=True, regex=True)},
            ..., {extract_attributes}, 'href',
        )):
            yield self.url_result(urljoin(self._BASE_URL, url_path), TarangPlusVideoIE)

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        return self.playlist_result(
            self._entries(webpage), display_id,
            traverse_obj(webpage, ({find_element(id='al_title')}, {clean_html})))
@@ -6,21 +6,20 @@ from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    clean_html,
    extract_attributes,
    int_or_none,
    join_nonempty,
    str_or_none,
    traverse_obj,
    update_url,
    url_or_none,
)
from ..utils.traversal import traverse_obj


class TelecincoBaseIE(InfoExtractor):
    def _parse_content(self, content, url):
        video_id = content['dataMediaId'][1]
        video_id = content['dataMediaId']
        config = self._download_json(
            content['dataConfig'][1], video_id, 'Downloading config JSON')
            content['dataConfig'], video_id, 'Downloading config JSON')
        services = config['services']
        caronte = self._download_json(services['caronte'], video_id)
        if traverse_obj(caronte, ('dls', 0, 'drm', {bool})):
@@ -58,9 +57,9 @@ class TelecincoBaseIE(InfoExtractor):
            'id': video_id,
            'title': traverse_obj(config, ('info', 'title', {str})),
            'formats': formats,
            'thumbnail': (traverse_obj(content, ('dataPoster', 1, {url_or_none}))
            'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none}))
                          or traverse_obj(config, 'poster', 'imageUrl', expected_type=url_or_none)),
            'duration': traverse_obj(content, ('dataDuration', 1, {int_or_none})),
            'duration': traverse_obj(content, ('dataDuration', {int_or_none})),
            'http_headers': headers,
        }

@@ -138,45 +137,30 @@ class TelecincoIE(TelecincoBaseIE):
        'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
        'only_matching': True,
    }]
    _ASTRO_ISLAND_RE = re.compile(r'<astro-island\b[^>]+>')
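    # each <astro-island> element carries its component props as JSON in an HTML attribute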

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id, impersonate=True)
        article = self._search_json(
            r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
            webpage, 'article', display_id)['article']
        description = traverse_obj(article, ('leadParagraph', {clean_html}, filter))

        props_list = traverse_obj(webpage, (
            {self._ASTRO_ISLAND_RE.findall}, ...,
            {extract_attributes}, 'props', {json.loads}))

        description = traverse_obj(props_list, (..., 'leadParagraph', 1, {clean_html}, any, filter))
        main_content = traverse_obj(props_list, (..., ('content', ('articleData', 1, 'opening')), 1, {dict}, any))

        if traverse_obj(props_list, (..., 'editorialType', 1, {str}, any)) != 'VID':  # e.g. 'ART'
        if article.get('editorialType') != 'VID':
            entries = []

            for p in traverse_obj(props_list, (..., 'articleData', 1, ('opening', ('body', 1, ...)), 1, {dict})):
                type_ = traverse_obj(p, ('type', 1, {str}))
                content = traverse_obj(p, ('content', 1, {str} if type_ == 'paragraph' else {dict}))
                if not content:
                    continue
                if type_ == 'paragraph':
            for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])):
                content = p['content']
                type_ = p.get('type')
                if type_ == 'paragraph' and isinstance(content, str):
                    description = join_nonempty(description, content, delim='')
                elif type_ == 'video':
                elif type_ == 'video' and isinstance(content, dict):
                    entries.append(self._parse_content(content, url))
                else:
                    self.report_warning(
                        f'Skipping unsupported content type "{type_}"', display_id, only_once=True)

            return self.playlist_result(
                entries,
                traverse_obj(props_list, (..., 'id', 1, {int}, {str_or_none}, any)) or display_id,
                traverse_obj(main_content, ('dataTitle', 1, {str})),
                clean_html(description))
                entries, str_or_none(article.get('id')),
                traverse_obj(article, ('title', {str})), clean_html(description))

        if not main_content:
            raise ExtractorError('Unable to extract main content from webpage')

        info = self._parse_content(main_content, url)
        info = self._parse_content(article['opening']['content'], url)
        info['description'] = description

        return info

@@ -1,17 +1,18 @@
import urllib.parse
import json

from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from .zype import ZypeIE
from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    filter_dict,
    parse_qs,
    smuggle_url,
    try_call,
    urlencode_postdata,
)
from ..utils.traversal import traverse_obj


class ThisOldHouseIE(InfoExtractor):
@@ -76,43 +77,46 @@ class ThisOldHouseIE(InfoExtractor):
        'only_matching': True,
    }]

    def _perform_login(self, username, password):
        login_page = self._download_webpage(
            'https://www.thisoldhouse.com/insider-login', None, 'Downloading login page')
        hidden_inputs = self._hidden_inputs(login_page)
        response = self._download_json(
            'https://www.thisoldhouse.com/wp-admin/admin-ajax.php', None, 'Logging in',
            headers={
                'Accept': 'application/json',
                'X-Requested-With': 'XMLHttpRequest',
            }, data=urlencode_postdata(filter_dict({
                'action': 'onebill_subscriber_login',
                'email': username,
                'password': password,
                'pricingPlanTerm': hidden_inputs['pricing_plan_term'],
                'utm_parameters': hidden_inputs.get('utm_parameters'),
                'nonce': hidden_inputs['mdcr_onebill_login_nonce'],
            })))
    _LOGIN_URL = 'https://login.thisoldhouse.com/usernamepassword/login'

        message = traverse_obj(response, ('data', 'message', {str}))
        if not response['success']:
            if message and 'Something went wrong' in message:
    def _perform_login(self, username, password):
        self._request_webpage(
            HEADRequest('https://www.thisoldhouse.com/insider'), None, 'Requesting session cookies')
        urlh = self._request_webpage(
            'https://www.thisoldhouse.com/wp-login.php', None, 'Requesting login info',
            errnote='Unable to login', query={'redirect_to': 'https://www.thisoldhouse.com/insider'})

        try:
            auth_form = self._download_webpage(
                self._LOGIN_URL, None, 'Submitting credentials', headers={
                    'Content-Type': 'application/json',
                    'Referer': urlh.url,
                }, data=json.dumps(filter_dict({
                    **{('client_id' if k == 'client' else k): v[0] for k, v in parse_qs(urlh.url).items()},
                    'tenant': 'thisoldhouse',
                    'username': username,
                    'password': password,
                    'popup_options': {},
                    'sso': True,
                    '_csrf': try_call(lambda: self._get_cookies(self._LOGIN_URL)['_csrf'].value),
                    '_intstate': 'deprecated',
                }), separators=(',', ':')).encode())
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                raise ExtractorError('Invalid username or password', expected=True)
                raise ExtractorError(message or 'Login was unsuccessful')
            if message and 'Your subscription is not active' in message:
                self.report_warning(
                    f'{self.IE_NAME} said your subscription is not active. '
                    f'If your subscription is active, this could be caused by too many sign-ins, '
                    f'and you should instead try using {self._login_hint(method="cookies")[4:]}')
            else:
                self.write_debug(f'{self.IE_NAME} said: {message}')
            raise

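        # complete the Auth0-style round-trip by posting the hidden-input form back to the callback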
        self._request_webpage(
            'https://login.thisoldhouse.com/login/callback', None, 'Completing login',
            data=urlencode_postdata(self._hidden_inputs(auth_form)))

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, display_id)
        # If login response says inactive subscription, site redirects to frontpage for Insider content
        if 'To Unlock This content' in webpage or urllib.parse.urlparse(urlh.url).path in ('', '/'):
            self.raise_login_required('This video is only available for subscribers')
        webpage = self._download_webpage(url, display_id)
        if 'To Unlock This content' in webpage:
            self.raise_login_required(
                'This video is only available for subscribers. '
                'Note that --cookies-from-browser may not work due to this site using session cookies')

        video_url, video_id = self._search_regex(
            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',

@@ -454,7 +454,6 @@ class TikTokBaseIE(InfoExtractor):
                'like_count': 'digg_count',
                'repost_count': 'share_count',
                'comment_count': 'comment_count',
                'save_count': 'collect_count',
            }, expected_type=int_or_none),
            **author_info,
            'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None),
@@ -608,7 +607,6 @@ class TikTokBaseIE(InfoExtractor):
                'like_count': 'diggCount',
                'repost_count': 'shareCount',
                'comment_count': 'commentCount',
                'save_count': 'collectCount',
            }), expected_type=int_or_none),
            'thumbnails': [
                {
@@ -648,7 +646,6 @@ class TikTokIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
            'artist': 'Ysrbeats',
            'album': 'Lehanga',
            'track': 'Lehanga',
@@ -678,7 +675,6 @@ class TikTokIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
            'artists': ['Evan Todd', 'Jessica Keenan Wynn', 'Alice Lee', 'Barrett Wilbert Weed', 'Jon Eidson'],
            'track': 'Big Fun',
        },
@@ -706,7 +702,6 @@ class TikTokIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
        },
    }, {
        # Sponsored video, only available with feed workaround
@@ -730,7 +725,6 @@ class TikTokIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
        },
        'skip': 'This video is unavailable',
    }, {
@@ -757,7 +751,6 @@ class TikTokIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
        },
    }, {
        # hydration JSON is sent in a <script> element
@@ -780,7 +773,6 @@ class TikTokIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
        },
        'skip': 'This video is unavailable',
    }, {
@@ -806,7 +798,6 @@ class TikTokIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
            'thumbnail': r're:^https://.+\.(?:webp|jpe?g)',
        },
    }, {
@@ -833,7 +824,6 @@ class TikTokIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
            'thumbnail': r're:^https://.+',
            'thumbnails': 'count:3',
        },
@@ -861,7 +851,6 @@ class TikTokIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
            'thumbnail': r're:^https://.+\.webp',
        },
        'skip': 'Unavailable via feed API, only audio available via web',
@@ -890,7 +879,6 @@ class TikTokIE(TikTokBaseIE):
            'like_count': int,
            'comment_count': int,
            'repost_count': int,
            'save_count': int,
            'thumbnail': r're:^https://.+\.(?:webp|jpe?g)',
        },
    }, {
@@ -1300,7 +1288,6 @@ class DouyinIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
            'thumbnail': r're:https?://.+\.jpe?g',
        },
    }, {
@@ -1325,7 +1312,6 @@ class DouyinIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
            'thumbnail': r're:https?://.+\.jpe?g',
        },
    }, {
@@ -1350,7 +1336,6 @@ class DouyinIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
            'thumbnail': r're:https?://.+\.jpe?g',
        },
    }, {
@@ -1368,7 +1353,6 @@ class DouyinIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
        },
        'skip': 'No longer available',
    }, {
@@ -1393,7 +1377,6 @@ class DouyinIE(TikTokBaseIE):
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'save_count': int,
            'thumbnail': r're:https?://.+\.jpe?g',
        },
    }]
@@ -1454,7 +1437,6 @@ class TikTokVMIE(InfoExtractor):
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'save_count': int,
            'thumbnail': r're:https://.+\.webp.*',
            'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAdZ_NcPPgMneaGrW0hN8O_J_bwLshwNNERRF5DxOw2HKIzk0kdlLrR8RkVl1ksrMO',
            'duration': 29,

@@ -15,7 +15,7 @@ from ..utils import (

class TubiTvIE(InfoExtractor):
    IE_NAME = 'tubitv'
    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:[a-z]{2}-[a-z]{2}/)?(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
    _LOGIN_URL = 'http://tubitv.com/login'
    _NETRC_MACHINE = 'tubitv'
    _TESTS = [{
@@ -73,9 +73,6 @@ class TubiTvIE(InfoExtractor):
            'release_year': 1979,
        },
        'skip': 'Content Unavailable',
    }, {
        'url': 'https://tubitv.com/es-mx/tv-shows/477363/s01-e03-jacob-dos-dos-y-la-tarjets-de-hockey-robada',
        'only_matching': True,
    }]

    # DRM formats are included only to raise appropriate error
@@ -185,13 +182,13 @@ class TubiTvShowIE(InfoExtractor):
        webpage = self._download_webpage(show_url, playlist_id)

        data = self._search_json(
            r'window\.__REACT_QUERY_STATE__\s*=', webpage, 'data', playlist_id,
            transform_source=js_to_json)['queries'][0]['state']['data']
            r'window\.__data\s*=', webpage, 'data', playlist_id,
            transform_source=js_to_json)['video']

        # v['number'] is already a decimal string, but stringify to protect against API changes
        path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}]

        for season in traverse_obj(data, ('seasons', *path)):
        for season in traverse_obj(data, ('byId', lambda _, v: v['type'] == 's', 'seasons', *path)):
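            # 's'-type entries in byId appear to be the series objects that hold the season lists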
            season_number = int_or_none(season.get('number'))
            for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])):
                episode_id = episode['id']

@@ -20,8 +20,6 @@ class TumblrIE(InfoExtractor):
|
||||
'id': '54196191430',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:dfac39636969fe6bf1caa2d50405f069',
|
||||
'timestamp': 1372531260,
|
||||
'upload_date': '20130629',
|
||||
'description': 'md5:390ab77358960235b6937ab3b8528956',
|
||||
'uploader_id': 'tatianamaslanydaily',
|
||||
'uploader_url': 'https://tatianamaslanydaily.tumblr.com/',
|
||||
@@ -41,8 +39,6 @@ class TumblrIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Mona\xa0“talking” in\xa0“english”',
|
||||
'description': 'md5:082a3a621530cb786ad2b7592a6d9e2c',
|
||||
'timestamp': 1597865276,
|
||||
'upload_date': '20200819',
|
||||
'uploader_id': 'maskofthedragon',
|
||||
'uploader_url': 'https://maskofthedragon.tumblr.com/',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
@@ -80,8 +76,6 @@ class TumblrIE(InfoExtractor):
|
||||
'id': '159704441298',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:ba79365861101f4911452728d2950561',
|
||||
'timestamp': 1492489550,
|
||||
'upload_date': '20170418',
|
||||
'description': 'md5:773738196cea76b6996ec71e285bdabc',
|
||||
'uploader_id': 'jujanon',
|
||||
'uploader_url': 'https://jujanon.tumblr.com/',
|
||||
@@ -99,8 +93,6 @@ class TumblrIE(InfoExtractor):
|
||||
'id': '180294460076',
|
||||
'ext': 'mp4',
|
||||
'title': 'duality of bird',
|
||||
'timestamp': 1542651819,
|
||||
'upload_date': '20181119',
|
||||
'description': 'duality of bird',
|
||||
'uploader_id': 'todaysbird',
|
||||
'uploader_url': 'https://todaysbird.tumblr.com/',
|
||||
@@ -246,8 +238,6 @@ class TumblrIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '730460905855467520',
|
||||
'uploader_id': 'felixcosm',
|
||||
'upload_date': '20231006',
|
||||
'timestamp': 1696621805,
|
||||
'repost_count': int,
|
||||
'tags': 'count:15',
|
||||
'description': 'md5:2eb3482a3c6987280cbefb6839068f32',
|
||||
@@ -337,8 +327,6 @@ class TumblrIE(InfoExtractor):
|
||||
'url': 'https://www.tumblr.com/anyaboz/765332564457209856/my-music-video-for-selkie-by-nobodys-wolf-child',
|
||||
'info_dict': {
|
||||
'id': '765332564457209856',
|
||||
'timestamp': 1729878010,
|
||||
'upload_date': '20241025',
|
||||
'uploader_id': 'anyaboz',
|
||||
'repost_count': int,
|
||||
'age_limit': 0,
|
||||
@@ -457,8 +445,6 @@ class TumblrIE(InfoExtractor):
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': f'https://{uploader_id}.tumblr.com/' if uploader_id else None,
|
||||
**traverse_obj(post_json, {
|
||||
# Try oldest post in reblog chain, fall back to timestamp of the post itself
|
||||
'timestamp': ((('trail', 0, 'post'), None), 'timestamp', {int_or_none}, any),
|
||||
'like_count': ('like_count', {int_or_none}),
|
||||
'repost_count': ('reblog_count', {int_or_none}),
|
||||
'tags': ('tags', ..., {str}),
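The timestamp path above branches: it tries the oldest post in the reblog trail first and falls back to the post's own timestamp, with `any` keeping the first branch that produced a value. A small sketch against a hypothetical post_json:

from yt_dlp.utils import int_or_none
from yt_dlp.utils.traversal import traverse_obj

# Hypothetical reblogged post: the trail entry is older than the post itself
post_json = {'timestamp': 200, 'trail': [{'post': {'timestamp': 100}}]}
# (('trail', 0, 'post'), None) branches to the trail post and to post_json itself
ts = traverse_obj(post_json, ((('trail', 0, 'post'), None), 'timestamp', {int_or_none}, any))
assert ts == 100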

@@ -1,18 +1,14 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
make_archive_id,
parse_age_limit,
remove_end,
smuggle_url,
try_get,
)
from ..utils.traversal import traverse_obj


class TV5UnisBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['CA']
_GEO_BYPASS = False

def _real_extract(self, url):
groups = self._match_valid_url(url).groups()
@@ -20,136 +16,96 @@ class TV5UnisBaseIE(InfoExtractor):
'https://api.tv5unis.ca/graphql', groups[0], query={
'query': '''{
%s(%s) {
title
summary
tags
duration
seasonNumber
episodeNumber
collection {
title
}
episodeNumber
rating {
name
}
seasonNumber
tags
title
videoElement {
__typename
... on Video {
mediaId
encodings {
hls {
url
}
}
}
... on RestrictedVideo {
code
reason
}
}
}
}''' % (self._GQL_QUERY_NAME, self._gql_args(groups)), # noqa: UP031
})['data'][self._GQL_QUERY_NAME]

video = product['videoElement']
if video is None:
raise ExtractorError('This content is no longer available', expected=True)

if video.get('__typename') == 'RestrictedVideo':
code = video.get('code')
if code == 1001:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
reason = video.get('reason')
raise ExtractorError(join_nonempty(
'This video is restricted',
code is not None and f', error code {code}',
reason and f': {remove_end(reason, ".")}',
delim=''))

media_id = video['mediaId']
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
video['encodings']['hls']['url'], media_id, 'mp4')
media_id = product['videoElement']['mediaId']

return {
'_type': 'url_transparent',
'id': media_id,
'_old_archive_ids': [make_archive_id('LimelightMedia', media_id)],
'formats': formats,
'subtitles': subtitles,
**traverse_obj(product, {
'title': ('title', {str}),
'description': ('summary', {str}),
'tags': ('tags', ..., {str}),
'duration': ('duration', {int_or_none}),
'season_number': ('seasonNumber', {int_or_none}),
'episode_number': ('episodeNumber', {int_or_none}),
'series': ('collection', 'title', {str}),
'age_limit': ('rating', 'name', {parse_age_limit}),
}),
'title': product.get('title'),
'url': smuggle_url('limelight:media:' + media_id, {'geo_countries': self._GEO_COUNTRIES}),
'age_limit': parse_age_limit(try_get(product, lambda x: x['rating']['name'])),
'tags': product.get('tags'),
'series': try_get(product, lambda x: x['collection']['title']),
'season_number': int_or_none(product.get('seasonNumber')),
'episode_number': int_or_none(product.get('episodeNumber')),
'ie_key': 'LimelightMedia',
}
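join_nonempty lets the restriction message above degrade gracefully: any falsy part (a missing code or reason) is simply dropped. A minimal sketch with illustrative values:

from yt_dlp.utils import join_nonempty, remove_end

code, reason = 1002, 'Not available in your plan.'
# Falsy parts (None, False, '') are skipped; delim='' glues the rest together
message = join_nonempty(
    'This video is restricted',
    code is not None and f', error code {code}',
    reason and f': {remove_end(reason, ".")}',
    delim='')
assert message == 'This video is restricted, error code 1002: Not available in your plan'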


class TV5UnisVideoIE(TV5UnisBaseIE):
_WORKING = False
IE_NAME = 'tv5unis:video'
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/[^/?#]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.tv5unis.ca/videos/bande-annonces/144041',
'md5': '24a247c96119d77fe1bae8b440457dfa',
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'https://www.tv5unis.ca/videos/bande-annonces/71843',
'md5': '3d794164928bda97fb87a17e89923d9b',
'info_dict': {
'id': '56862325352147149dce0ae139afced6',
'_old_archive_ids': ['limelightmedia 56862325352147149dce0ae139afced6'],
'id': 'a883684aecb2486cad9bdc7bbe17f861',
'ext': 'mp4',
'title': 'Antigone',
'description': r"re:En aidant son frère .+ dicté par l'amour et la solidarité.",
'duration': 61,
'title': 'Watatatow',
'duration': 10.01,
},
}]
}
_GQL_QUERY_NAME = 'productById'

@staticmethod
def _gql_args(groups):
return f'id: {groups[0]}'
return f'id: {groups}'


class TV5UnisIE(TV5UnisBaseIE):
_WORKING = False
IE_NAME = 'tv5unis'
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P<id>[^/?#]+)(?:/saisons/(?P<season_number>\d+)/episodes/(?P<episode_number>\d+))?/?(?:[?#&]|$)'
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P<id>[^/]+)(?:/saisons/(?P<season_number>\d+)/episodes/(?P<episode_number>\d+))?/?(?:[?#&]|$)'
_TESTS = [{
# geo-restricted to Canada; xff is ineffective
'url': 'https://www.tv5unis.ca/videos/watatatow/saisons/11/episodes/1',
'md5': '43beebd47eefb1c5caf9a47a3fc35589',
'url': 'https://www.tv5unis.ca/videos/watatatow/saisons/6/episodes/1',
'md5': 'a479907d2e531a73e1f8dc48d6388d02',
'info_dict': {
'id': '2c06e4af20f0417b86c2536825287690',
'_old_archive_ids': ['limelightmedia 2c06e4af20f0417b86c2536825287690'],
'id': 'e5ee23a586c44612a56aad61accf16ef',
'ext': 'mp4',
'title': "L'homme éléphant",
'description': r're:Paul-André et Jean-Yves, .+ quand elle parle du feu au Spot.',
'title': 'Je ne peux pas lui résister',
'description': "Atys, le nouveau concierge de l'école, a réussi à ébranler la confiance de Mado en affirmant qu'une médaille, ce n'est que du métal. Comme Mado essaie de lui prouver que ses valeurs sont solides, il veut la mettre à l'épreuve...",
'subtitles': {
'fr': 'count:1',
},
'duration': 1440,
'duration': 1370,
'age_limit': 8,
'tags': 'count:4',
'tags': 'count:3',
'series': 'Watatatow',
'season': 'Season 11',
'season_number': 11,
'episode': 'Episode 1',
'season_number': 6,
'episode_number': 1,
},
}, {
# geo-restricted to Canada; xff is ineffective
'url': 'https://www.tv5unis.ca/videos/boite-a-savon',
'md5': '7898e868e8c540f03844660e0aab6bbe',
'url': 'https://www.tv5unis.ca/videos/le-voyage-de-fanny',
'md5': '9ca80ebb575c681d10cae1adff3d4774',
'info_dict': {
'id': '4de6d0c6467b4511a0c04b92037a9f15',
'_old_archive_ids': ['limelightmedia 4de6d0c6467b4511a0c04b92037a9f15'],
'id': '726188eefe094d8faefb13381d42bc06',
'ext': 'mp4',
'title': 'Boîte à savon',
'description': r're:Dans le petit village de Broche-à-foin, .+ celle qui fait battre son coeur.',
'title': 'Le voyage de Fanny',
'description': "Fanny, 12 ans, cachée dans un foyer loin de ses parents, s'occupe de ses deux soeurs. Devant fuir, Fanny prend la tête d'un groupe de huit enfants et s'engage dans un dangereux périple à travers la France occupée pour rejoindre la frontière suisse.",
'subtitles': {
'fr': 'count:1',
},
'duration': 1200,
'tags': 'count:5',
'duration': 5587.034,
'tags': 'count:4',
},
}]
_GQL_QUERY_NAME = 'productByRootProductSlug'

@@ -680,10 +680,6 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
}],
f'Downloading {self._NODE_KIND}s GraphQL page {page_num}',
fatal=False)
# Avoid extracting random/unrelated entries when channel_name doesn't exist
# See https://github.com/yt-dlp/yt-dlp/issues/15450
if traverse_obj(page, (0, 'data', 'user', 'id', {str})) == '':
raise ExtractorError(f'Channel "{channel_name}" not found', expected=True)
if not page:
break
edges = try_get(

@@ -32,11 +32,67 @@ from ..utils.traversal import require, traverse_obj


class TwitterBaseIE(InfoExtractor):
_NETRC_MACHINE = 'twitter'
_API_BASE = 'https://api.x.com/1.1/'
_GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
_flow_token = None

_LOGIN_INIT_DATA = json.dumps({
'input_flow_data': {
'flow_context': {
'debug_overrides': {},
'start_location': {
'location': 'unknown',
},
},
},
'subtask_versions': {
'action_list': 2,
'alert_dialog': 1,
'app_download_cta': 1,
'check_logged_in_account': 1,
'choice_selection': 3,
'contacts_live_sync_permission_prompt': 0,
'cta': 7,
'email_verification': 2,
'end_flow': 1,
'enter_date': 1,
'enter_email': 2,
'enter_password': 5,
'enter_phone': 2,
'enter_recaptcha': 1,
'enter_text': 5,
'enter_username': 2,
'generic_urt': 3,
'in_app_notification': 1,
'interest_picker': 3,
'js_instrumentation': 1,
'menu_dialog': 1,
'notifications_permission_prompt': 2,
'open_account': 2,
'open_home_timeline': 1,
'open_link': 1,
'phone_verification': 4,
'privacy_options': 1,
'security_key': 3,
'select_avatar': 4,
'select_banner': 2,
'settings_list': 7,
'show_code': 1,
'sign_up': 2,
'sign_up_review': 4,
'tweet_selection_urt': 1,
'update_users': 1,
'upload_media': 1,
'user_recommendations_list': 4,
'user_recommendations_urt': 1,
'wait_spinner': 3,
'web_modal': 1,
},
}, separators=(',', ':')).encode()
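The separators=(',', ':') argument strips the spaces json.dumps inserts by default, so the encoded body matches the compact JSON the web client sends. For example:

import json

payload = {'flow_token': None, 'subtask_inputs': []}
# default output would be '{"flow_token": null, "subtask_inputs": []}'
assert json.dumps(payload, separators=(',', ':')) == '{"flow_token":null,"subtask_inputs":[]}'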

def _extract_variant_formats(self, variant, video_id):
variant_url = variant.get('url')
@@ -116,6 +172,135 @@ class TwitterBaseIE(InfoExtractor):
'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
})

def _call_login_api(self, note, headers, query={}, data=None):
response = self._download_json(
f'{self._API_BASE}onboarding/task.json', None, note,
headers=headers, query=query, data=data, expected_status=400)
error = traverse_obj(response, ('errors', 0, 'message', {str}))
if error:
raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
elif traverse_obj(response, 'status') != 'success':
raise ExtractorError('Login was unsuccessful')

subtask = traverse_obj(
response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
if not subtask:
raise ExtractorError('Twitter API did not return next login subtask')

self._flow_token = response['flow_token']

return subtask

def _perform_login(self, username, password):
if self.is_logged_in:
return

guest_token = self._fetch_guest_token(None)
headers = {
**self._set_base_headers(),
'content-type': 'application/json',
'x-guest-token': guest_token,
'x-twitter-client-language': 'en',
'x-twitter-active-user': 'yes',
'Referer': 'https://x.com/',
'Origin': 'https://x.com',
}

def build_login_json(*subtask_inputs):
return json.dumps({
'flow_token': self._flow_token,
'subtask_inputs': subtask_inputs,
}, separators=(',', ':')).encode()

def input_dict(subtask_id, text):
return {
'subtask_id': subtask_id,
'enter_text': {
'text': text,
'link': 'next_link',
},
}

next_subtask = self._call_login_api(
'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

while not self.is_logged_in:
if next_subtask == 'LoginJsInstrumentationSubtask':
next_subtask = self._call_login_api(
'Submitting JS instrumentation response', headers, data=build_login_json({
'subtask_id': next_subtask,
'js_instrumentation': {
'response': '{}',
'link': 'next_link',
},
}))

elif next_subtask == 'LoginEnterUserIdentifierSSO':
next_subtask = self._call_login_api(
'Submitting username', headers, data=build_login_json({
'subtask_id': next_subtask,
'settings_list': {
'setting_responses': [{
'key': 'user_identifier',
'response_data': {
'text_data': {
'result': username,
},
},
}],
'link': 'next_link',
},
}))

elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
next_subtask = self._call_login_api(
'Submitting alternate identifier', headers,
data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
'one of username, phone number or email that was not used as --username'))))

elif next_subtask == 'LoginEnterPassword':
next_subtask = self._call_login_api(
'Submitting password', headers, data=build_login_json({
'subtask_id': next_subtask,
'enter_password': {
'password': password,
'link': 'next_link',
},
}))

elif next_subtask == 'AccountDuplicationCheck':
next_subtask = self._call_login_api(
'Submitting account duplication check', headers, data=build_login_json({
'subtask_id': next_subtask,
'check_logged_in_account': {
'link': 'AccountDuplicationCheck_false',
},
}))

elif next_subtask == 'LoginTwoFactorAuthChallenge':
next_subtask = self._call_login_api(
'Submitting 2FA token', headers, data=build_login_json(input_dict(
next_subtask, self._get_tfa_info('two-factor authentication token'))))

elif next_subtask == 'LoginAcid':
next_subtask = self._call_login_api(
'Submitting confirmation code', headers, data=build_login_json(input_dict(
next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

elif next_subtask == 'ArkoseLogin':
self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

elif next_subtask == 'DenyLoginSubtask':
self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

elif next_subtask == 'LoginSuccessSubtask':
raise ExtractorError('Twitter API did not grant auth token cookie')

else:
raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

self.report_login()
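The login above is a small state machine: each onboarding response names the next subtask, and the client keeps answering until the flow terminates. A generic sketch of the pattern (the call_api callable and handler names are illustrative, not Twitter's API):

def run_flow(call_api, handlers):
    # call_api('start') kicks off the flow; each handler answers one subtask
    # and returns the next subtask name, or None when the flow is finished
    subtask = call_api('start')
    while subtask is not None:
        handler = handlers.get(subtask)
        if handler is None:
            raise ValueError(f'Unrecognized subtask {subtask!r}')
        subtask = handler()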

def _call_api(self, path, video_id, query={}, graphql=False):
headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
headers.update({
@@ -231,7 +416,6 @@ class TwitterCardIE(InfoExtractor):
'live_status': 'not_live',
},
'add_ie': ['Youtube'],
'skip': 'The page does not exist',
},
{
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
@@ -433,7 +617,6 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'_old_archive_ids': ['twitter 852138619213144067'],
},
'skip': 'Suspended',
}, {
'url': 'https://twitter.com/i/web/status/910031516746514432',
'info_dict': {
@@ -580,10 +763,10 @@ class TwitterIE(TwitterBaseIE):
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
'info_dict': {
'id': '1577719286659006464',
'title': r're:Ultima.* - Test$',
'title': 'Ultima - Test',
'description': 'Test https://t.co/Y3KEZD7Dad',
'channel_id': '168922496',
'uploader': r're:Ultima.*',
'uploader': 'Ultima',
'uploader_id': 'UltimaShadowX',
'uploader_url': 'https://twitter.com/UltimaShadowX',
'upload_date': '20221005',
@@ -712,12 +895,11 @@ class TwitterIE(TwitterBaseIE):
'uploader': r're:Monique Camarra.+?',
'uploader_id': 'MoniqueCamarra',
'live_status': 'was_live',
'release_timestamp': 1658417305,
'release_timestamp': 1658417414,
'description': r're:Twitter Space participated by Sergej Sumlenny.+',
'timestamp': 1658407771,
'release_date': '20220721',
'upload_date': '20220721',
'thumbnail': 'https://pbs.twimg.com/profile_images/1920514378006188033/xQs6J_yI_400x400.jpg',
},
'add_ie': ['TwitterSpaces'],
'params': {'skip_download': 'm3u8'},
@@ -828,10 +1010,10 @@ class TwitterIE(TwitterBaseIE):
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
'age_limit': 0,
'uploader': 'D U N I Y A',
'uploader': 'Boy Called Mün',
'repost_count': int,
'upload_date': '20221206',
'title': 'D U N I Y A - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
'title': 'Boy Called Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
'comment_count': int,
'like_count': int,
'tags': [],
@@ -886,7 +1068,6 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'_old_archive_ids': ['twitter 1695424220702888009'],
},
'skip': 'Suspended',
}, {
# retweeted_status w/ legacy API
'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
@@ -911,7 +1092,6 @@ class TwitterIE(TwitterBaseIE):
'_old_archive_ids': ['twitter 1695424220702888009'],
},
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
'skip': 'Suspended',
}, {
# Broadcast embedded in tweet
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
@@ -955,6 +1135,7 @@ class TwitterIE(TwitterBaseIE):
}, {
# "stale tweet" with typename "TweetWithVisibilityResults"
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
'md5': '511377ff8dfa7545307084dca4dce319',
'info_dict': {
'id': '1724883339285544960',
'ext': 'mp4',
@@ -1001,30 +1182,6 @@ class TwitterIE(TwitterBaseIE):
'age_limit': 0,
'_old_archive_ids': ['twitter 1790637656616943991'],
},
}, {
# unified_card with 2 items of type video and photo
'url': 'https://x.com/TopHeroes_/status/2001950365332455490',
'info_dict': {
'id': '2001841416071450628',
'ext': 'mp4',
'display_id': '2001950365332455490',
'title': 'Top Heroes - Forgot to close My heroes solo level up in my phone ✨Unlock the fog,...',
'description': r're:Forgot to close My heroes solo level up in my phone ✨Unlock the fog.+',
'uploader': 'Top Heroes',
'uploader_id': 'TopHeroes_',
'uploader_url': 'https://twitter.com/TopHeroes_',
'channel_id': '1737324725620326400',
'comment_count': int,
'like_count': int,
'repost_count': int,
'age_limit': 0,
'duration': 30.278,
'thumbnail': 'https://pbs.twimg.com/amplify_video_thumb/2001841416071450628/img/hpy5KpJh4pO17b65.jpg?name=orig',
'tags': [],
'timestamp': 1766137136,
'upload_date': '20251219',
'_old_archive_ids': ['twitter 2001950365332455490'],
},
}, {
# onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@@ -1265,14 +1422,14 @@ class TwitterIE(TwitterBaseIE):
if not card:
return

card_name = card['name'].split(':')[-1]
self.write_debug(f'Extracting from {card_name} card info: {card.get("url")}')
self.write_debug(f'Extracting from card info: {card.get("url")}')
binding_values = card['binding_values']

def get_binding_value(k):
o = binding_values.get(k) or {}
return try_get(o, lambda x: x[x['type'].lower() + '_value'])

card_name = card['name'].split(':')[-1]
if card_name == 'player':
yield {
'_type': 'url',
@@ -1304,7 +1461,7 @@ class TwitterIE(TwitterBaseIE):
elif card_name == 'unified_card':
unified_card = self._parse_json(get_binding_value('unified_card'), twid)
yield from map(extract_from_video_info, traverse_obj(
unified_card, ('media_entities', lambda _, v: v['type'] == 'video')))
unified_card, ('media_entities', ...), expected_type=dict))
# amplify, promo_video_website, promo_video_convo, appplayer,
# video_direct_message, poll2choice_video, poll3choice_video,
# poll4choice_video, ...
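The new unified_card path filters by entity type instead of taking every dict, which is what keeps photo entries out of the video extraction. A small sketch with a hypothetical card payload:

from yt_dlp.utils.traversal import traverse_obj

# Hypothetical unified_card with one photo and one video entity
unified_card = {'media_entities': [
    {'type': 'photo', 'media_url': 'https://example.com/a.jpg'},
    {'type': 'video', 'media_url': 'https://example.com/b.mp4'},
]}
videos = traverse_obj(unified_card, ('media_entities', lambda _, v: v['type'] == 'video'))
assert [v['type'] for v in videos] == ['video']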

@@ -7,15 +7,15 @@ from ..utils import (
parse_age_limit,
try_get,
unified_timestamp,
url_or_none,
)
from ..utils.traversal import require, traverse_obj
from ..utils.traversal import traverse_obj


class URPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand',
'md5': '5ba36643c77cc3d34ffeadad89937d1e',
'info_dict': {
'id': '203704',
'ext': 'mp4',
@@ -31,7 +31,6 @@ class URPlayIE(InfoExtractor):
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
'age_limit': 15,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://urplay.se/program/222967-en-foralders-dagbok-mitt-barn-skadar-sig-sjalv',
'info_dict': {
@@ -50,7 +49,6 @@ class URPlayIE(InfoExtractor):
'tags': 'count:7',
'episode': 'Mitt barn skadar sig själv',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
'info_dict': {
@@ -70,27 +68,6 @@ class URPlayIE(InfoExtractor):
'episode': 'Sovkudde',
'season': 'Säsong 1',
},
'params': {'skip_download': 'm3u8'},
}, {
# Only accessible through new media api
'url': 'https://urplay.se/program/242932-vulkanernas-krafter-fran-kraftfull-till-forgorande',
'info_dict': {
'id': '242932',
'ext': 'mp4',
'title': 'Vulkanernas krafter : Från kraftfull till förgörande',
'description': 'md5:742bb87048e7d5a7f209d28f9bb70ab1',
'age_limit': 15,
'duration': 2613,
'thumbnail': 'https://assets.ur.se/id/242932/images/1_hd.jpg',
'categories': ['Vetenskap & teknik'],
'tags': ['Geofysik', 'Naturvetenskap', 'Vulkaner', 'Vulkanutbrott'],
'series': 'Vulkanernas krafter',
'episode': 'Från kraftfull till förgörande',
'episode_number': 2,
'timestamp': 1763514000,
'upload_date': '20251119',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
'only_matching': True,
@@ -111,12 +88,21 @@ class URPlayIE(InfoExtractor):
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id))
episode = urplayer_data['title']
sources = self._download_json(
f'https://media-api.urplay.se/config-streaming/v1/urplay/sources/{video_id}', video_id,
note='Downloading streaming information')
hls_url = traverse_obj(sources, ('sources', 'hls', {url_or_none}, {require('HLS URL')}))
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
hls_url, video_id, 'mp4', m3u8_id='hls')
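require() turns a silent traversal miss into a hard failure: where a plain path would yield None, the wrapped path raises an ExtractorError naming the missing field. A sketch against an assumed response shape:

from yt_dlp.utils import url_or_none
from yt_dlp.utils.traversal import require, traverse_obj

sources = {'sources': {'hls': 'https://example.com/master.m3u8'}}  # shape assumed
hls_url = traverse_obj(sources, ('sources', 'hls', {url_or_none}, {require('HLS URL')}))
assert hls_url == 'https://example.com/master.m3u8'
# With an empty response, the same path would raise an ExtractorError instead of yielding None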

host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
formats = []
urplayer_streams = urplayer_data.get('streamingInfo', {})

for k, v in urplayer_streams.get('raw', {}).items():
if not (k in ('sd', 'hd', 'mp3', 'm4a') and isinstance(v, dict)):
continue
file_http = v.get('location')
if file_http:
formats.extend(self._extract_wowza_formats(
f'http://{host}/{file_http}playlist.m3u8',
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))

subtitles = {}

def parse_lang_code(code):
"3-character language code or None (utils candidate)"

@@ -339,20 +339,11 @@ class WistiaChannelIE(WistiaBaseIE):
'title': 'The Roof S2: The Modern CRO',
'thumbnail': r're:https?://embed(?:-ssl)?\.wistia\.com/.+\.(?:jpg|png)',
'duration': 86.487,
'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season. ',
'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n',
'timestamp': 1619790290,
'upload_date': '20210430',
},
'params': {'noplaylist': True, 'skip_download': True},
}, {
# Channel with episodes structure instead of videos
'url': 'https://fast.wistia.net/embed/channel/sapab9p6qd',
'info_dict': {
'id': 'sapab9p6qd',
'title': 'Credo: An RCIA Program',
'description': '\n',
},
'playlist_mincount': 80,
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.profitwell.com/recur/boxed-out',
@@ -408,7 +399,8 @@ class WistiaChannelIE(WistiaBaseIE):

entries = [
self.url_result(f'wistia:{video["hashedId"]}', WistiaIE, title=video.get('name'))
for video in traverse_obj(series, ('sections', ..., ('videos', 'episodes'), lambda _, v: v['hashedId']))
for video in traverse_obj(series, ('sections', ..., 'videos', ...)) or []
if video.get('hashedId')
]
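The ('videos', 'episodes') alternative in the new path matches channels that use either key, and the hashedId filter replaces the old post-hoc if video.get('hashedId') check. A sketch with a hypothetical series payload:

from yt_dlp.utils.traversal import traverse_obj

# Hypothetical channel using 'episodes'; the second entry has no usable id
series = {'sections': [{'episodes': [
    {'hashedId': 'abc123', 'name': 'Ep 1'},
    {'hashedId': '', 'name': 'draft'},
]}]}
videos = traverse_obj(series, ('sections', ..., ('videos', 'episodes'), lambda _, v: v['hashedId']))
assert [v['hashedId'] for v in videos] == ['abc123']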

return self.playlist_result(

@@ -1,6 +1,8 @@
import base64
import codecs
import itertools
import re
import urllib.parse
import string

from .common import InfoExtractor
from ..utils import (
@@ -14,6 +16,7 @@ from ..utils import (
join_nonempty,
parse_duration,
str_or_none,
try_call,
try_get,
unified_strdate,
url_or_none,
@@ -29,7 +32,7 @@ class _ByteGenerator:
try:
self._algorithm = getattr(self, f'_algo{algo_id}')
except AttributeError:
raise ExtractorError(f'Unknown algorithm ID "{algo_id}"')
raise ExtractorError(f'Unknown algorithm ID: {algo_id}')
self._s = to_signed_32(seed)

def _algo1(self, s):
@@ -213,28 +216,32 @@ class XHamsterIE(InfoExtractor):
'only_matching': True,
}]

_XOR_KEY = b'xh7999'

def _decipher_format_url(self, format_url, format_id):
parsed_url = urllib.parse.urlparse(format_url)

hex_string, path_remainder = self._search_regex(
r'^/(?P<hex>[0-9a-fA-F]{12,})(?P<rem>[/,].+)$', parsed_url.path, 'url components',
default=(None, None), group=('hex', 'rem'))
if not hex_string:
self.report_warning(f'Skipping format "{format_id}": unsupported URL format')
return None

byte_data = bytes.fromhex(hex_string)
seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)

try:
if all(char in string.hexdigits for char in format_url):
byte_data = bytes.fromhex(format_url)
seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)
byte_gen = _ByteGenerator(byte_data[0], seed)
except ExtractorError as e:
self.report_warning(f'Skipping format "{format_id}": {e.msg}')
return bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')

cipher_type, _, ciphertext = try_call(
lambda: base64.b64decode(format_url).decode().partition('_')) or [None] * 3

if not cipher_type or not ciphertext:
self.report_warning(f'Skipping format "{format_id}": failed to decipher URL')
return None

deciphered = bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
if cipher_type == 'xor':
return bytes(
a ^ b for a, b in
zip(ciphertext.encode(), itertools.cycle(self._XOR_KEY))).decode()

return parsed_url._replace(path=f'/{deciphered}{path_remainder}').geturl()
if cipher_type == 'rot13':
return codecs.decode(ciphertext, cipher_type)

self.report_warning(f'Skipping format "{format_id}": unsupported cipher type "{cipher_type}"')
return None
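Both cipher branches above are stdlib one-liners: XOR against a cycled key is its own inverse, and rot13 ships as a codec. A self-contained sketch (the key is taken from the diff above; sample strings are illustrative):

import codecs
import itertools

XOR_KEY = b'xh7999'

def xor_cipher(text, key=XOR_KEY):
    # XOR each byte against the key, repeating the key as needed;
    # latin-1 keeps the byte<->str mapping lossless in both directions
    return bytes(a ^ b for a, b in zip(text.encode('latin-1'), itertools.cycle(key))).decode('latin-1')

url = 'https://example.com/video.mp4'
assert xor_cipher(xor_cipher(url)) == url  # applying XOR twice round-trips

assert codecs.decode('uggcf://', 'rot13') == 'https://'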

def _fixup_formats(self, formats):
for f in formats:
@@ -357,11 +364,8 @@ class XHamsterIE(InfoExtractor):
'height': get_height(quality),
'filesize': format_sizes.get(quality),
'http_headers': {
'Referer': urlh.url,
'Referer': standard_url,
},
# HTTP formats return "Wrong key" error even when deciphered by site JS
# TODO: Remove this when resolved on the site's end
'__needs_testing': True,
})

categories_list = video.get('categories')
@@ -398,8 +402,7 @@ class XHamsterIE(InfoExtractor):
'age_limit': age_limit if age_limit is not None else 18,
'categories': categories,
'formats': self._fixup_formats(formats),
# TODO: Revert to ('res', 'proto', 'tbr') when HTTP formats problem is resolved
'_format_sort_fields': ('res', 'proto:m3u8', 'tbr'),
'_format_sort_fields': ('res', 'proto', 'tbr'),
}

# Old layout fallback
@@ -13,16 +13,55 @@ from ..utils import (
parse_iso8601,
traverse_obj,
try_get,
update_url,
url_or_none,
)


class YahooIE(InfoExtractor):
IE_NAME = 'yahoo'
IE_DESC = 'Yahoo screen and movies'
_VALID_URL = r'(?P<url>https?://(?:(?P<country>[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P<id>[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1']

_TESTS = [{
'url': 'https://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
'info_dict': {
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
'ext': 'mp4',
'title': 'Julian Smith & Travis Legg Watch Julian Smith',
'description': 'Julian and Travis watch Julian Smith',
'duration': 6863,
'timestamp': 1369812016,
'upload_date': '20130529',
},
'skip': 'No longer exists',
}, {
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
'md5': '7993e572fac98e044588d0b5260f4352',
'info_dict': {
'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
'ext': 'mp4',
'title': "Yahoo Saves 'Community'",
'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
'duration': 170,
'timestamp': 1406838636,
'upload_date': '20140731',
},
'skip': 'Unfortunately, this video is not available in your region',
}, {
'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
'md5': '71298482f7c64cbb7fa064e4553ff1c1',
'info_dict': {
'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
'ext': 'webm',
'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
'description': 'md5:f66c890e1490f4910a9953c941dee944',
'duration': 97,
'timestamp': 1414489862,
'upload_date': '20141028',
},
'skip': 'No longer exists',
}, {
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
'md5': '88e209b417f173d86186bef6e4d1f160',
'info_dict': {
'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
@@ -30,33 +69,27 @@ class YahooIE(InfoExtractor):
'title': 'China Moses Is Crazy About the Blues',
'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
'duration': 128,
'timestamp': 1385721938,
'timestamp': 1385722202,
'upload_date': '20131129',
'display_id': 'china-moses-crazy-blues-104538833',
'view_count': int,
'thumbnail': r're:https://media\.zenfs\.com/.+',
},
}, {
'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
# 'md5': '989396ae73d20c6f057746fb226aa215', # varies between this and 'b17ac378b1134fa44370fb27db09a744'
'md5': '2a9752f74cb898af5d1083ea9f661b58',
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
'title': '\'True Story\' Trailer',
'description': 'True Story',
'duration': 150,
'timestamp': 1418923800,
'timestamp': 1418919206,
'upload_date': '20141218',
'display_id': 'true-story-trailer-173000497',
'view_count': int,
'thumbnail': r're:https://media\.zenfs\.com/.+\.jpg',
},
}, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True,
}, {
'note': 'NBC Sports embeds',
'url': 'https://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
'info_dict': {
'id': '9CsDKds0kvHI',
'ext': 'flv',
@@ -66,10 +99,26 @@ class YahooIE(InfoExtractor):
'uploader': 'NBCU-SPORTS',
'timestamp': 1426270238,
},
'skip': 'Page no longer has video',
}, {
'url': 'https://tw.news.yahoo.com/-100120367.html',
'only_matching': True,
}, {
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
'info_dict': {
'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
'ext': 'mp4',
'title': 'Communitary - Community Episode 1: Ladders',
'description': 'md5:8fc39608213295748e1e289807838c97',
'duration': 1646,
'timestamp': 1440436550,
'upload_date': '20150824',
'series': 'Communitary',
'season_number': 6,
'episode_number': 1,
},
'skip': 'No longer exists',
}, {
# ytwnews://cavideo/
'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
@@ -80,16 +129,12 @@ class YahooIE(InfoExtractor):
'description': '中文版預',
'timestamp': 1476696196,
'upload_date': '20161017',
'view_count': int,
'duration': 141,
'thumbnail': r're:https://media\.zenfs\.com/.+\.jpg',
'series': '電影',
'display_id': '單車天使-中文版預-092316541',
},
'params': {
'skip_download': True,
},
}, {
# Contains both a Yahoo hosted video and multiple Youtube embeds
'url': 'https://www.yahoo.com/entertainment/gwen-stefani-reveals-the-pop-hit-she-passed-on-assigns-it-to-her-voice-contestant-instead-033045672.html',
'info_dict': {
'id': '46c5d95a-528f-3d03-b732-732fcadd51de',
@@ -102,29 +147,24 @@ class YahooIE(InfoExtractor):
'ext': 'mp4',
'title': 'Gwen Stefani reveals she turned down one of Sia\'s best songs',
'description': 'On "The Voice" Tuesday, Gwen Stefani told Taylor Swift which Sia hit was almost hers.',
'timestamp': 1572406499,
'timestamp': 1572406500,
'upload_date': '20191030',
'display_id': 'gwen-stefani-reveals-she-turned-033459311',
'view_count': int,
'duration': 97,
'thumbnail': 'https://s.yimg.com/os/creatr-uploaded-images/2019-10/348bb330-fac6-11e9-8d27-38e85d573702',
'series': 'Last Night Now',
},
}, {
'info_dict': {
'id': '352CFDOQrKg',
'ext': 'mp4',
'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019',
'description': 'md5:7fe8e3d5806f96002e55f190d1d94479',
'uploader': 'The Voice',
'uploader_id': 'NBCTheVoice',
'upload_date': '20191029',
},
}],
}, {
'url': 'https://sports.yahoo.com/video/rams-lose-grip-nfcs-top-174614409.html',
'info_dict': {
'id': '6b15f100-cf5c-3ad0-9c96-87cbd2f72d4a',
'ext': 'mp4',
'display_id': 'rams-lose-grip-nfcs-top-174614409',
'title': 'Rams lose their grip on NFC\'s top seed — can they still secure the bye?',
'description': 'md5:5f4f98ab3c4de80e54c105b6bbb1d024',
'view_count': int,
'duration': 85,
'thumbnail': 'https://s.yimg.com/os/creatr-uploaded-images/2025-12/94fc4840-dd02-11f0-beff-38ba3a4992e3',
'timestamp': 1766166374,
'upload_date': '20251219',
'params': {
'playlistend': 2,
},
'expected_warnings': ['HTTP Error 404', 'Ignoring subtitle tracks'],
}, {
'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html',
'only_matching': True,
@@ -138,12 +178,14 @@ class YahooIE(InfoExtractor):

def _extract_yahoo_video(self, video_id, country):
video = self._download_json(
f'https://video-api.yql.yahoo.com/v1/video/sapi/streams/{video_id}',
video_id, 'Downloading video JSON metadata')['query']['results']['mediaObj'][0]['meta']
f'https://{country}.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["{video_id}"]',
video_id, 'Downloading video JSON metadata')[0]
title = video['title']

if country == 'malaysia':
country = 'my'

is_live = traverse_obj(video, ('uplynk_live', {bool})) is True
is_live = video.get('live_state') == 'live'
fmts = ('m3u8',) if is_live else ('webm', 'mp4')

urls = []
@@ -189,23 +231,43 @@ class YahooIE(InfoExtractor):
'ext': mimetype2ext(cc.get('content_type')),
})

streaming_url = video.get('streaming_url')
if streaming_url and not is_live:
formats.extend(self._extract_m3u8_formats(
streaming_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))

if not formats and msg == 'geo restricted':
self.raise_geo_restricted(metadata_available=True)

thumbnails = []
for thumb in video.get('thumbnails', []):
thumb_url = thumb.get('url')
if not thumb_url:
continue
thumbnails.append({
'id': thumb.get('tag'),
'url': thumb.get('url'),
'width': int_or_none(thumb.get('width')),
'height': int_or_none(thumb.get('height')),
})

series_info = video.get('series_info') or {}

return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': clean_html(video.get('description')),
'timestamp': parse_iso8601(video.get('publish_time')),
'subtitles': subtitles,
'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('view_count')),
'is_live': is_live,
**traverse_obj(video, {
'title': ('title', {clean_html}),
'description': ('description', {clean_html}),
'thumbnail': ('thumbnail', {url_or_none}, {update_url(scheme='https')}),
'timestamp': ('publish_time', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'view_count': ('view_count', {int_or_none}),
'series': ('show_name', {str}, filter),
}),
'series': video.get('show_name'),
'season_number': int_or_none(series_info.get('season_number')),
'episode_number': int_or_none(series_info.get('episode_number')),
}

def _real_extract(self, url):
@@ -259,13 +321,14 @@ class YahooIE(InfoExtractor):


class YahooSearchIE(SearchInfoExtractor):
IE_DESC = 'Yahoo screen search'
_MAX_RESULTS = 1000
IE_NAME = 'yahoo:search'
IE_NAME = 'screen.yahoo:search'
_SEARCH_KEY = 'yvsearch'

def _search_results(self, query):
for pagenum in itertools.count(0):
result_url = f'https://video.search.yahoo.com/search/?p={urllib.parse.quote_plus(query)}&fr=screen&o=js&gs=0&b={pagenum * 30}'
result_url = f'http://video.search.yahoo.com/search/?p={urllib.parse.quote_plus(query)}&fr=screen&o=js&gs=0&b={pagenum * 30}'
info = self._download_json(result_url, query,
note='Downloading results page ' + str(pagenum + 1))
yield from (self.url_result(result['rurl']) for result in info['results'])
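_search_results pages with an unbounded itertools.count loop; the SearchInfoExtractor caller slices the generator, so the loop itself needs no stop condition. A generic sketch of the pattern (fetch_page is illustrative):

import itertools

def paged_results(fetch_page, page_size=30):
    # fetch_page(offset) returns a list of results; an empty page ends the loop
    for pagenum in itertools.count(0):
        results = fetch_page(pagenum * page_size)
        if not results:
            return
        yield from results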

@@ -1,67 +0,0 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
join_nonempty,
remove_end,
url_or_none,
)
from ..utils.traversal import traverse_obj


class YfanefaIE(InfoExtractor):
IE_NAME = 'yfanefa'
_VALID_URL = r'https?://(?:www\.)?yfanefa\.com/(?P<id>[^?#]+)'
_TESTS = [{
'url': 'https://www.yfanefa.com/record/2717',
'info_dict': {
'id': 'record-2717',
'ext': 'mp4',
'title': 'THE HALLAMSHIRE RIFLES LEAVING SHEFFIELD, 1914',
'duration': 5239,
'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
},
}, {
'url': 'https://www.yfanefa.com/news/53',
'info_dict': {
'id': 'news-53',
'ext': 'mp4',
'title': 'Memory Bank: Bradford Launch',
'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
},
}, {
'url': 'https://www.yfanefa.com/evaluating_nature_matters',
'info_dict': {
'id': 'evaluating_nature_matters',
'ext': 'mp4',
'title': 'Evaluating Nature Matters',
'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
},
}]

def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)
player_data = self._search_json(
r'iwPlayer\.options\["[\w.]+"\]\s*=', webpage, 'player options', video_id)

formats = []
video_url = join_nonempty(player_data['url'], player_data.get('signature'), delim='')
if determine_ext(video_url) == 'm3u8':
formats = self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id='hls')
else:
formats = [{'url': video_url, 'ext': 'mp4'}]

return {
'id': video_id.strip('/').replace('/', '-'),
'title':
self._og_search_title(webpage, default=None)
or remove_end(self._html_extract_title(webpage), ' | Yorkshire Film Archive'),
'formats': formats,
**traverse_obj(player_data, {
'thumbnail': ('preview', {url_or_none}),
'duration': ('duration', {int_or_none}),
}),
}
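The format branch above keys off the file extension after gluing the optional signature onto the URL. A sketch with illustrative values:

from yt_dlp.utils import determine_ext, join_nonempty

# A None signature is dropped by join_nonempty, so the bare URL survives intact
video_url = join_nonempty('https://media.example.com/master.m3u8', None, delim='')
assert video_url == 'https://media.example.com/master.m3u8'
assert determine_ext(video_url) == 'm3u8'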
@@ -104,7 +104,6 @@ INNERTUBE_CLIENTS = {
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
'SUPPORTS_COOKIES': True,
'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
**WEB_PO_TOKEN_POLICIES,
},
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
@@ -118,7 +117,6 @@ INNERTUBE_CLIENTS = {
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
'SUPPORTS_COOKIES': True,
'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
**WEB_PO_TOKEN_POLICIES,
},
'web_embedded': {
@@ -159,7 +157,6 @@ INNERTUBE_CLIENTS = {
),
},
'SUPPORTS_COOKIES': True,
'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
},
# This client now requires sign-in for every video
'web_creator': {
@@ -316,7 +313,6 @@ INNERTUBE_CLIENTS = {
),
},
'SUPPORTS_COOKIES': True,
'SUPPORTS_AD_PLAYBACK_CONTEXT': True,
},
'tv': {
'INNERTUBE_CONTEXT': {
@@ -416,7 +412,6 @@ def build_innertube_clients():
ytcfg.setdefault('SUBS_PO_TOKEN_POLICY', SubsPoTokenPolicy())
ytcfg.setdefault('REQUIRE_AUTH', False)
ytcfg.setdefault('SUPPORTS_COOKIES', False)
ytcfg.setdefault('SUPPORTS_AD_PLAYBACK_CONTEXT', False)
ytcfg.setdefault('PLAYER_PARAMS', None)
ytcfg.setdefault('AUTHENTICATED_USER_AGENT', None)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
@@ -1065,7 +1060,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return next_continuation

return traverse_obj(renderer, (
('contents', 'items', 'rows', 'subThreads'), ..., 'continuationItemRenderer',
('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
), get_all=False, expected_type=cls._extract_continuation_ep_data)


@@ -382,8 +382,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
('accessibilityText', {lambda x: re.fullmatch(r'(.+), (?:[\d,.]+(?:[KM]| million)?|No) views? - play Short', x)}, 1)), any),
'view_count': ('overlayMetadata', 'secondaryText', 'content', {parse_count}),
}),
thumbnails=self._extract_thumbnails(
renderer, ('thumbnailViewModel', 'thumbnailViewModel', 'image'), final_key='sources'))
thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
return

def _video_entry(self, video_renderer):
@@ -1586,6 +1585,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 50,
'expected_warnings': ['YouTube Music is not directly supported'],
}, {
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'note': 'unlisted single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'info_dict': {
@@ -1885,6 +1885,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 30,
}, {
# Shorts url result in shorts tab
# TODO: Fix channel id extraction
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA',
@@ -1913,6 +1915,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'params': {'extract_flat': True},
}, {
# Live video status should be extracted
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
'info_dict': {
'id': 'UCQvWX73GQygcwXOTSf_VDVg',

@@ -76,7 +76,7 @@ STREAMING_DATA_FETCH_GVS_PO_TOKEN = '__yt_dlp_fetch_gvs_po_token'
STREAMING_DATA_PLAYER_TOKEN_PROVIDED = '__yt_dlp_player_token_provided'
STREAMING_DATA_INNERTUBE_CONTEXT = '__yt_dlp_innertube_context'
STREAMING_DATA_IS_PREMIUM_SUBSCRIBER = '__yt_dlp_is_premium_subscriber'
STREAMING_DATA_AVAILABLE_AT_TIMESTAMP = '__yt_dlp_available_at_timestamp'
STREAMING_DATA_FETCHED_TIMESTAMP = '__yt_dlp_fetched_timestamp'

PO_TOKEN_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide'

@@ -1660,71 +1660,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'live_status': 'not_live',
},
'params': {'skip_download': True},
}, {
# Comment subthreads with 4 levels of depth
'url': 'https://www.youtube.com/watch?v=f6HNySwZV4c',
'info_dict': {
'id': 'f6HNySwZV4c',
'ext': 'mp4',
'title': 'dlptestvideo2',
'description': '',
'media_type': 'video',
'uploader': 'cole-dlp-test-acc',
'uploader_id': '@coletdjnz',
'uploader_url': 'https://www.youtube.com/@coletdjnz',
'channel': 'cole-dlp-test-acc',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'channel_follower_count': int,
'view_count': int,
'like_count': int,
'age_limit': 0,
'duration': 5,
'thumbnail': 'https://i.ytimg.com/vi/f6HNySwZV4c/maxresdefault.jpg',
'categories': ['People & Blogs'],
'tags': [],
'timestamp': 1709856007,
'upload_date': '20240308',
'release_timestamp': 1709856007,
'release_date': '20240308',
'playable_in_embed': True,
'availability': 'public',
'live_status': 'not_live',
'comment_count': 15, # XXX: minimum
},
'params': {
'skip_download': True,
'getcomments': True,
},
}, {
# Comments: `subThreads` containing `commentThreadRenderer`s AND `continuationItemRenderer`
'url': 'https://www.youtube.com/watch?v=3dHQb2Nhma0',
'info_dict': {
'id': '3dHQb2Nhma0',
'ext': 'mp4',
'title': 'Tɪtle',
'description': '',
'media_type': 'video',
'uploader': 'abcdefg',
'uploader_id': '@abcdefg-d5t2c',
'uploader_url': 'https://www.youtube.com/@abcdefg-d5t2c',
'channel': 'abcdefg',
'channel_id': 'UCayEJzV8XSSJkPdA7OAsbew',
'channel_url': 'https://www.youtube.com/channel/UCayEJzV8XSSJkPdA7OAsbew',
'view_count': int,
'like_count': int,
'age_limit': 0,
'duration': 12,
'thumbnail': 'https://i.ytimg.com/vi/3dHQb2Nhma0/maxresdefault.jpg',
'categories': ['People & Blogs'],
'tags': [],
'timestamp': 1767158812,
'upload_date': '20251231',
'playable_in_embed': True,
'availability': 'unlisted',
'live_status': 'not_live',
'comment_count': 9, # XXX: minimum
},
}]
_WEBPAGE_TESTS = [{
# <object>
@@ -2467,7 +2402,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

return info

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None, depth=1):
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):

get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

@@ -2499,15 +2434,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
return _continuation

def extract_thread(contents, entity_payloads, thread_parent, thread_depth):
if not thread_parent:
def extract_thread(contents, entity_payloads):
if not parent:
tracker['current_page_thread'] = 0

if max_depth < thread_depth:
return

for content in contents:
if not thread_parent and tracker['total_parent_comments'] >= max_parents:
if not parent and tracker['total_parent_comments'] >= max_parents:
yield
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])

@@ -2517,7 +2448,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
expected_type=dict, default={})

comment = self._extract_comment_old(comment_renderer, thread_parent)
comment = self._extract_comment_old(comment_renderer, parent)

# new comment format
else:
@@ -2528,7 +2459,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not comment_keys:
continue
entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
comment = self._extract_comment(entities, thread_parent)
comment = self._extract_comment(entities, parent)
if comment:
comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None

@@ -2547,14 +2478,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
self.report_warning(
'Detected YouTube comments looping. Stopping comment extraction '
f'{"for this thread" if thread_parent else ""} as we probably cannot get any more.')
f'{"for this thread" if parent else ""} as we probably cannot get any more.')
yield
break # Safeguard for recursive call in subthreads code path below
else:
tracker['seen_comment_ids'].add(comment_id)
tracker['seen_comment_ids'].add(comment['id'])

tracker['running_total'] += 1
tracker['total_reply_comments' if thread_parent else 'total_parent_comments'] += 1
tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
yield comment

# Attempt to get the replies
@@ -2562,22 +2492,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

if comment_replies_renderer:
subthreads = traverse_obj(comment_replies_renderer, ('subThreads', ..., {dict}))
# Recursively extract from `commentThreadRenderer`s in `subThreads`
if threads := traverse_obj(subthreads, lambda _, v: v['commentThreadRenderer']):
for entry in extract_thread(threads, entity_payloads, comment_id, thread_depth + 1):
if entry:
yield entry
if not traverse_obj(subthreads, lambda _, v: v['continuationItemRenderer']):
# All of the subThreads' `continuationItemRenderer`s were within the nested
# `commentThreadRenderer`s and are now exhausted, so avoid unnecessary recursion below
continue

tracker['current_page_thread'] += 1
# Recursively extract from `continuationItemRenderer` in `subThreads`
comment_entries_iter = self._comment_entries(
comment_replies_renderer, ytcfg, video_id,
parent=comment_id, tracker=tracker, depth=thread_depth + 1)
parent=comment.get('id'), tracker=tracker)
yield from itertools.islice(comment_entries_iter, min(
max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
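The reply cap above combines the per-thread limit with the remaining global budget, and itertools.islice enforces it lazily without consuming the rest of the generator. A small numeric sketch:

import itertools

max_replies, max_replies_per_thread, already_fetched = 100, 20, 95
cap = min(max_replies_per_thread, max(0, max_replies - already_fetched))
replies = list(itertools.islice(iter(range(1000)), cap))
assert len(replies) == 5  # only 5 replies left in the global budget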

@@ -2593,12 +2511,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'pinned_comment_ids': set(),
}

_max_comments, max_parents, max_replies, max_replies_per_thread, max_depth, *_ = (
int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 5)

if max_depth < depth:
# TODO: Deprecated
# YouTube comments have a max depth of 2
max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
if max_depth:
self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
'Set max replies in the max-comments extractor argument instead')
if max_depth == 1 and parent:
return

_max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)

continuation = self._extract_continuation(root_continuation_data)

response = None
@@ -2626,7 +2550,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
tracker['current_page_thread'], comment_prog_str)
else:
# TODO: `parent` is only truthy in this code path with YT's legacy (non-threaded) comment view
note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
' ' if parent else '', ' replies' if parent else '',
page_num, comment_prog_str)
@@ -2643,7 +2566,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
check_get_keys=check_get_keys)
except ExtractorError as e:
# TODO: This code path is not reached since eb5bdbfa70126c7d5355cc0954b63720522e462c
# Ignore incomplete data error for replies if retries didn't work.
# This is to allow any other parent comments and comment threads to be downloaded.
# See: https://github.com/yt-dlp/yt-dlp/issues/4669
@@ -2670,7 +2592,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
continue

for entry in extract_thread(continuation_items, mutations, parent, depth):
for entry in extract_thread(continuation_items, mutations):
if not entry:
return
yield entry
@@ -2707,23 +2629,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return {'contentCheckOk': True, 'racyCheckOk': True}

@classmethod
def _generate_player_context(cls, sts=None, use_ad_playback_context=False):
def _generate_player_context(cls, sts=None):
context = {
'html5Preference': 'HTML5_PREF_WANTS',
}
if sts is not None:
context['signatureTimestamp'] = sts

playback_context = {
'contentPlaybackContext': context,
}
if use_ad_playback_context:
playback_context['adPlaybackContext'] = {
'pyv': True,
}

return {
'playbackContext': playback_context,
'playbackContext': {
'contentPlaybackContext': context,
},
**cls._get_checkok_params(),
}

@@ -2951,13 +2866,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
yt_query['serviceIntegrityDimensions'] = {'poToken': po_token}

sts = self._extract_signature_timestamp(video_id, player_url, webpage_ytcfg, fatal=False) if player_url else None

use_ad_playback_context = (
self._configuration_arg('use_ad_playback_context', ['false'])[0] != 'false'
and traverse_obj(INNERTUBE_CLIENTS, (client, 'SUPPORTS_AD_PLAYBACK_CONTEXT', {bool})))

yt_query.update(self._generate_player_context(sts, use_ad_playback_context))

yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
ytcfg=player_ytcfg, headers=headers, fatal=True,
@@ -2992,10 +2901,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

if not (requested_clients or excluded_clients) and default_clients == self._DEFAULT_JSLESS_CLIENTS:
self.report_warning(
f'No supported JavaScript runtime could be found. Only deno is enabled by default; '
f'to use another runtime add --js-runtimes RUNTIME[:PATH] to your command/config. '
f'YouTube extraction without a JS runtime has been deprecated, and some formats may be missing. '
f'See {_EJS_WIKI_URL} for details on installing one', only_once=True)
f'No supported JavaScript runtime could be found. YouTube extraction without '
f'a JS runtime has been deprecated, and some formats may be missing. '
f'See {_EJS_WIKI_URL} for details on installing one. To silence this warning, '
f'you can use --extractor-args "youtube:player_client=default"', only_once=True)
|
||||
|
||||
if not requested_clients:
|
||||
requested_clients.extend(default_clients)
|
||||
@@ -3123,6 +3032,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
elif pr:
|
||||
# Save client details for introspection later
|
||||
innertube_context = traverse_obj(player_ytcfg or self._get_default_ytcfg(client), 'INNERTUBE_CONTEXT')
|
||||
fetched_timestamp = int(time.time())
|
||||
sd = pr.setdefault('streamingData', {})
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = client
|
||||
sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
|
||||
@@ -3130,7 +3040,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
|
||||
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
|
||||
sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
|
||||
sd[STREAMING_DATA_AVAILABLE_AT_TIMESTAMP] = self._get_available_at_timestamp(pr, video_id, client)
|
||||
sd[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = client
|
||||
f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
|
||||
@@ -3240,9 +3150,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
|
||||
'Use formats=duplicate extractor argument instead')
|
||||
|
||||
def is_super_resolution(f_url):
|
||||
return '1' in traverse_obj(f_url, ({parse_qs}, 'xtags', ..., {urllib.parse.parse_qs}, 'sr', ...))
|
||||
|
||||
def solve_sig(s, spec):
|
||||
return ''.join(s[i] for i in spec)
|
||||
|
||||
@@ -3262,6 +3169,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# save pots per client to avoid fetching again
|
||||
gvs_pots = {}
|
||||
|
||||
# For handling potential pre-playback required waiting period
|
||||
playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
|
||||
|
||||
def get_language_code_and_preference(fmt_stream):
|
||||
audio_track = fmt_stream.get('audioTrack') or {}
|
||||
display_name = audio_track.get('displayName') or ''
|
||||
@@ -3286,13 +3196,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
|
||||
player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
|
||||
client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME)
|
||||
available_at = streaming_data[STREAMING_DATA_AVAILABLE_AT_TIMESTAMP]
|
||||
available_at = streaming_data[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
|
||||
streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...))
|
||||
|
||||
def get_stream_id(fmt_stream):
|
||||
return str_or_none(fmt_stream.get('itag')), traverse_obj(fmt_stream, 'audioTrack', 'id'), fmt_stream.get('isDrc')
|
||||
|
||||
def process_format_stream(fmt_stream, proto, missing_pot, super_resolution=False):
|
||||
def process_format_stream(fmt_stream, proto, missing_pot):
|
||||
itag = str_or_none(fmt_stream.get('itag'))
|
||||
audio_track = fmt_stream.get('audioTrack') or {}
|
||||
quality = fmt_stream.get('quality')
|
||||
@@ -3343,13 +3253,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
dct = {
|
||||
'asr': int_or_none(fmt_stream.get('audioSampleRate')),
|
||||
'filesize': int_or_none(fmt_stream.get('contentLength')),
|
||||
'format_id': join_nonempty(itag, (
|
||||
'drc' if fmt_stream.get('isDrc')
|
||||
else 'sr' if super_resolution
|
||||
else None)),
|
||||
'format_id': f'{itag}{"-drc" if fmt_stream.get("isDrc") else ""}',
|
||||
'format_note': join_nonempty(
|
||||
join_nonempty(audio_track.get('displayName'), audio_track.get('audioIsDefault') and '(default)', delim=' '),
|
||||
name, fmt_stream.get('isDrc') and 'DRC', super_resolution and 'AI-upscaled',
|
||||
name, fmt_stream.get('isDrc') and 'DRC',
|
||||
try_get(fmt_stream, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||
try_get(fmt_stream, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||
is_damaged and 'DAMAGED', missing_pot and 'MISSING POT',
|
||||
@@ -3385,8 +3292,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
def process_https_formats():
|
||||
proto = 'https'
|
||||
https_fmts = []
|
||||
skip_player_js = 'js' in self._configuration_arg('player_skip')
|
||||
|
||||
for fmt_stream in streaming_formats:
|
||||
if fmt_stream.get('targetDurationSec'):
|
||||
continue
|
||||
@@ -3424,30 +3329,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
|
||||
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
|
||||
encrypted_sig = try_get(sc, lambda x: x['s'][0])
|
||||
if not all((sc, fmt_url, skip_player_js or player_url, encrypted_sig)):
|
||||
msg = f'Some {client_name} client https formats have been skipped as they are missing a URL. '
|
||||
if not all((sc, fmt_url, player_url, encrypted_sig)):
|
||||
msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
|
||||
if client_name in ('web', 'web_safari'):
|
||||
msg += 'YouTube is forcing SABR streaming for this client. '
|
||||
else:
|
||||
msg += (
|
||||
f'YouTube may have enabled the SABR-only streaming experiment for '
|
||||
f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
|
||||
f'{"your account" if self.is_authenticated else "the current session"}. '
|
||||
)
|
||||
msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
|
||||
self.report_warning(msg, video_id, only_once=True)
|
||||
continue
|
||||
|
||||
fmt = process_format_stream(
|
||||
fmt_stream, proto, missing_pot=require_po_token and not po_token,
|
||||
super_resolution=is_super_resolution(fmt_url))
|
||||
fmt = process_format_stream(fmt_stream, proto, missing_pot=require_po_token and not po_token)
|
||||
if not fmt:
|
||||
continue
|
||||
|
||||
# signature
|
||||
# Attempt to load sig spec from cache
|
||||
if encrypted_sig:
|
||||
if skip_player_js:
|
||||
continue
|
||||
spec_cache_id = self._sig_spec_cache_id(player_url, len(encrypted_sig))
|
||||
spec = self._load_sig_spec_from_cache(spec_cache_id)
|
||||
if spec:
|
||||
@@ -3461,8 +3362,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# n challenge
|
||||
query = parse_qs(fmt_url)
|
||||
if query.get('n'):
|
||||
if skip_player_js:
|
||||
continue
|
||||
n_challenge = query['n'][0]
|
||||
if n_challenge in self._player_cache:
|
||||
fmt_url = update_url_query(fmt_url, {'n': self._player_cache[n_challenge]})
|
||||
@@ -3746,36 +3645,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
}))
|
||||
return webpage
|
||||
|
||||
def _get_available_at_timestamp(self, player_response, video_id, client):
|
||||
now = time.time()
|
||||
wait_seconds = 0
|
||||
|
||||
for renderer in traverse_obj(player_response, (
|
||||
'adSlots', lambda _, v: v['adSlotRenderer']['adSlotMetadata']['triggerEvent'] == 'SLOT_TRIGGER_EVENT_BEFORE_CONTENT',
|
||||
'adSlotRenderer', 'fulfillmentContent', 'fulfilledLayout', 'playerBytesAdLayoutRenderer', 'renderingContent', (
|
||||
None,
|
||||
('playerBytesSequentialLayoutRenderer', 'sequentialLayouts', ..., 'playerBytesAdLayoutRenderer', 'renderingContent'),
|
||||
), 'instreamVideoAdRenderer', {dict},
|
||||
)):
|
||||
duration = traverse_obj(renderer, ('playerVars', {urllib.parse.parse_qs}, 'length_seconds', -1, {int_or_none}))
|
||||
ad = 'an ad' if duration is None else f'a {duration}s ad'
|
||||
|
||||
skip_time = traverse_obj(renderer, ('skipOffsetMilliseconds', {float_or_none(scale=1000)}))
|
||||
if skip_time is not None:
|
||||
# YT allows skipping this ad; use the wait-until-skip time instead of full ad duration
|
||||
skip_time = skip_time if skip_time % 1 else int(skip_time)
|
||||
ad += f' skippable after {skip_time}s'
|
||||
duration = skip_time
|
||||
|
||||
if duration is not None:
|
||||
self.write_debug(f'{video_id}: Detected {ad} for {client}')
|
||||
wait_seconds += duration
|
||||
|
||||
if wait_seconds:
|
||||
return math.ceil(now) + wait_seconds
|
||||
|
||||
return int(now)
|
||||
|
||||
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
|
||||
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
|
||||
is_live = get_first(video_details, 'isLive')
|
||||
@@ -4126,11 +3995,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
STREAMING_DATA_CLIENT_NAME: client_name,
|
||||
})
|
||||
|
||||
def set_audio_lang_from_orig_subs_lang(lang_code):
|
||||
for f in formats:
|
||||
if f.get('acodec') != 'none' and not f.get('language'):
|
||||
f['language'] = lang_code
|
||||
|
||||
subtitles = {}
|
||||
skipped_subs_clients = set()
|
||||
|
||||
@@ -4190,8 +4054,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
orig_lang = qs.get('lang', [None])[-1]
|
||||
lang_name = self._get_text(caption_track, 'name', max_runs=1)
|
||||
is_manual_subs = caption_track.get('kind') != 'asr'
|
||||
if is_manual_subs:
|
||||
if caption_track.get('kind') != 'asr':
|
||||
if not lang_code:
|
||||
continue
|
||||
process_language(
|
||||
@@ -4202,14 +4065,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not trans_code:
|
||||
continue
|
||||
orig_trans_code = trans_code
|
||||
if is_manual_subs and trans_code != 'und':
|
||||
if caption_track.get('kind') != 'asr' and trans_code != 'und':
|
||||
if not get_translated_subs:
|
||||
continue
|
||||
trans_code += f'-{lang_code}'
|
||||
trans_name += format_field(lang_name, None, ' from %s')
|
||||
if lang_code == f'a-{orig_trans_code}':
|
||||
# Set audio language based on original subtitles
|
||||
set_audio_lang_from_orig_subs_lang(orig_trans_code)
|
||||
for f in formats:
|
||||
if f.get('acodec') != 'none' and not f.get('language'):
|
||||
f['language'] = orig_trans_code
|
||||
# Add an "-orig" label to the original language so that it can be distinguished.
|
||||
# The subs are returned without "-orig" as well for compatibility
|
||||
process_language(
|
||||
@@ -4220,21 +4085,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
automatic_captions, base_url, trans_code, trans_name, client_name,
|
||||
pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params})
|
||||
|
||||
# Extract automatic captions when the language is not in 'translationLanguages'
|
||||
# e.g. Cantonese [yue], see https://github.com/yt-dlp/yt-dlp/issues/14889
|
||||
lang_code = remove_start(lang_code, 'a-')
|
||||
if is_manual_subs or not lang_code or lang_code in automatic_captions:
|
||||
continue
|
||||
lang_name = remove_end(lang_name, ' (auto-generated)')
|
||||
if caption_track.get('isTranslatable'):
|
||||
# We can assume this is the original audio language
|
||||
set_audio_lang_from_orig_subs_lang(lang_code)
|
||||
process_language(
|
||||
automatic_captions, base_url, f'{lang_code}-orig',
|
||||
f'{lang_name} (Original)', client_name, pot_params)
|
||||
process_language(
|
||||
automatic_captions, base_url, lang_code, lang_name, client_name, pot_params)
|
||||
|
||||
# Avoid duplication if we've already got everything we need
|
||||
need_subs_langs.difference_update(subtitles)
|
||||
need_caps_langs.difference_update(automatic_captions)
|
||||
|
||||
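Note: the `_generate_player_context` hunks above show the master side accepting an extra `use_ad_playback_context` flag and, when it is set, nesting an `adPlaybackContext` alongside the `contentPlaybackContext` in the InnerTube `/player` query. A minimal sketch of the resulting payload, reconstructed only from the hunks above (standalone function name is hypothetical, not the full method):

    # Sketch reconstructed from the hunks above; values are illustrative.
    def generate_player_context(sts=None, use_ad_playback_context=False):
        context = {'html5Preference': 'HTML5_PREF_WANTS'}
        if sts is not None:
            context['signatureTimestamp'] = sts
        playback_context = {'contentPlaybackContext': context}
        if use_ad_playback_context:
            playback_context['adPlaybackContext'] = {'pyv': True}
        # _get_checkok_params() contributes the two check flags seen above
        return {
            'playbackContext': playback_context,
            'contentCheckOk': True,
            'racyCheckOk': True,
        }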
@@ -21,7 +21,6 @@ from yt_dlp.extractor.youtube.jsc.provider import (
)
from yt_dlp.extractor.youtube.pot._provider import configuration_arg
from yt_dlp.extractor.youtube.pot.provider import provider_bug_report_message
from yt_dlp.utils import version_tuple
from yt_dlp.utils._jsruntime import JsRuntimeInfo

if _has_ejs:
@@ -224,8 +223,7 @@ class EJSBaseJCP(JsChallengeProvider):
skipped_components.append(script)
continue
if not self.is_dev:
# Matching patch version is expected to have same hash
if version_tuple(script.version, lenient=True)[:2] != version_tuple(self._SCRIPT_VERSION, lenient=True)[:2]:
if script.version != self._SCRIPT_VERSION:
self.logger.warning(
f'Challenge solver {script_type.value} script version {script.version} '
f'is not supported (source: {script.source.value}, variant: {script.variant}, supported version: {self._SCRIPT_VERSION})')
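Note: the `EJSBaseJCP` hunk above relaxes a strict `script.version != self._SCRIPT_VERSION` equality check to a lenient major.minor comparison, on the stated assumption that a matching patch version has the same hash. A standalone sketch of that check (the helper name is hypothetical; `version_tuple` is the call used in the hunk):

    from yt_dlp.utils import version_tuple

    def script_version_supported(script_version, supported='0.3.2'):
        # Only major.minor must match; patch releases are assumed compatible
        return (version_tuple(script_version, lenient=True)[:2]
                == version_tuple(supported, lenient=True)[:2])

    # script_version_supported('0.3.1')  -> True   (patch differs)
    # script_version_supported('0.2.9')  -> False  (minor differs)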
@@ -1,6 +1,6 @@
# This file is generated by devscripts/update_ejs.py. DO NOT MODIFY!

VERSION = '0.3.2'
VERSION = '0.3.1'
HASHES = {
'yt.solver.bun.lib.js': '6ff45e94de9f0ea936a183c48173cfa9ce526ee4b7544cd556428427c1dd53c8073ef0174e79b320252bf0e7c64b0032cc1cf9c4358f3fda59033b7caa01c241',
'yt.solver.core.js': '0cd96b2d3f319dfa62cae689efa7d930ef1706e95f5921794db5089b2262957ec0a17d73938d8975ea35d0309cbfb4c8e4418d5e219837215eee242890c8b64d',
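Note: the generated file above pins a digest per solver component next to VERSION. The 128-hex-digit values are consistent with SHA-512, though the diff itself does not name the algorithm; a hypothetical verification helper under that assumption:

    import hashlib

    def matches_pinned_hash(script_bytes: bytes, expected_hex: str) -> bool:
        # Assumption: the pinned digests are SHA-512 over the raw script bytes
        return hashlib.sha512(script_bytes).hexdigest() == expected_hex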
@@ -127,7 +127,6 @@ class ZDFBaseIE(InfoExtractor):
**parse_codecs(quality.get('mimeCodec')),
'height': height,
'width': width,
'filesize': int_or_none(variant.get('filesize')),
'format_id': join_nonempty('http', stream.get('type')),
'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)),
}]
@@ -194,10 +193,8 @@ class ZDFBaseIE(InfoExtractor):
class ZDFIE(ZDFBaseIE):
_VALID_URL = [
r'https?://(?:www\.)?zdf\.de/(?:video|play)/(?:[^/?#]+/)*(?P<id>[^/?#]+)',
# Legacy redirects from before the redesign in 2025-03 or from before sister sites moved to their own domains
# /nachrichten/ sub-site URLs and legacy redirects from before the redesign in 2025-03
r'https?://(?:www\.)?zdf\.de/(?:[^/?#]+/)*(?P<id>[^/?#]+)\.html',
# Sister sites
r'https?://(?:www\.)?(?:zdfheute|logo)\.de/(?:[^/?#]+/)*(?P<id>[^/?#]+)\.html',
]
IE_NAME = 'zdf'
_TESTS = [{
@@ -209,7 +206,7 @@ class ZDFIE(ZDFBaseIE):
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
@@ -225,7 +222,7 @@ class ZDFIE(ZDFBaseIE):
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
@@ -242,7 +239,7 @@ class ZDFIE(ZDFBaseIE):
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
@@ -252,66 +249,67 @@ class ZDFIE(ZDFBaseIE):
'params': {'skip_download': True},
}, {
# Video belongs to a playlist, video URL
# Also: video mirrored from ARD Mediathek
'url': 'https://www.zdf.de/video/dokus/collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132/page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'md5': '84980c1a0148da6cd94de58333d7e1ee',
'url': 'https://www.zdf.de/video/dokus/die-magie-der-farben-116/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
'md5': '1eda17eb40a9ead3046326e10b9c5973',
'info_dict': {
'id': 'page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
'ext': 'mp4',
'title': 'Gelb: Vom hellen Glanz zu finsteren Abgründen',
'description': 'md5:9aad4806b4c8ea152ab21e70c9d516be',
'duration': 895.0,
'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102~original\?cb=\d+',
'title': 'Von Königspurpur bis Jeansblau',
'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
'duration': 2615.0,
'thumbnail': 'https://www.zdf.de/assets/koenigspurpur-bis-jeansblau-100~original?cb=1741857765971',
'series': 'Die Magie der Farben',
'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132',
'season': 'Season 2023',
'season_number': 2023,
'episode': 'Episode 5',
'episode_number': 5,
'timestamp': 1690902120,
'upload_date': '20230801',
'_old_archive_ids': ['zdf video_ard_dXJuOmFyZDpwdWJsaWNhdGlvbjo0YTYyOTJjM2Q0ZThlNmY1'],
'series_id': 'die-magie-der-farben-116',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 2',
'episode_number': 2,
'timestamp': 1445797800,
'upload_date': '20151025',
'_old_archive_ids': ['zdf 151025_magie_farben2_tex'],
},
}, {
# Video belongs to a playlist, play URL
'url': 'https://www.zdf.de/play/dokus/collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132/page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'url': 'https://www.zdf.de/play/dokus/die-magie-der-farben-116/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
'md5': '1eda17eb40a9ead3046326e10b9c5973',
'info_dict': {
'id': 'page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
'ext': 'mp4',
'title': 'Gelb: Vom hellen Glanz zu finsteren Abgründen',
'description': 'md5:9aad4806b4c8ea152ab21e70c9d516be',
'duration': 895.0,
'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102~original\?cb=\d+',
'title': 'Von Königspurpur bis Jeansblau',
'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
'duration': 2615.0,
'thumbnail': 'https://www.zdf.de/assets/koenigspurpur-bis-jeansblau-100~original?cb=1741857765971',
'series': 'Die Magie der Farben',
'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132',
'season': 'Season 2023',
'season_number': 2023,
'episode': 'Episode 5',
'episode_number': 5,
'timestamp': 1690902120,
'upload_date': '20230801',
'_old_archive_ids': ['zdf video_ard_dXJuOmFyZDpwdWJsaWNhdGlvbjo0YTYyOTJjM2Q0ZThlNmY1'],
'series_id': 'die-magie-der-farben-116',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 2',
'episode_number': 2,
'timestamp': 1445797800,
'upload_date': '20151025',
'_old_archive_ids': ['zdf 151025_magie_farben2_tex'],
},
'params': {'skip_download': True},
}, {
# Video belongs to a playlist, legacy URL before website redesign in 2025-03
'url': 'https://www.zdf.de/dokus/collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132/page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102.html',
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
'md5': '1eda17eb40a9ead3046326e10b9c5973',
'info_dict': {
'id': 'page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
'ext': 'mp4',
'title': 'Gelb: Vom hellen Glanz zu finsteren Abgründen',
'description': 'md5:9aad4806b4c8ea152ab21e70c9d516be',
'duration': 895.0,
'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102~original\?cb=\d+',
'title': 'Von Königspurpur bis Jeansblau',
'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
'duration': 2615.0,
'thumbnail': 'https://www.zdf.de/assets/koenigspurpur-bis-jeansblau-100~original?cb=1741857765971',
'series': 'Die Magie der Farben',
'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132',
'season': 'Season 2023',
'season_number': 2023,
'episode': 'Episode 5',
'episode_number': 5,
'timestamp': 1690902120,
'upload_date': '20230801',
'_old_archive_ids': ['zdf video_ard_dXJuOmFyZDpwdWJsaWNhdGlvbjo0YTYyOTJjM2Q0ZThlNmY1'],
'series_id': 'die-magie-der-farben-116',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 2',
'episode_number': 2,
'timestamp': 1445797800,
'upload_date': '20151025',
'_old_archive_ids': ['zdf 151025_magie_farben2_tex'],
},
'params': {'skip_download': True},
}, {
@@ -339,50 +337,7 @@ class ZDFIE(ZDFBaseIE):
'_old_archive_ids': ['zdf 211219_sendung_hjo'],
},
}, {
# FUNK video (hosted on a different CDN, has atypical PTMD and HLS files)
'url': 'https://www.zdf.de/video/serien/funk-collection-funk-11790-1596/funk-alles-ist-verzaubert-102',
'md5': '57af4423db0455a3975d2dc4578536bc',
'info_dict': {
'id': 'funk-alles-ist-verzaubert-102',
'ext': 'mp4',
'title': 'Alles ist verzaubert',
'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
'duration': 1278.0,
'thumbnail': r're:https://www\.zdf\.de/assets/teaser-funk-alles-ist-verzaubert-102~original\?cb=\d+',
'series': 'DRUCK',
'series_id': 'funk-collection-funk-11790-1596',
'season': 'Season 2021',
'season_number': 2021,
'episode': 'Episode 50',
'episode_number': 50,
'timestamp': 1635520560,
'upload_date': '20211029',
'_old_archive_ids': ['zdf video_funk_1770473'],
},
}, {
# zdfheute video, also available on zdf.de
'url': 'https://www.zdfheute.de/video/heute-journal/heute-journal-vom-19-dezember-2025-100.html',
'md5': '47af8c2cfa30abf74501170f62754c63',
'info_dict': {
'id': 'heute-journal-vom-19-dezember-2025-100',
'ext': 'mp4',
'title': 'heute journal vom 19. Dezember 2025',
'description': 'md5:fd0dfbce0783486db839ff9140a8074b',
'duration': 1780.0,
'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/273e5545-16e7-4ca3-898e-52fe9e06d964?layout=2400x1350',
'chapters': 'count:10',
'series': 'heute journal',
'series_id': 'heute-journal-104',
'season': 'Season 2025',
'season_number': 2025,
'episode': 'Episode 365',
'episode_number': 365,
'timestamp': 1766178000,
'upload_date': '20251219',
'_old_archive_ids': ['zdf 251219_2200_sendung_hjo'],
},
}, {
# zdfheute video, not available on zdf.de (uses the fallback extraction path)
# Video that requires fallback extraction
'url': 'https://www.zdf.de/nachrichten/politik/deutschland/koalitionsverhandlungen-spd-cdu-csu-dobrindt-100.html',
'md5': 'c3a78514dd993a5781aa3afe50db51e2',
'info_dict': {
@@ -391,50 +346,71 @@ class ZDFIE(ZDFBaseIE):
'title': 'Dobrindt schließt Steuererhöhungen aus',
'description': 'md5:9a117646d7b8df6bc902eb543a9c9023',
'duration': 325,
'thumbnail': r're:https://www\.zdfheute\.de/assets/dobrindt-csu-berlin-direkt-100~1920x1080\?cb=\d+',
'thumbnail': 'https://www.zdfheute.de/assets/dobrindt-csu-berlin-direkt-100~1920x1080?cb=1743357653736',
'timestamp': 1743374520,
'upload_date': '20250330',
'_old_archive_ids': ['zdf 250330_clip_2_bdi'],
},
}, {
# logo! video, also available on zdf.de
'url': 'https://www.logo.de/logo-vom-freitag-19-dezember-2025-102.html',
'md5': 'cfb1a0988b1249f052a437a55851134b',
# FUNK video (hosted on a different CDN, has atypical PTMD and HLS files)
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
'md5': '57af4423db0455a3975d2dc4578536bc',
'info_dict': {
'id': 'logo-vom-freitag-19-dezember-2025-102',
'id': 'funk-alles-ist-verzaubert-102',
'ext': 'mp4',
'title': 'logo! vom Freitag, 19. Dezember 2025',
'description': 'md5:971428cb563e924c153580f23870c613',
'duration': 490.0,
'thumbnail': r're:https://www\.zdf\.de/assets/iran-rote-erde-sendung-19-dezember-2025-100~original\?cb=\d+',
'chapters': 'count:7',
'series': 'logo!',
'series_id': 'logo-154',
'season': 'Season 2025',
'season_number': 2025,
'episode': 'Episode 382',
'episode_number': 382,
'timestamp': 1766168700,
'upload_date': '20251219',
'_old_archive_ids': ['zdf 251219_1925_sendung_log'],
'title': 'Alles ist verzaubert',
'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
'duration': 1278.0,
'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-102~original?cb=1663848412907',
'series': 'DRUCK',
'series_id': 'funk-collection-funk-11790-1590',
'season': 'Season 7',
'season_number': 7,
'episode': 'Episode 1',
'episode_number': 1,
'timestamp': 1635520560,
'upload_date': '20211029',
'_old_archive_ids': ['zdf video_funk_1770473'],
},
}, {
# logo! video, not available on zdf.de (uses the fallback extraction path)
'url': 'https://www.logo.de/kinderreporter-vivaan-trifft-alina-grijseels-100.html',
'md5': '094cea026babb67aa25fd0108400bc12',
'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
'info_dict': {
'id': 'kinderreporter-vivaan-trifft-alina-grijseels-100',
'id': 'das-geld-anderer-leute-100',
'ext': 'mp4',
'title': 'Vivaan trifft Handballerin Alina Grijseels',
'description': 'md5:9572e7f4340dda823ea4091a76624da6',
'duration': 166.0,
'thumbnail': r're:https://www\.zdf\.de/assets/vivaan-alina-grijseels-100~original\?cb=\d+',
'series': 'logo!',
'series_id': 'logo-154',
'timestamp': 1766236320,
'upload_date': '20251220',
'_old_archive_ids': ['zdf 251219_kr_alina_grijseels_neu_log'],
'title': 'Das Geld anderer Leute',
'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
'duration': 2581.0,
'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=1920x1080',
'series': 'SOKO Stuttgart',
'series_id': 'soko-stuttgart-104',
'season': 'Season 11',
'season_number': 11,
'episode': 'Episode 10',
'episode_number': 10,
'timestamp': 1728983700,
'upload_date': '20241015',
'_old_archive_ids': ['zdf 191205_1800_sendung_sok8'],
},
}, {
'url': 'https://www.zdf.de/serien/northern-lights/begegnung-auf-der-bruecke-100.html',
'info_dict': {
'id': 'begegnung-auf-der-bruecke-100',
'ext': 'webm',
'title': 'Begegnung auf der Brücke',
'description': 'md5:e53a555da87447f7f1207f10353f8e45',
'duration': 3083.0,
'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/c5ff1d1f-f5c8-4468-86ac-1b2f1dbecc76?layout=1920x1080',
'series': 'Northern Lights',
'series_id': 'northern-lights-100',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 1',
'episode_number': 1,
'timestamp': 1738546500,
'upload_date': '20250203',
'_old_archive_ids': ['zdf 240319_2310_sendung_not'],
},
'params': {'skip_download': 'geo-restricted http format'},
}, {
# Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
@@ -641,7 +617,7 @@ class ZDFChannelIE(ZDFBaseIE):
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
@@ -658,24 +634,38 @@ class ZDFChannelIE(ZDFBaseIE):
},
'playlist_mincount': 2,
}, {
# All seasons of playlist
'url': 'https://www.zdf.de/serien/taunuskrimi/',
'info_dict': {
'id': 'taunuskrimi-100',
'title': 'Taunuskrimi',
'description': 'md5:ee7204e9c625c3b611d1274f9d0e3070',
},
'playlist_mincount': 8,
}, {
'url': 'https://www.zdf.de/serien/taunuskrimi/?staffel=1',
'info_dict': {
'id': 'taunuskrimi-100-s1',
'title': 'Taunuskrimi - Season 1',
'description': 'md5:ee7204e9c625c3b611d1274f9d0e3070',
},
'playlist_count': 7,
}, {
'url': 'https://www.zdf.de/magazine/heute-journal-104',
'info_dict': {
'id': 'heute-journal-104',
'title': 'heute journal',
'description': 'md5:6edad39189abf8431795d3d6d7f986b3',
},
'playlist_mincount': 366,
'playlist_mincount': 500,
}, {
# Only selected season
'url': 'https://www.zdf.de/magazine/heute-journal-104?staffel=2025',
'url': 'https://www.zdf.de/magazine/heute-journal-104?staffel=2024',
'info_dict': {
'id': 'heute-journal-104-s2025',
'title': 'heute journal - Season 2025',
'id': 'heute-journal-104-s2024',
'title': 'heute journal - Season 2024',
'description': 'md5:6edad39189abf8431795d3d6d7f986b3',
},
'playlist_mincount': 1,
'playlist_maxcount': 365,
'playlist_count': 242,
'skip': 'Video count changes daily, needs support for playlist_maxcount',
}]

_PAGE_SIZE = 24
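Note: throughout the ZDF tests above, exact thumbnail URLs are replaced with `re:`-prefixed patterns because the `cb` query parameter is a cache-buster that changes over time. A simplified stand-in for how such a field matches (yt-dlp's test harness handles the `re:` prefix; the check below only illustrates the pattern):

    import re

    thumbnail = 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011'
    expected = r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+'
    assert re.match(expected[len('re:'):], thumbnail)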
@@ -4,15 +4,13 @@ from ..utils import (
int_or_none,
js_to_json,
parse_filesize,
parse_qs,
parse_resolution,
str_or_none,
update_url_query,
traverse_obj,
url_basename,
urlencode_postdata,
urljoin,
)
from ..utils.traversal import traverse_obj


class ZoomIE(InfoExtractor):
@@ -89,7 +87,6 @@ class ZoomIE(InfoExtractor):
def _real_extract(self, url):
base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')
query = {}
start_params = traverse_obj(url, {'startTime': ({parse_qs}, 'startTime', -1)})

if url_type == 'share':
webpage = self._get_real_webpage(url, base_url, video_id, 'share')
@@ -97,7 +94,7 @@ class ZoomIE(InfoExtractor):
redirect_path = self._download_json(
f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
video_id, note='Downloading share info JSON')['result']['redirectUrl']
url = update_url_query(urljoin(base_url, redirect_path), start_params)
url = urljoin(base_url, redirect_path)
query['continueMode'] = 'true'

webpage = self._get_real_webpage(url, base_url, video_id, 'play')
@@ -106,7 +103,6 @@ class ZoomIE(InfoExtractor):
# When things go wrong, file_id can be empty string
raise ExtractorError('Unable to extract file ID')

query.update(start_params)
data = self._download_json(
f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id, query=query,
note='Downloading play info JSON')['result']
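Note: the Zoom hunks above thread a `startTime` query parameter from the input URL into both the share redirect and the play-info request. A sketch of the parameter extraction, using only the calls that appear in the hunks (the example URL is made up):

    from yt_dlp.utils import parse_qs, update_url_query
    from yt_dlp.utils.traversal import traverse_obj

    url = 'https://zoom.us/rec/share/abc?startTime=1700000000000'
    start_params = traverse_obj(url, {'startTime': ({parse_qs}, 'startTime', -1)})
    # -> {'startTime': '1700000000000'}; if the parameter is absent, -> {}
    play_url = update_url_query('https://zoom.us/rec/play/xyz', start_params)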
Some files were not shown because too many files have changed in this diff